summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuc Verhaegen <libv@skynet.be>2010-03-14 08:21:16 +0100
committerLuc Verhaegen <libv@skynet.be>2010-03-14 08:21:16 +0100
commit0c8469d1892b441c38d1cb09d6bbf85692c89e92 (patch)
tree92b872bb7839b100a4f476945bea081e83f3587b
parentc40fdff4f4a1c7f53c0c3cdff1ff8bce4f5e168f (diff)
Import radeon, r200, r300 and r600 dri drivers from mesa 7.6.0.7.6.0
-rw-r--r--Makefile.am2
-rw-r--r--configure.ac14
-rw-r--r--r200/Makefile.am22
-rw-r--r--r200/r200_cmdbuf.c547
-rw-r--r--r200/r200_context.c447
-rw-r--r--r200/r200_context.h470
-rw-r--r--r200/r200_fragshader.c2
-rw-r--r--r200/r200_ioctl.c849
-rw-r--r--r200/r200_ioctl.h145
-rw-r--r--r200/r200_lock.c116
-rw-r--r--r200/r200_lock.h106
-rw-r--r--r200/r200_maos.h1
-rw-r--r--r200/r200_maos_arrays.c381
-rw-r--r--r200/r200_pixel.c128
-rw-r--r--r200/r200_reg.h6
-rw-r--r--r200/r200_sanity.c4
-rw-r--r--r200/r200_span.c307
-rw-r--r--r200/r200_state.c630
-rw-r--r--r200/r200_state.h12
-rw-r--r--r200/r200_state_init.c1289
-rw-r--r--r200/r200_swtcl.c269
-rw-r--r--r200/r200_swtcl.h11
-rw-r--r--r200/r200_tcl.c143
-rw-r--r--r200/r200_tex.c858
-rw-r--r--r200/r200_tex.h7
-rw-r--r--r200/r200_texmem.c530
-rw-r--r--r200/r200_texstate.c817
-rw-r--r--r200/r200_vertprog.c18
-rw-r--r--r300/Makefile.am41
-rw-r--r--r300/compiler/Makefile.am20
-rw-r--r--r300/compiler/memory_pool.c95
-rw-r--r--r300/compiler/memory_pool.h49
-rw-r--r--r300/compiler/r300_fragprog.c416
-rw-r--r--r300/compiler/r300_fragprog.h49
-rw-r--r--r300/compiler/r300_fragprog_emit.c (renamed from r300/r300_fragprog_emit.c)216
-rw-r--r--r300/compiler/r300_fragprog_swizzle.c (renamed from r300/r300_fragprog_swizzle.c)20
-rw-r--r--r300/compiler/r300_fragprog_swizzle.h (renamed from r300/r300_fragprog_swizzle.h)0
-rw-r--r--r300/compiler/r3xx_fragprog.c149
-rw-r--r--r300/compiler/r3xx_vertprog.c656
-rw-r--r--r300/compiler/r3xx_vertprog_dump.c177
-rw-r--r--r300/compiler/r500_fragprog.c449
-rw-r--r--r300/compiler/r500_fragprog.h (renamed from r300/r500_fragprog.h)29
-rw-r--r--r300/compiler/r500_fragprog_emit.c (renamed from r300/r500_fragprog_emit.c)84
-rw-r--r--r300/compiler/radeon_code.c171
-rw-r--r--r300/compiler/radeon_code.h207
-rw-r--r--r300/compiler/radeon_compiler.c262
-rw-r--r--r300/compiler/radeon_compiler.h108
-rw-r--r--r300/compiler/radeon_nqssadce.c (renamed from r300/radeon_nqssadce.c)177
-rw-r--r--r300/compiler/radeon_nqssadce.h (renamed from r300/radeon_nqssadce.h)19
-rw-r--r--r300/compiler/radeon_program.c211
-rw-r--r--r300/compiler/radeon_program.h (renamed from r300/radeon_program.h)76
-rw-r--r--r300/compiler/radeon_program_alu.c (renamed from r300/radeon_program_alu.c)464
-rw-r--r--r300/compiler/radeon_program_alu.h (renamed from r300/radeon_program_alu.h)21
-rw-r--r--r300/compiler/radeon_program_pair.c (renamed from r300/radeon_program_pair.c)354
-rw-r--r--r300/compiler/radeon_program_pair.h (renamed from r300/radeon_program_pair.h)37
-rw-r--r--r300/r300_cmdbuf.c953
-rw-r--r--r300/r300_cmdbuf.h81
-rw-r--r--r300/r300_context.c658
-rw-r--r--r300/r300_context.h783
-rw-r--r--r300/r300_draw.c718
-rw-r--r--r300/r300_emit.c482
-rw-r--r--r300/r300_emit.h225
-rw-r--r--r300/r300_fragprog.c699
-rw-r--r--r300/r300_fragprog.h132
-rw-r--r--r300/r300_fragprog_common.c277
-rw-r--r--r300/r300_fragprog_common.h37
-rw-r--r--r300/r300_ioctl.c1118
-rw-r--r--r300/r300_ioctl.h16
-rw-r--r--r300/r300_mem.c385
-rw-r--r--r300/r300_mem.h37
-rw-r--r--r300/r300_reg.h48
-rw-r--r--r300/r300_render.c503
-rw-r--r--r300/r300_render.h69
-rw-r--r--r300/r300_shader.c119
-rw-r--r--r300/r300_state.c1273
-rw-r--r--r300/r300_state.h28
-rw-r--r--r300/r300_swtcl.c712
-rw-r--r--r300/r300_swtcl.h20
-rw-r--r--r300/r300_tex.c874
-rw-r--r--r300/r300_tex.h13
-rw-r--r--r300/r300_texmem.c568
-rw-r--r--r300/r300_texstate.c622
-rw-r--r--r300/r300_vertprog.c1584
-rw-r--r--r300/r300_vertprog.h28
-rw-r--r--r300/r500_fragprog.c718
-rw-r--r--r300/radeon_context.c330
-rw-r--r--r300/radeon_context.h159
-rw-r--r--r300/radeon_ioctl.c396
-rw-r--r--r300/radeon_lock.c137
-rw-r--r--r300/radeon_lock.h115
-rw-r--r--r300/radeon_program.c128
-rw-r--r--r300/radeon_span.c349
-rw-r--r--r300/radeon_state.c244
-rw-r--r--r300/radeon_state.h43
-rwxr-xr-xr600/Lindent2
-rw-r--r--r600/Makefile.am48
-rw-r--r--r600/defaultendian.h38
-rw-r--r--r600/r600_cmdbuf.c514
-rw-r--r--r600/r600_cmdbuf.h212
-rw-r--r--r600/r600_context.c394
-rw-r--r--r600/r600_context.h189
-rw-r--r--r600/r600_emit.c117
-rw-r--r--r600/r600_emit.h (renamed from r200/r200_span.h)34
-rw-r--r--r600/r600_reg.h121
-rw-r--r--r600/r600_reg_auto_r6xx.h3089
-rw-r--r--r600/r600_reg_r6xx.h492
-rw-r--r--r600/r600_reg_r7xx.h149
-rw-r--r--r600/r600_tex.c440
-rw-r--r--r600/r600_tex.h (renamed from r300/radeon_ioctl.h)52
-rw-r--r--r600/r600_texstate.c960
-rw-r--r--r600/r700_assembler.c4334
-rw-r--r--r600/r700_assembler.h516
-rw-r--r--r600/r700_chip.c1274
-rw-r--r--r600/r700_chip.h503
-rw-r--r--r600/r700_chipoffset.h693
-rw-r--r--r600/r700_clear.c118
-rw-r--r--r600/r700_clear.h33
-rw-r--r--r600/r700_debug.c60
-rw-r--r--r600/r700_debug.h39
-rw-r--r--r600/r700_driconf.h33
-rw-r--r--r600/r700_fragprog.c476
-rw-r--r--r600/r700_fragprog.h66
-rw-r--r--r600/r700_ioctl.c50
-rw-r--r--r600/r700_ioctl.h35
-rw-r--r--r600/r700_oglprog.c149
-rw-r--r--r600/r700_oglprog.h34
-rw-r--r--r600/r700_render.c480
-rw-r--r--r600/r700_shader.c527
-rw-r--r--r600/r700_shader.h150
-rw-r--r--r600/r700_shaderinst.c224
-rw-r--r--r600/r700_shaderinst.h321
-rw-r--r--r600/r700_state.c1808
-rw-r--r--r600/r700_state.h46
-rw-r--r--r600/r700_vertprog.c480
-rw-r--r--r600/r700_vertprog.h94
-rw-r--r--r600/sq_micro_reg.h2008
-rw-r--r--radeon/Makefile.am22
-rw-r--r--radeon/radeon_bo_drm.h219
-rw-r--r--radeon/radeon_bo_legacy.c926
-rw-r--r--radeon/radeon_bo_legacy.h50
-rw-r--r--radeon/radeon_bocs_wrapper.h99
-rw-r--r--radeon/radeon_buffer_objects.c229
-rw-r--r--radeon/radeon_buffer_objects.h52
-rw-r--r--radeon/radeon_chipset.h152
-rw-r--r--radeon/radeon_cmdbuf.h121
-rw-r--r--radeon/radeon_common.c1349
-rw-r--r--radeon/radeon_common.h87
-rw-r--r--radeon/radeon_common_context.c802
-rw-r--r--radeon/radeon_common_context.h594
-rw-r--r--radeon/radeon_compat.c301
-rw-r--r--radeon/radeon_context.c459
-rw-r--r--radeon/radeon_context.h386
-rw-r--r--radeon/radeon_cs_drm.h246
-rw-r--r--radeon/radeon_cs_legacy.c409
-rw-r--r--radeon/radeon_cs_legacy.h40
-rw-r--r--radeon/radeon_cs_space_drm.c234
-rw-r--r--radeon/radeon_debug.c107
-rw-r--r--radeon/radeon_debug.h170
-rw-r--r--radeon/radeon_dma.c481
-rw-r--r--radeon/radeon_dma.h58
-rw-r--r--radeon/radeon_fbo.c599
-rw-r--r--radeon/radeon_ioctl.c1359
-rw-r--r--radeon/radeon_ioctl.h137
-rw-r--r--radeon/radeon_lighting.c6
-rw-r--r--radeon/radeon_lock.c128
-rw-r--r--radeon/radeon_lock.h83
-rw-r--r--radeon/radeon_maos.h1
-rw-r--r--radeon/radeon_maos_arrays.c463
-rw-r--r--radeon/radeon_maos_vbtmp.h3
-rw-r--r--radeon/radeon_maos_verts.c36
-rw-r--r--radeon/radeon_mipmap_tree.c421
-rw-r--r--radeon/radeon_mipmap_tree.h101
-rw-r--r--radeon/radeon_queryobj.c235
-rw-r--r--radeon/radeon_queryobj.h55
-rw-r--r--radeon/radeon_sanity.c6
-rw-r--r--radeon/radeon_sanity.h2
-rw-r--r--radeon/radeon_screen.c1210
-rw-r--r--radeon/radeon_screen.h13
-rw-r--r--radeon/radeon_span.c812
-rw-r--r--radeon/radeon_span.h3
-rw-r--r--radeon/radeon_state.c646
-rw-r--r--radeon/radeon_state.h15
-rw-r--r--radeon/radeon_state_init.c989
-rw-r--r--radeon/radeon_swtcl.c280
-rw-r--r--radeon/radeon_swtcl.h3
-rw-r--r--radeon/radeon_tcl.c148
-rw-r--r--radeon/radeon_tex.c535
-rw-r--r--radeon/radeon_tex.h8
-rw-r--r--radeon/radeon_texmem.c404
-rw-r--r--radeon/radeon_texstate.c805
-rw-r--r--radeon/radeon_texture.c1052
-rw-r--r--radeon/radeon_texture.h122
-rw-r--r--radeon/server/radeon_reg.h8
193 files changed, 45359 insertions, 22693 deletions
diff --git a/Makefile.am b/Makefile.am
index 37b4466..03cd1e9 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,3 +1,3 @@
AUTOMAKE_OPTIONS = foreign
-SUBDIRS = radeon r200 r300
+SUBDIRS = radeon r200 r300 r600
diff --git a/configure.ac b/configure.ac
index 4cadb87..d9be032 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
# Process this file with autoconf to produce a configure script
AC_PREREQ(2.57)
-AC_INIT([mesa-dri-radeon], 7.5.0, [], mesa-dri-radeon)
+AC_INIT([mesa-dri-radeon], 7.6.0, [], mesa-dri-radeon)
AM_INIT_AUTOMAKE([dist-bzip2])
@@ -16,12 +16,20 @@ AC_PROG_CC
AC_HEADER_STDC
PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0])
-PKG_CHECK_MODULES([DRI], [libmesadri >= 7.5.0 libmesadri < 7.7.0
- libmesadricommon >= 7.5.0 libmesadricommon < 7.7.0])
+# we now need dri_metaops.h
+PKG_CHECK_MODULES([DRI], [libmesadri >= 7.6.0 libmesadri < 7.7.0
+ libmesadricommon >= 7.6.0 libmesadricommon < 7.7.0])
+
+# libdrm 2.4.17 changed the api significantly.
+PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon libdrm <= 2.4.16],
+ HAVE_LIBDRM_RADEON=yes, HAVE_LIBDRM_RADEON=no)
+AM_CONDITIONAL(HAVE_LIBDRM_RADEON, test "x$HAVE_LIBDRM_RADEON" = xyes)
AC_OUTPUT([
Makefile
radeon/Makefile
r200/Makefile
r300/Makefile
+ r300/compiler/Makefile
+ r600/Makefile
])
diff --git a/r200/Makefile.am b/r200/Makefile.am
index 0234e1d..804b285 100644
--- a/r200/Makefile.am
+++ b/r200/Makefile.am
@@ -9,21 +9,37 @@ r200_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
$(DRM_LIBS) $(DRI_LIBS)
r200_dri_ladir = @libdir@/dri
r200_dri_la_SOURCES = \
+ ../radeon/radeon_bo_legacy.c \
+ ../radeon/radeon_common_context.c \
+ ../radeon/radeon_common.c \
+ ../radeon/radeon_cs_legacy.c \
+ ../radeon/radeon_dma.c \
+ ../radeon/radeon_debug.c \
+ ../radeon/radeon_fbo.c \
+ ../radeon/radeon_lock.c \
+ ../radeon/radeon_mipmap_tree.c \
+ ../radeon/radeon_queryobj.c \
+ ../radeon/radeon_span.c \
+ ../radeon/radeon_texture.c \
r200_context.c \
r200_ioctl.c \
- r200_lock.c \
r200_state.c \
r200_state_init.c \
r200_cmdbuf.c \
r200_pixel.c \
r200_tex.c \
- r200_texmem.c \
r200_texstate.c \
r200_tcl.c \
r200_swtcl.c \
- r200_span.c \
r200_maos.c \
r200_sanity.c \
r200_fragshader.c \
r200_vertprog.c \
../radeon/radeon_screen.c
+
+if HAVE_LIBDRM_RADEON
+r200_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
+r200_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
+r200_dri_la_SOURCES += \
+ ../radeon/radeon_cs_space_drm.c
+endif
diff --git a/r200/r200_cmdbuf.c b/r200/r200_cmdbuf.c
index e163377..1d1bea6 100644
--- a/r200/r200_cmdbuf.c
+++ b/r200/r200_cmdbuf.c
@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "swrast/swrast.h"
#include "main/simple_list.h"
+#include "radeon_common.h"
#include "r200_context.h"
#include "r200_state.h"
#include "r200_ioctl.h"
@@ -45,160 +46,71 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_sanity.h"
#include "radeon_reg.h"
-static void print_state_atom( struct r200_state_atom *state )
-{
- int i;
-
- fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
-
- if (0 & R200_DEBUG & DEBUG_VERBOSE)
- for (i = 0 ; i < state->cmd_size ; i++)
- fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
-
-}
-
/* The state atoms will be emitted in the order they appear in the atom list,
* so this step is important.
*/
+#define insert_at_tail_if(atom_list, atom) \
+ do { \
+ struct radeon_state_atom* __atom = (atom); \
+ if (__atom->check) \
+ insert_at_tail((atom_list), __atom); \
+ } while(0)
+
void r200SetUpAtomList( r200ContextPtr rmesa )
{
int i, mtu;
- mtu = rmesa->glCtx->Const.MaxTextureUnits;
-
- make_empty_list(&rmesa->hw.atomlist);
- rmesa->hw.atomlist.name = "atom-list";
-
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf );
+ mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
+
+ make_empty_list(&rmesa->radeon.hw.atomlist);
+ rmesa->radeon.hw.atomlist.name = "atom-list";
+
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.set );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf );
for (i = 0; i < mtu; ++i)
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] );
for (i = 0; i < mtu; ++i)
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] );
for (i = 0; i < 6; ++i)
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] );
for (i = 0; i < 8; ++i)
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] );
for (i = 0; i < 3 + mtu; ++i)
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt );
for (i = 0; i < 2; ++i)
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] );
for (i = 0; i < 6; ++i)
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ptp );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] );
- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] );
-}
-
-static void r200SaveHwState( r200ContextPtr rmesa )
-{
- struct r200_state_atom *atom;
- char * dest = rmesa->backup_store.cmd_buf;
-
- if (R200_DEBUG & DEBUG_STATE)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- rmesa->backup_store.cmd_used = 0;
-
- foreach( atom, &rmesa->hw.atomlist ) {
- if ( atom->check( rmesa->glCtx, atom->idx ) ) {
- int size = atom->cmd_size * 4;
- memcpy( dest, atom->cmd, size);
- dest += size;
- rmesa->backup_store.cmd_used += size;
- if (R200_DEBUG & DEBUG_STATE)
- print_state_atom( atom );
- }
- }
-
- assert( rmesa->backup_store.cmd_used <= R200_CMD_BUF_SZ );
- if (R200_DEBUG & DEBUG_STATE)
- fprintf(stderr, "Returning to r200EmitState\n");
-}
-
-void r200EmitState( r200ContextPtr rmesa )
-{
- char *dest;
- int mtu;
- struct r200_state_atom *atom;
-
- if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- if (rmesa->save_on_next_emit) {
- r200SaveHwState(rmesa);
- rmesa->save_on_next_emit = GL_FALSE;
- }
-
- if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
- return;
-
- mtu = rmesa->glCtx->Const.MaxTextureUnits;
-
- /* To avoid going across the entire set of states multiple times, just check
- * for enough space for the case of emitting all state, and inline the
- * r200AllocCmdBuf code here without all the checks.
- */
- r200EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size );
-
- /* we need to calculate dest after EnsureCmdBufSpace
- as we may flush the buffer - airlied */
- dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
- if (R200_DEBUG & DEBUG_STATE) {
- foreach( atom, &rmesa->hw.atomlist ) {
- if ( atom->dirty || rmesa->hw.all_dirty ) {
- if ( atom->check( rmesa->glCtx, atom->idx ) )
- print_state_atom( atom );
- else
- fprintf(stderr, "skip state %s\n", atom->name);
- }
- }
- }
-
- foreach( atom, &rmesa->hw.atomlist ) {
- if ( rmesa->hw.all_dirty )
- atom->dirty = GL_TRUE;
- if ( atom->dirty ) {
- if ( atom->check( rmesa->glCtx, atom->idx ) ) {
- int size = atom->cmd_size * 4;
- memcpy( dest, atom->cmd, size);
- dest += size;
- rmesa->store.cmd_used += size;
- atom->dirty = GL_FALSE;
- }
- }
- }
-
- assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
-
- rmesa->hw.is_dirty = GL_FALSE;
- rmesa->hw.all_dirty = GL_FALSE;
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] );
+ insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.sci );
}
/* Fire a section of the retained (indexed_verts) buffer as a regular
@@ -208,51 +120,81 @@ void r200EmitVbufPrim( r200ContextPtr rmesa,
GLuint primitive,
GLuint vertex_nr )
{
- drm_radeon_cmd_header_t *cmd;
+ BATCH_LOCALS(&rmesa->radeon);
assert(!(primitive & R200_VF_PRIM_WALK_IND));
- r200EmitState( rmesa );
-
- if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
- fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
- rmesa->store.cmd_used/4, primitive, vertex_nr);
+ radeonEmitState(&rmesa->radeon);
- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VBUF_BUFSZ,
- __FUNCTION__ );
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
- cmd[1].i = R200_CP_CMD_3D_DRAW_VBUF_2;
- cmd[2].i = (primitive |
- R200_VF_PRIM_WALK_LIST |
- R200_VF_COLOR_ORDER_RGBA |
- (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
+ radeon_print(RADEON_RENDER|RADEON_SWRENDER,RADEON_VERBOSE,
+ "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
+ rmesa->store.cmd_used/4, primitive, vertex_nr);
+
+ BEGIN_BATCH(3);
+ OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
+ OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA |
+ (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
+ END_BATCH();
}
+static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
+{
+ BATCH_LOCALS(&rmesa->radeon);
+
+ if (vertex_count > 0) {
+ BEGIN_BATCH(8+2);
+ OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_INDX_2, 0);
+ OUT_BATCH(R200_VF_PRIM_WALK_IND |
+ R200_VF_COLOR_ORDER_RGBA |
+ ((vertex_count + 0) << 16) |
+ type);
+
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
+ OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
+ OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
+ rmesa->radeon.tcl.elt_dma_bo,
+ rmesa->radeon.tcl.elt_dma_offset,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ OUT_BATCH((vertex_count + 1)/2);
+ } else {
+ OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
+ OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
+ OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
+ OUT_BATCH((vertex_count + 1)/2);
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.elt_dma_bo,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ }
+ END_BATCH();
+ }
+}
-void r200FlushElts( r200ContextPtr rmesa )
+void r200FlushElts(GLcontext *ctx)
{
- int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
- int dwords;
- int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 12)) / 2;
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ int nr, elt_used = rmesa->tcl.elt_used;
+
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
+
+ assert( rmesa->radeon.dma.flush == r200FlushElts );
+ rmesa->radeon.dma.flush = NULL;
- if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
- fprintf(stderr, "%s\n", __FUNCTION__);
+ nr = elt_used / 2;
- assert( rmesa->dma.flush == r200FlushElts );
- rmesa->dma.flush = NULL;
+ radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
- /* Cope with odd number of elts:
- */
- rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
- dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
+ r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive);
- cmd[1] |= (dwords - 3) << 16;
- cmd[2] |= nr << R200_VF_VERTEX_NUMBER_SHIFT;
+ radeon_bo_unref(rmesa->radeon.tcl.elt_dma_bo);
+ rmesa->radeon.tcl.elt_dma_bo = NULL;
- if (R200_DEBUG & DEBUG_SYNC) {
- fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
- r200Finish( rmesa->glCtx );
+ if (R200_ELT_BUF_SZ > elt_used)
+ radeonReturnDmaRegion(&rmesa->radeon, R200_ELT_BUF_SZ - elt_used);
+
+ if (radeon_is_debug_enabled(RADEON_SYNC, RADEON_CRITICAL)) {
+ radeon_print(RADEON_SYNC, RADEON_NORMAL, "%s: Syncing\n", __FUNCTION__);
+ radeonFinish( rmesa->radeon.glCtx );
}
}
@@ -261,168 +203,153 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
GLuint primitive,
GLuint min_nr )
{
- drm_radeon_cmd_header_t *cmd;
GLushort *retval;
- if (R200_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
assert((primitive & R200_VF_PRIM_WALK_IND));
- r200EmitState( rmesa );
-
- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, ELTS_BUFSZ(min_nr),
- __FUNCTION__ );
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
- cmd[1].i = R200_CP_CMD_3D_DRAW_INDX_2;
- cmd[2].i = (primitive |
- R200_VF_PRIM_WALK_IND |
- R200_VF_COLOR_ORDER_RGBA);
-
-
- retval = (GLushort *)(cmd+3);
+ radeonEmitState(&rmesa->radeon);
- if (R200_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s: header 0x%x prim %x \n",
- __FUNCTION__,
- cmd[1].i, primitive);
+ radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
+ &rmesa->radeon.tcl.elt_dma_offset, R200_ELT_BUF_SZ, 4);
+ rmesa->tcl.elt_used = min_nr * 2;
- assert(!rmesa->dma.flush);
- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
- rmesa->dma.flush = r200FlushElts;
-
- rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
+ radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
+ retval = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
+
+ assert(!rmesa->radeon.dma.flush);
+ rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+ rmesa->radeon.dma.flush = r200FlushElts;
return retval;
}
+void r200EmitMaxVtxIndex(r200ContextPtr rmesa, int count)
+{
+ BATCH_LOCALS(&rmesa->radeon);
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH(CP_PACKET0(R200_SE_VF_MAX_VTX_INDX, 0));
+ OUT_BATCH(count);
+ END_BATCH();
+ }
+}
void r200EmitVertexAOS( r200ContextPtr rmesa,
- GLuint vertex_size,
- GLuint offset )
+ GLuint vertex_size,
+ struct radeon_bo *bo,
+ GLuint offset )
{
- drm_radeon_cmd_header_t *cmd;
+ BATCH_LOCALS(&rmesa->radeon);
- if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
- fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n",
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s: vertex_size 0x%x offset 0x%x \n",
__FUNCTION__, vertex_size, offset);
- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
- __FUNCTION__ );
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
- cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (2 << 16);
- cmd[2].i = 1;
- cmd[3].i = vertex_size | (vertex_size << 8);
- cmd[4].i = offset;
+ BEGIN_BATCH(7);
+ OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
+ OUT_BATCH(1);
+ OUT_BATCH(vertex_size | (vertex_size << 8));
+ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
}
-
-void r200EmitAOS( r200ContextPtr rmesa,
- struct r200_dma_region **component,
- GLuint nr,
- GLuint offset )
+void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
{
- drm_radeon_cmd_header_t *cmd;
- int sz = AOS_BUFSZ(nr);
+ BATCH_LOCALS(&rmesa->radeon);
+ uint32_t voffset;
+ int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
int i;
- int *tmp;
-
- if (R200_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s nr arrays: %d\n", __FUNCTION__, nr);
-
- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sz, __FUNCTION__ );
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
- cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (((sz / sizeof(int)) - 3) << 16);
- cmd[2].i = nr;
- tmp = &cmd[0].i;
- cmd += 3;
-
- for (i = 0 ; i < nr ; i++) {
- if (i & 1) {
- cmd[0].i |= ((component[i]->aos_stride << 24) |
- (component[i]->aos_size << 16));
- cmd[2].i = (component[i]->aos_start +
- offset * component[i]->aos_stride * 4);
- cmd += 3;
+
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE,
+ "%s: nr=%d, ofs=0x%08x\n",
+ __FUNCTION__, nr, offset);
+
+ BEGIN_BATCH(sz+2+ (nr*2));
+ OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
+ OUT_BATCH(nr);
+
+
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ for (i = 0; i + 1 < nr; i += 2) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+ (rmesa->radeon.tcl.aos[i].stride << 8) |
+ (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+ (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[i].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[i+1].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
}
- else {
- cmd[0].i = ((component[i]->aos_stride << 8) |
- (component[i]->aos_size << 0));
- cmd[1].i = (component[i]->aos_start +
- offset * component[i]->aos_stride * 4);
+
+ if (nr & 1) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+ (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[nr - 1].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+ } else {
+ for (i = 0; i + 1 < nr; i += 2) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+ (rmesa->radeon.tcl.aos[i].stride << 8) |
+ (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+ (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ OUT_BATCH(voffset);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ OUT_BATCH(voffset);
+ }
+
+ if (nr & 1) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+ (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ OUT_BATCH(voffset);
+ }
+ for (i = 0; i + 1 < nr; i += 2) {
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[i+0].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[i+1].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+ if (nr & 1) {
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[nr-1].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
}
}
-
- if (R200_DEBUG & DEBUG_VERTS) {
- fprintf(stderr, "%s:\n", __FUNCTION__);
- for (i = 0 ; i < sz ; i++)
- fprintf(stderr, " %d: %x\n", i, tmp[i]);
- }
-}
-
-void r200EmitBlit( r200ContextPtr rmesa,
- GLuint color_fmt,
- GLuint src_pitch,
- GLuint src_offset,
- GLuint dst_pitch,
- GLuint dst_offset,
- GLint srcx, GLint srcy,
- GLint dstx, GLint dsty,
- GLuint w, GLuint h )
-{
- drm_radeon_cmd_header_t *cmd;
-
- if (R200_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
- __FUNCTION__,
- src_pitch, src_offset, srcx, srcy,
- dst_pitch, dst_offset, dstx, dsty,
- w, h);
-
- assert( (src_pitch & 63) == 0 );
- assert( (dst_pitch & 63) == 0 );
- assert( (src_offset & 1023) == 0 );
- assert( (dst_offset & 1023) == 0 );
- assert( w < (1<<16) );
- assert( h < (1<<16) );
-
- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 8 * sizeof(int),
- __FUNCTION__ );
-
-
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
- cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16);
- cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
- RADEON_GMC_DST_PITCH_OFFSET_CNTL |
- RADEON_GMC_BRUSH_NONE |
- (color_fmt << 8) |
- RADEON_GMC_SRC_DATATYPE_COLOR |
- RADEON_ROP3_S |
- RADEON_DP_SRC_SOURCE_MEMORY |
- RADEON_GMC_CLR_CMP_CNTL_DIS |
- RADEON_GMC_WR_MSK_DIS );
-
- cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
- cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
- cmd[5].i = (srcx << 16) | srcy;
- cmd[6].i = (dstx << 16) | dsty; /* dst */
- cmd[7].i = (w << 16) | h;
-}
-
-
-void r200EmitWait( r200ContextPtr rmesa, GLuint flags )
-{
- drm_radeon_cmd_header_t *cmd;
-
- assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
-
- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 1 * sizeof(int),
- __FUNCTION__ );
- cmd[0].i = 0;
- cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
- cmd[0].wait.flags = flags;
+ END_BATCH();
}
diff --git a/r200/r200_context.c b/r200/r200_context.c
index 058296e..3ddb5bf 100644
--- a/r200/r200_context.c
+++ b/r200/r200_context.c
@@ -54,14 +54,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_state.h"
-#include "r200_span.h"
#include "r200_pixel.h"
#include "r200_tex.h"
#include "r200_swtcl.h"
#include "r200_tcl.h"
#include "r200_maos.h"
#include "r200_vertprog.h"
+#include "radeon_queryobj.h"
+#include "radeon_span.h"
+
+#define need_GL_ARB_occlusion_query
#define need_GL_ARB_vertex_program
#define need_GL_ATI_fragment_shader
#define need_GL_EXT_blend_minmax
@@ -71,6 +74,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define need_GL_EXT_blend_func_separate
#define need_GL_NV_vertex_program
#define need_GL_ARB_point_parameters
+#define need_GL_EXT_framebuffer_object
#include "extension_helper.h"
#define DRIVER_DATE "20060602"
@@ -78,9 +82,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "vblank.h"
#include "utils.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
-#ifndef R200_DEBUG
-int R200_DEBUG = (0);
-#endif
/* Return various strings for glGetString().
*/
@@ -89,8 +90,8 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
r200ContextPtr rmesa = R200_CONTEXT(ctx);
static char buffer[128];
unsigned offset;
- GLuint agp_mode = (rmesa->r200Screen->card_type == RADEON_CARD_PCI)? 0 :
- rmesa->r200Screen->AGPMode;
+ GLuint agp_mode = (rmesa->radeon.radeonScreen->card_type == RADEON_CARD_PCI)? 0 :
+ rmesa->radeon.radeonScreen->AGPMode;
switch ( name ) {
case GL_VENDOR:
@@ -101,7 +102,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
agp_mode );
sprintf( & buffer[ offset ], " %sTCL",
- !(rmesa->TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
+ !(rmesa->radeon.TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
? "" : "NO-" );
return (GLubyte *)buffer;
@@ -117,6 +118,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
const struct dri_extension card_extensions[] =
{
{ "GL_ARB_multitexture", NULL },
+ { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
{ "GL_ARB_texture_border_clamp", NULL },
{ "GL_ARB_texture_env_add", NULL },
{ "GL_ARB_texture_env_combine", NULL },
@@ -126,6 +128,7 @@ const struct dri_extension card_extensions[] =
{ "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions },
{ "GL_EXT_blend_subtract", NULL },
{ "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+ { "GL_EXT_packed_depth_stencil", NULL},
{ "GL_EXT_secondary_color", GL_EXT_secondary_color_functions },
{ "GL_EXT_stencil_wrap", NULL },
{ "GL_EXT_texture_edge_clamp", NULL },
@@ -167,6 +170,11 @@ const struct dri_extension point_extensions[] = {
{ NULL, NULL }
};
+const struct dri_extension mm_extensions[] = {
+ { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+ { NULL, NULL }
+};
+
extern const struct tnl_pipeline_stage _r200_render_stage;
extern const struct tnl_pipeline_stage _r200_tcl_stage;
@@ -213,26 +221,54 @@ static void r200InitDriverFuncs( struct dd_function_table *functions )
functions->GetString = r200GetString;
}
-static const struct dri_debug_control debug_control[] =
+
+static void r200_get_lock(radeonContextPtr radeon)
{
- { "fall", DEBUG_FALLBACKS },
- { "tex", DEBUG_TEXTURE },
- { "ioctl", DEBUG_IOCTL },
- { "prim", DEBUG_PRIMS },
- { "vert", DEBUG_VERTS },
- { "state", DEBUG_STATE },
- { "code", DEBUG_CODEGEN },
- { "vfmt", DEBUG_VFMT },
- { "vtxf", DEBUG_VFMT },
- { "verb", DEBUG_VERBOSE },
- { "dri", DEBUG_DRI },
- { "dma", DEBUG_DMA },
- { "san", DEBUG_SANITY },
- { "sync", DEBUG_SYNC },
- { "pix", DEBUG_PIXEL },
- { "mem", DEBUG_MEMORY },
- { NULL, 0 }
-};
+ r200ContextPtr rmesa = (r200ContextPtr)radeon;
+ drm_radeon_sarea_t *sarea = radeon->sarea;
+
+ R200_STATECHANGE( rmesa, ctx );
+ if (rmesa->radeon.sarea->tiling_enabled) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+ }
+ else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
+
+ if ( sarea->ctx_owner != rmesa->radeon.dri.hwContext ) {
+ sarea->ctx_owner = rmesa->radeon.dri.hwContext;
+ if (!radeon->radeonScreen->kernel_mm)
+ radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
+ }
+
+}
+
+static void r200_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
+{
+}
+
+static void r200_emit_query_finish(radeonContextPtr radeon)
+{
+ BATCH_LOCALS(radeon);
+ struct radeon_query_object *query = radeon->query.current;
+
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZPASS_ADDR, 0));
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+ query->curr_offset += sizeof(uint32_t);
+ assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
+
+static void r200_init_vtbl(radeonContextPtr radeon)
+{
+ radeon->vtbl.get_lock = r200_get_lock;
+ radeon->vtbl.update_viewport_offset = r200UpdateViewportOffset;
+ radeon->vtbl.emit_cs_header = r200_vtbl_emit_cs_header;
+ radeon->vtbl.swtcl_flush = r200_swtcl_flush;
+ radeon->vtbl.fallback = r200Fallback;
+ radeon->vtbl.update_scissor = r200_vtbl_update_scissor;
+ radeon->vtbl.emit_query_finish = r200_emit_query_finish;
+}
/* Create the device specific rendering context.
@@ -245,9 +281,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
struct dd_function_table functions;
r200ContextPtr rmesa;
- GLcontext *ctx, *shareCtx;
+ GLcontext *ctx;
int i;
- int tcl_mode, fthrottle_mode;
+ int tcl_mode;
assert(glVisual);
assert(driContextPriv);
@@ -257,7 +293,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
rmesa = (r200ContextPtr) CALLOC( sizeof(*rmesa) );
if ( !rmesa )
return GL_FALSE;
-
+
+ r200_init_vtbl(&rmesa->radeon);
/* init exp fog table data */
r200InitStaticFogData();
@@ -265,12 +302,13 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
* Do this here so that initialMaxAnisotropy is set before we create
* the default textures.
*/
- driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
+ driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
screen->driScreen->myNum, "r200");
- rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
- "def_max_anisotropy");
+ rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
+ "def_max_anisotropy");
- if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
+ if ( sPriv->drm_version.major == 1
+ && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
if ( sPriv->drm_version.minor < 13 )
fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
"disabling.\n", sPriv->drm_version.minor );
@@ -287,63 +325,20 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
_mesa_init_driver_functions(&functions);
r200InitDriverFuncs(&functions);
r200InitIoctlFuncs(&functions);
- r200InitStateFuncs(&functions);
+ r200InitStateFuncs(&functions, screen->kernel_mm);
r200InitTextureFuncs(&functions);
r200InitShaderFuncs(&functions);
+ radeonInitQueryObjFunctions(&functions);
- /* Allocate and initialize the Mesa context */
- if (sharedContextPrivate)
- shareCtx = ((r200ContextPtr) sharedContextPrivate)->glCtx;
- else
- shareCtx = NULL;
- rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
- &functions, (void *) rmesa);
- if (!rmesa->glCtx) {
- FREE(rmesa);
- return GL_FALSE;
+ if (!radeonInitContext(&rmesa->radeon, &functions,
+ glVisual, driContextPriv,
+ sharedContextPrivate)) {
+ FREE(rmesa);
+ return GL_FALSE;
}
- driContextPriv->driverPrivate = rmesa;
-
- /* Init r200 context data */
- rmesa->dri.context = driContextPriv;
- rmesa->dri.screen = sPriv;
- rmesa->dri.drawable = NULL; /* Set by XMesaMakeCurrent */
- rmesa->dri.hwContext = driContextPriv->hHWContext;
- rmesa->dri.hwLock = &sPriv->pSAREA->lock;
- rmesa->dri.fd = sPriv->fd;
- rmesa->dri.drmMinor = sPriv->drm_version.minor;
-
- rmesa->r200Screen = screen;
- rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
- screen->sarea_priv_offset);
-
-
- rmesa->dma.buf0_address = rmesa->r200Screen->buffers->list[0].address;
-
- (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
- make_empty_list( & rmesa->swapped );
-
- rmesa->nr_heaps = 1 /* screen->numTexHeaps */ ;
- assert(rmesa->nr_heaps < RADEON_NR_TEX_HEAPS);
- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
- rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
- screen->texSize[i],
- 12,
- RADEON_NR_TEX_REGIONS,
- (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
- & rmesa->sarea->tex_age[i],
- & rmesa->swapped,
- sizeof( r200TexObj ),
- (destroy_texture_object_t *) r200DestroyTexObj );
- }
- rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
- "texture_depth");
- if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
- rmesa->texture_depth = ( screen->cpp == 4 ) ?
- DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
- rmesa->swtcl.RenderIndex = ~0;
- rmesa->hw.all_dirty = 1;
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ rmesa->radeon.hw.all_dirty = 1;
/* Set the maximum texture size small enough that we can guarentee that
* all texture units can bind a maximal texture and have all of them in
@@ -351,29 +346,20 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
* setting allow larger textures.
*/
- ctx = rmesa->glCtx;
- ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
+ ctx = rmesa->radeon.glCtx;
+ ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
"texture_units");
ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
- i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
-
- driCalculateMaxTextureLevels( rmesa->texture_heaps,
- rmesa->nr_heaps,
- & ctx->Const,
- 4,
- 11, /* max 2D texture size is 2048x2048 */
-#if ENABLE_HW_3D_TEXTURE
- 8, /* max 3D texture size is 256^3 */
-#else
- 0, /* 3D textures unsupported */
-#endif
- 11, /* max cube texture size is 2048x2048 */
- 11, /* max texture rectangle size is 2048x2048 */
- 12,
- GL_FALSE,
- i );
+ i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
+
+ /* FIXME: When no memory manager is available we should set this
+ * to some reasonable value based on texture memory pool size */
+ ctx->Const.MaxTextureLevels = 12;
+ ctx->Const.Max3DTextureLevels = 9;
+ ctx->Const.MaxCubeTextureLevels = 12;
+ ctx->Const.MaxTextureRectSize = 2048;
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
@@ -383,7 +369,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
ctx->Const.MinPointSizeAA = 1.0;
ctx->Const.MaxPointSizeAA = 1.0;
ctx->Const.PointSizeGranularity = 0.0625;
- if (rmesa->r200Screen->drmSupportsPointSprites)
+ if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
ctx->Const.MaxPointSize = 2047.0;
else
ctx->Const.MaxPointSize = 1.0;
@@ -441,33 +427,39 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
_math_matrix_set_identity( &rmesa->tmpmat );
driInitExtensions( ctx, card_extensions, GL_TRUE );
- if (!(rmesa->r200Screen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
+
+ if (rmesa->radeon.radeonScreen->kernel_mm)
+ driInitExtensions(ctx, mm_extensions, GL_FALSE);
+ if (!(rmesa->radeon.radeonScreen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
/* yuv textures don't work with some chips - R200 / rv280 okay so far
others get the bit ordering right but don't actually do YUV-RGB conversion */
_mesa_enable_extension( ctx, "GL_MESA_ycbcr_texture" );
}
- if (rmesa->glCtx->Mesa_DXTn) {
+ if (rmesa->radeon.glCtx->Mesa_DXTn) {
_mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
_mesa_enable_extension( ctx, "GL_S3_s3tc" );
}
- else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
+ else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
_mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
}
- if (rmesa->r200Screen->drmSupportsCubeMapsR200)
+ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200)
_mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
- if (rmesa->r200Screen->drmSupportsBlendColor) {
+ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
driInitExtensions( ctx, blend_extensions, GL_FALSE );
}
- if(rmesa->r200Screen->drmSupportsVertexProgram)
+ if(rmesa->radeon.radeonScreen->drmSupportsVertexProgram)
driInitSingleExtension( ctx, ARB_vp_extension );
- if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program"))
+ if(driQueryOptionb(&rmesa->radeon.optionCache, "nv_vertex_program"))
driInitSingleExtension( ctx, NV_vp_extension );
- if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader)
+ if ((ctx->Const.MaxTextureUnits == 6) && rmesa->radeon.radeonScreen->drmSupportsFragShader)
driInitSingleExtension( ctx, ATI_fs_extension );
- if (rmesa->r200Screen->drmSupportsPointSprites)
+ if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
driInitExtensions( ctx, point_extensions, GL_FALSE );
+
+ if (!rmesa->radeon.radeonScreen->kernel_mm)
+ _mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
#if 0
r200InitDriverFuncs( ctx );
r200InitIoctlFuncs( ctx );
@@ -476,236 +468,43 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
#endif
/* plug in a few more device driver functions */
/* XXX these should really go right after _mesa_init_driver_functions() */
+ radeon_fbo_init(&rmesa->radeon);
+ radeonInitSpanFuncs( ctx );
r200InitPixelFuncs( ctx );
- r200InitSpanFuncs( ctx );
r200InitTnlFuncs( ctx );
r200InitState( rmesa );
r200InitSwtcl( ctx );
- fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
- rmesa->iw.irq_seq = -1;
- rmesa->irqsEmitted = 0;
- rmesa->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
- rmesa->r200Screen->irq);
-
- rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
-
- if (!rmesa->do_irqs)
- fprintf(stderr,
- "IRQ's not enabled, falling back to %s: %d %d\n",
- rmesa->do_usleeps ? "usleeps" : "busy waits",
- fthrottle_mode,
- rmesa->r200Screen->irq);
-
rmesa->prefer_gart_client_texturing =
(getenv("R200_GART_CLIENT_TEXTURES") != 0);
- (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
-
-
-#if DO_DEBUG
- R200_DEBUG = driParseDebugString( getenv( "R200_DEBUG" ),
- debug_control );
- R200_DEBUG |= driParseDebugString( getenv( "RADEON_DEBUG" ),
- debug_control );
-#endif
-
- tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
- if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
+ tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
+ if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
fprintf(stderr, "disabling 3D acceleration\n");
FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1);
}
else if (tcl_mode == DRI_CONF_TCL_SW || getenv("R200_NO_TCL") ||
- !(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) {
- if (rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL) {
- rmesa->r200Screen->chip_flags &= ~RADEON_CHIPSET_TCL;
+ !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
fprintf(stderr, "Disabling HW TCL support\n");
}
- TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
+ TCL_FALLBACK(rmesa->radeon.glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
}
return GL_TRUE;
}
-/* Destroy the device specific context.
- */
-/* Destroy the Mesa and driver specific context data.
- */
void r200DestroyContext( __DRIcontextPrivate *driContextPriv )
{
- GET_CURRENT_CONTEXT(ctx);
- r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate;
- r200ContextPtr current = ctx ? R200_CONTEXT(ctx) : NULL;
-
- /* check if we're deleting the currently bound context */
- if (rmesa == current) {
- R200_FIREVERTICES( rmesa );
- _mesa_make_current(NULL, NULL, NULL);
- }
-
- /* Free r200 context resources */
- assert(rmesa); /* should never be null */
- if ( rmesa ) {
- GLboolean release_texture_heaps;
-
-
- release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
- _swsetup_DestroyContext( rmesa->glCtx );
- _tnl_DestroyContext( rmesa->glCtx );
- _vbo_DestroyContext( rmesa->glCtx );
- _swrast_DestroyContext( rmesa->glCtx );
-
- r200DestroySwtcl( rmesa->glCtx );
- r200ReleaseArrays( rmesa->glCtx, ~0 );
-
- if (rmesa->dma.current.buf) {
- r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
- r200FlushCmdBuf( rmesa, __FUNCTION__ );
- }
-
- if (rmesa->state.scissor.pClipRects) {
- FREE(rmesa->state.scissor.pClipRects);
- rmesa->state.scissor.pClipRects = NULL;
- }
-
- if ( release_texture_heaps ) {
- /* This share group is about to go away, free our private
- * texture object data.
- */
- int i;
-
- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
- driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
- rmesa->texture_heaps[ i ] = NULL;
- }
-
- assert( is_empty_list( & rmesa->swapped ) );
- }
-
- /* free the Mesa context */
- rmesa->glCtx->DriverCtx = NULL;
- _mesa_destroy_context( rmesa->glCtx );
-
- /* free the option cache */
- driDestroyOptionCache (&rmesa->optionCache);
-
- FREE( rmesa );
- }
-}
-
-
-
-
-void
-r200SwapBuffers( __DRIdrawablePrivate *dPriv )
-{
- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
- r200ContextPtr rmesa;
- GLcontext *ctx;
- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
- ctx = rmesa->glCtx;
- if (ctx->Visual.doubleBufferMode) {
- _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */
- if ( rmesa->doPageFlip ) {
- r200PageFlip( dPriv );
- }
- else {
- r200CopyBuffer( dPriv, NULL );
- }
- }
- }
- else {
- /* XXX this shouldn't be an error but we can't handle it for now */
- _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
- }
-}
-
-void
-r200CopySubBuffer( __DRIdrawablePrivate *dPriv,
- int x, int y, int w, int h )
-{
- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
- r200ContextPtr rmesa;
- GLcontext *ctx;
- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
- ctx = rmesa->glCtx;
- if (ctx->Visual.doubleBufferMode) {
- drm_clip_rect_t rect;
- rect.x1 = x + dPriv->x;
- rect.y1 = (dPriv->h - y - h) + dPriv->y;
- rect.x2 = rect.x1 + w;
- rect.y2 = rect.y1 + h;
- _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */
- r200CopyBuffer( dPriv, &rect );
- }
- }
- else {
- /* XXX this shouldn't be an error but we can't handle it for now */
- _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
- }
-}
-
-/* Force the context `c' to be the current context and associate with it
- * buffer `b'.
- */
-GLboolean
-r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
- __DRIdrawablePrivate *driDrawPriv,
- __DRIdrawablePrivate *driReadPriv )
-{
- if ( driContextPriv ) {
- r200ContextPtr newCtx =
- (r200ContextPtr) driContextPriv->driverPrivate;
-
- if (R200_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)newCtx->glCtx);
-
- newCtx->dri.readable = driReadPriv;
-
- if ( newCtx->dri.drawable != driDrawPriv ||
- newCtx->lastStamp != driDrawPriv->lastStamp ) {
- if (driDrawPriv->swap_interval == (unsigned)-1) {
- driDrawPriv->vblFlags = (newCtx->r200Screen->irq != 0)
- ? driGetDefaultVBlankFlags(&newCtx->optionCache)
- : VBLANK_FLAG_NO_IRQ;
-
- driDrawableInitVBlank( driDrawPriv );
- }
-
- newCtx->dri.drawable = driDrawPriv;
-
- r200SetCliprects(newCtx);
- r200UpdateViewportOffset( newCtx->glCtx );
- }
-
- _mesa_make_current( newCtx->glCtx,
- (GLframebuffer *) driDrawPriv->driverPrivate,
- (GLframebuffer *) driReadPriv->driverPrivate );
-
- _mesa_update_state( newCtx->glCtx );
- r200ValidateState( newCtx->glCtx );
-
- } else {
- if (R200_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
- _mesa_make_current( NULL, NULL, NULL );
- }
-
- if (R200_DEBUG & DEBUG_DRI)
- fprintf(stderr, "End %s\n", __FUNCTION__);
- return GL_TRUE;
-}
-
-/* Force the context `c' to be unbound from its buffer.
- */
-GLboolean
-r200UnbindContext( __DRIcontextPrivate *driContextPriv )
-{
- r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate;
-
- if (R200_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->glCtx);
-
- return GL_TRUE;
+ int i;
+ r200ContextPtr rmesa = (r200ContextPtr)driContextPriv->driverPrivate;
+ if (rmesa)
+ {
+ for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS ; i++ ) {
+ _math_matrix_dtr( &rmesa->TexGenMatrix[i] );
+ }
+ }
+ radeonDestroyContext(driContextPriv);
}
diff --git a/r200/r200_context.h b/r200/r200_context.h
index 14a1dda..246f98c 100644
--- a/r200/r200_context.h
+++ b/r200/r200_context.h
@@ -53,51 +53,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#error This driver requires a newer libdrm to compile
#endif
+#include "radeon_screen.h"
+#include "radeon_common.h"
+
+#include "radeon_lock.h"
+
struct r200_context;
typedef struct r200_context r200ContextRec;
typedef struct r200_context *r200ContextPtr;
-/* This union is used to avoid warnings/miscompilation
- with float to uint32_t casts due to strict-aliasing */
-typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
-
-#include "r200_lock.h"
-#include "radeon_screen.h"
#include "main/mm.h"
-/* Flags for software fallback cases */
-/* See correponding strings in r200_swtcl.c */
-#define R200_FALLBACK_TEXTURE 0x01
-#define R200_FALLBACK_DRAW_BUFFER 0x02
-#define R200_FALLBACK_STENCIL 0x04
-#define R200_FALLBACK_RENDER_MODE 0x08
-#define R200_FALLBACK_DISABLE 0x10
-#define R200_FALLBACK_BORDER_MODE 0x20
-
-/* The blit width for texture uploads
- */
-#define BLIT_WIDTH_BYTES 1024
-
-/* Use the templated vertex format:
- */
-#define COLOR_IS_RGBA
-#define TAG(x) r200##x
-#include "tnl_dd/t_dd_vertex.h"
-#undef TAG
-
-typedef void (*r200_tri_func)( r200ContextPtr,
- r200Vertex *,
- r200Vertex *,
- r200Vertex * );
-
-typedef void (*r200_line_func)( r200ContextPtr,
- r200Vertex *,
- r200Vertex * );
-
-typedef void (*r200_point_func)( r200ContextPtr,
- r200Vertex * );
-
-
struct r200_vertex_program {
struct gl_vertex_program mesa_program; /* Must be first */
int translated;
@@ -112,93 +78,11 @@ struct r200_vertex_program {
int fogmode;
};
-struct r200_colorbuffer_state {
- GLuint clear;
-#if 000
- GLint drawOffset, drawPitch;
-#endif
- int roundEnable;
-};
-
-
-struct r200_depthbuffer_state {
- GLuint clear;
- GLfloat scale;
-};
-
-#if 000
-struct r200_pixel_state {
- GLint readOffset, readPitch;
-};
-#endif
-
-struct r200_scissor_state {
- drm_clip_rect_t rect;
- GLboolean enabled;
-
- GLuint numClipRects; /* Cliprects active */
- GLuint numAllocedClipRects; /* Cliprects available */
- drm_clip_rect_t *pClipRects;
-};
-
-struct r200_stencilbuffer_state {
- GLboolean hwBuffer;
- GLuint clear; /* rb3d_stencilrefmask value */
-};
-
-struct r200_stipple_state {
- GLuint mask[32];
-};
-
-
-
-#define TEX_0 0x1
-#define TEX_1 0x2
-#define TEX_2 0x4
-#define TEX_3 0x8
-#define TEX_4 0x10
-#define TEX_5 0x20
-#define TEX_ALL 0x3f
-
-typedef struct r200_tex_obj r200TexObj, *r200TexObjPtr;
-
-/* Texture object in locally shared texture space.
- */
-struct r200_tex_obj {
- driTextureObject base;
-
- GLuint bufAddr; /* Offset to start of locally
- shared texture block */
-
- GLuint dirty_state; /* Flags (1 per texunit) for
- whether or not this texobj
- has dirty hardware state
- (pp_*) that needs to be
- brought into the
- texunit. */
-
- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
- /* Six, for the cube faces */
- GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
-
- GLuint pp_txfilter; /* hardware register values */
- GLuint pp_txformat;
- GLuint pp_txformat_x;
- GLuint pp_txoffset; /* Image location in texmem.
- All cube faces follow. */
- GLuint pp_txsize; /* npot only */
- GLuint pp_txpitch; /* npot only */
- GLuint pp_border_color;
- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
-
- GLboolean border_fallback;
-
- GLuint tile_bits; /* hw texture tile bits used on this texture */
-};
+#define R200_TEX_ALL 0x3f
struct r200_texture_env_state {
- r200TexObjPtr texobj;
+ radeonTexObjPtr texobj;
GLuint outputreg;
GLuint unitneeded;
};
@@ -210,19 +94,6 @@ struct r200_texture_state {
};
-struct r200_state_atom {
- struct r200_state_atom *next, *prev;
- const char *name; /* for debug */
- int cmd_size; /* size in bytes */
- GLuint idx;
- int *cmd; /* one or more cmd's */
- int *lastcmd; /* one or more cmd's */
- GLboolean dirty;
- GLboolean (*check)( GLcontext *, int ); /* is this state active? */
-};
-
-
-
/* Trying to keep these relatively short as the variables are becoming
* extravagently long. Drop the driver name prefix off the front of
* everything - I think we know which driver we're in by now, and keep the
@@ -596,182 +467,96 @@ struct r200_state_atom {
#define PRF_STATE_SIZE 3
-struct r200_hw_state {
- /* Head of the linked list of state atoms. */
- struct r200_state_atom atomlist;
+#define SCI_CMD_0 0
+#define SCI_RE_AUX 1
+#define SCI_CMD_1 2
+#define SCI_XY_1 3
+#define SCI_CMD_2 4
+#define SCI_XY_2 5
+#define SCI_STATE_SIZE 6
+
+#define R200_QUERYOBJ_CMD_0 0
+#define R200_QUERYOBJ_DATA_0 1
+#define R200_QUERYOBJ_CMDSIZE 2
+
+#define STP_CMD_0 0
+#define STP_DATA_0 1
+#define STP_CMD_1 2
+#define STP_STATE_SIZE 35
+struct r200_hw_state {
/* Hardware state, stored as cmdbuf commands:
* -- Need to doublebuffer for
* - reviving state after loss of context
* - eliding noop statechange loops? (except line stipple count)
*/
- struct r200_state_atom ctx;
- struct r200_state_atom set;
- struct r200_state_atom vte;
- struct r200_state_atom lin;
- struct r200_state_atom msk;
- struct r200_state_atom vpt;
- struct r200_state_atom vap;
- struct r200_state_atom vtx;
- struct r200_state_atom tcl;
- struct r200_state_atom msl;
- struct r200_state_atom tcg;
- struct r200_state_atom msc;
- struct r200_state_atom cst;
- struct r200_state_atom tam;
- struct r200_state_atom tf;
- struct r200_state_atom tex[6];
- struct r200_state_atom cube[6];
- struct r200_state_atom zbs;
- struct r200_state_atom mtl[2];
- struct r200_state_atom mat[9];
- struct r200_state_atom lit[8]; /* includes vec, scl commands */
- struct r200_state_atom ucp[6];
- struct r200_state_atom pix[6]; /* pixshader stages */
- struct r200_state_atom eye; /* eye pos */
- struct r200_state_atom grd; /* guard band clipping */
- struct r200_state_atom fog;
- struct r200_state_atom glt;
- struct r200_state_atom prf;
- struct r200_state_atom afs[2];
- struct r200_state_atom pvs;
- struct r200_state_atom vpi[2];
- struct r200_state_atom vpp[2];
- struct r200_state_atom atf;
- struct r200_state_atom spr;
- struct r200_state_atom ptp;
-
- int max_state_size; /* Number of bytes necessary for a full state emit. */
- GLboolean is_dirty, all_dirty;
+ struct radeon_state_atom ctx;
+ struct radeon_state_atom set;
+ struct radeon_state_atom sci;
+ struct radeon_state_atom vte;
+ struct radeon_state_atom lin;
+ struct radeon_state_atom msk;
+ struct radeon_state_atom vpt;
+ struct radeon_state_atom vap;
+ struct radeon_state_atom vtx;
+ struct radeon_state_atom tcl;
+ struct radeon_state_atom msl;
+ struct radeon_state_atom tcg;
+ struct radeon_state_atom msc;
+ struct radeon_state_atom cst;
+ struct radeon_state_atom tam;
+ struct radeon_state_atom tf;
+ struct radeon_state_atom tex[6];
+ struct radeon_state_atom cube[6];
+ struct radeon_state_atom zbs;
+ struct radeon_state_atom mtl[2];
+ struct radeon_state_atom mat[9];
+ struct radeon_state_atom lit[8]; /* includes vec, scl commands */
+ struct radeon_state_atom ucp[6];
+ struct radeon_state_atom pix[6]; /* pixshader stages */
+ struct radeon_state_atom eye; /* eye pos */
+ struct radeon_state_atom grd; /* guard band clipping */
+ struct radeon_state_atom fog;
+ struct radeon_state_atom glt;
+ struct radeon_state_atom prf;
+ struct radeon_state_atom afs[2];
+ struct radeon_state_atom pvs;
+ struct radeon_state_atom vpi[2];
+ struct radeon_state_atom vpp[2];
+ struct radeon_state_atom atf;
+ struct radeon_state_atom spr;
+ struct radeon_state_atom ptp;
+ struct radeon_state_atom stp;
};
struct r200_state {
/* Derived state for internal purposes:
*/
- struct r200_colorbuffer_state color;
- struct r200_depthbuffer_state depth;
-#if 00
- struct r200_pixel_state pixel;
-#endif
- struct r200_scissor_state scissor;
- struct r200_stencilbuffer_state stencil;
- struct r200_stipple_state stipple;
struct r200_texture_state texture;
GLuint envneeded;
};
-/* Need refcounting on dma buffers:
- */
-struct r200_dma_buffer {
- int refcount; /* the number of retained regions in buf */
- drmBufPtr buf;
-};
-
-#define GET_START(rvb) (rmesa->r200Screen->gart_buffer_offset + \
- (rvb)->address - rmesa->dma.buf0_address + \
- (rvb)->start)
-
-/* A retained region, eg vertices for indexed vertices.
- */
-struct r200_dma_region {
- struct r200_dma_buffer *buf;
- char *address; /* == buf->address */
- int start, end, ptr; /* offsets from start of buf */
- int aos_start;
- int aos_stride;
- int aos_size;
-};
-
-
-struct r200_dma {
- /* Active dma region. Allocations for vertices and retained
- * regions come from here. Also used for emitting random vertices,
- * these may be flushed by calling flush_current();
- */
- struct r200_dma_region current;
-
- void (*flush)( r200ContextPtr );
-
- char *buf0_address; /* start of buf[0], for index calcs */
- GLuint nr_released_bufs; /* flush after so many buffers released */
-};
-
-struct r200_dri_mirror {
- __DRIcontextPrivate *context; /* DRI context */
- __DRIscreenPrivate *screen; /* DRI screen */
- __DRIdrawablePrivate *drawable; /* DRI drawable bound to this ctx */
- __DRIdrawablePrivate *readable; /* DRI readable bound to this ctx */
-
- drm_context_t hwContext;
- drm_hw_lock_t *hwLock;
- int fd;
- int drmMinor;
-};
-
-
#define R200_CMD_BUF_SZ (16*1024)
-struct r200_store {
- GLuint statenr;
- GLuint primnr;
- char cmd_buf[R200_CMD_BUF_SZ];
- int cmd_used;
- int elts_start;
-};
-
-
+#define R200_ELT_BUF_SZ (16*1024)
/* r200_tcl.c
*/
struct r200_tcl_info {
GLuint hw_primitive;
-/* hw can handle 12 components max */
- struct r200_dma_region *aos_components[12];
- GLuint nr_aos_components;
-
- GLuint *Elts;
+ int elt_used;
- struct r200_dma_region indexed_verts;
- struct r200_dma_region vertex_data[15];
};
/* r200_swtcl.c
*/
struct r200_swtcl_info {
- GLuint RenderIndex;
-
- /**
- * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is
- * installed in the Mesa state vector.
- */
- GLuint vertex_size;
- /**
- * Attributes instructing the Mesa TCL pipeline where / how to put vertex
- * data in the hardware buffer.
- */
- struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
- /**
- * Number of elements of \c ::vertex_attrs that are actually used.
- */
- GLuint vertex_attr_count;
-
- /**
- * Cached pointer to the buffer where Mesa will store vertex data.
- */
- GLubyte *verts;
-
- /* Fallback rasterization functions
- */
- r200_point_func draw_point;
- r200_line_func draw_line;
- r200_tri_func draw_tri;
-
- GLuint hw_primitive;
- GLenum render_primitive;
- GLuint numverts;
+ radeon_point_func draw_point;
+ radeon_line_func draw_line;
+ radeon_tri_func draw_tri;
/**
* Offset of the 4UB color data within a hardware (swtcl) vertex.
@@ -787,27 +572,10 @@ struct r200_swtcl_info {
* Should Mesa project vertex data or will the hardware do it?
*/
GLboolean needproj;
-
- struct r200_dma_region indexed_verts;
-};
-
-
-struct r200_ioctl {
- GLuint vertex_offset;
- GLuint vertex_size;
};
-#define R200_MAX_PRIMS 64
-
-
-
-struct r200_prim {
- GLuint start;
- GLuint end;
- GLuint prim;
-};
/* A maximum total of 29 elements per vertex: 3 floats for position, 3
* floats for normal, 4 floats for color, 4 bytes for secondary color,
@@ -822,9 +590,8 @@ struct r200_prim {
#define R200_MAX_VERTEX_SIZE ((3*6)+11)
-
struct r200_context {
- GLcontext *glCtx; /* Mesa context */
+ struct radeon_context radeon;
/* Driver and hardware state management
*/
@@ -832,56 +599,15 @@ struct r200_context {
struct r200_state state;
struct r200_vertex_program *curr_vp_hw;
- /* Texture object bookkeeping
- */
- unsigned nr_heaps;
- driTexHeap * texture_heaps[ RADEON_NR_TEX_HEAPS ];
- driTextureObject swapped;
- int texture_depth;
- float initialMaxAnisotropy;
-
- /* Rasterization and vertex state:
- */
- GLuint TclFallback;
- GLuint Fallback;
- GLuint NewGLState;
- DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */
-
/* Vertex buffers
*/
- struct r200_ioctl ioctl;
- struct r200_dma dma;
- struct r200_store store;
- /* A full state emit as of the first state emit in the main store, in case
- * the context is lost.
- */
- struct r200_store backup_store;
-
- /* Page flipping
- */
- GLuint doPageFlip;
-
- /* Busy waiting
- */
- GLuint do_usleeps;
- GLuint do_irqs;
- GLuint irqsEmitted;
- drm_radeon_irq_wait_t iw;
+ struct radeon_ioctl ioctl;
+ struct radeon_store store;
/* Clientdata textures;
*/
GLuint prefer_gart_client_texturing;
- /* Drawable, cliprect and scissor information
- */
- GLuint numClipRects; /* Cliprects for the draw buffer */
- drm_clip_rect_t *pClipRects;
- unsigned int lastStamp;
- GLboolean lost_context;
- GLboolean save_on_next_emit;
- radeonScreenPtr r200Screen; /* Screen private DRI data */
- drm_radeon_sarea_t *sarea; /* Private SAREA data */
-
/* TCL stuff
*/
GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS];
@@ -893,15 +619,6 @@ struct r200_context {
GLuint TexGenCompSel;
GLmatrix tmpmat;
- /* buffer swap
- */
- int64_t swap_ust;
- int64_t swap_missed_ust;
-
- GLuint swap_count;
- GLuint swap_missed_count;
-
-
/* r200_tcl.c
*/
struct r200_tcl_info tcl;
@@ -910,14 +627,6 @@ struct r200_context {
*/
struct r200_swtcl_info swtcl;
- /* Mirrors of some DRI state
- */
- struct r200_dri_mirror dri;
-
- /* Configuration cache
- */
- driOptionCache optionCache;
-
GLboolean using_hyperz;
GLboolean texmicrotile;
@@ -927,28 +636,10 @@ struct r200_context {
#define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx))
-static INLINE GLuint r200PackColor( GLuint cpp,
- GLubyte r, GLubyte g,
- GLubyte b, GLubyte a )
-{
- switch ( cpp ) {
- case 2:
- return PACK_COLOR_565( r, g, b );
- case 4:
- return PACK_COLOR_8888( a, r, g, b );
- default:
- return 0;
- }
-}
-
-
extern void r200DestroyContext( __DRIcontextPrivate *driContextPriv );
extern GLboolean r200CreateContext( const __GLcontextModes *glVisual,
__DRIcontextPrivate *driContextPriv,
void *sharedContextPrivate);
-extern void r200SwapBuffers( __DRIdrawablePrivate *dPriv );
-extern void r200CopySubBuffer( __DRIdrawablePrivate * dPriv,
- int x, int y, int w, int h );
extern GLboolean r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
__DRIdrawablePrivate *driDrawPriv,
__DRIdrawablePrivate *driReadPriv );
@@ -957,28 +648,9 @@ extern GLboolean r200UnbindContext( __DRIcontextPrivate *driContextPriv );
/* ================================================================
* Debugging:
*/
-#define DO_DEBUG 1
-#if DO_DEBUG
-extern int R200_DEBUG;
-#else
-#define R200_DEBUG 0
-#endif
+#define R200_DEBUG RADEON_DEBUG
+
-#define DEBUG_TEXTURE 0x001
-#define DEBUG_STATE 0x002
-#define DEBUG_IOCTL 0x004
-#define DEBUG_PRIMS 0x008
-#define DEBUG_VERTS 0x010
-#define DEBUG_FALLBACKS 0x020
-#define DEBUG_VFMT 0x040
-#define DEBUG_CODEGEN 0x080
-#define DEBUG_VERBOSE 0x100
-#define DEBUG_DRI 0x200
-#define DEBUG_DMA 0x400
-#define DEBUG_SANITY 0x800
-#define DEBUG_SYNC 0x1000
-#define DEBUG_PIXEL 0x2000
-#define DEBUG_MEMORY 0x4000
#endif /* __R200_CONTEXT_H__ */
diff --git a/r200/r200_fragshader.c b/r200/r200_fragshader.c
index d514b28..85c1b7b 100644
--- a/r200/r200_fragshader.c
+++ b/r200/r200_fragshader.c
@@ -522,7 +522,7 @@ static void r200UpdateFSConstants( GLcontext *ctx )
CLAMPED_FLOAT_TO_UBYTE(con_byte[2], ctx->ATIFragmentShader.GlobalConstants[i][2]);
CLAMPED_FLOAT_TO_UBYTE(con_byte[3], ctx->ATIFragmentShader.GlobalConstants[i][3]);
}
- rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = r200PackColor (
+ rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = radeonPackColor (
4, con_byte[0], con_byte[1], con_byte[2], con_byte[3] );
}
}
diff --git a/r200/r200_ioctl.c b/r200/r200_ioctl.c
index 0741e57..b238adb 100644
--- a/r200/r200_ioctl.c
+++ b/r200/r200_ioctl.c
@@ -31,7 +31,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
-
+
#include <sched.h>
#include <errno.h>
@@ -41,6 +41,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/context.h"
#include "swrast/swrast.h"
+
+
+#include "radeon_common.h"
+#include "radeon_lock.h"
#include "r200_context.h"
#include "r200_state.h"
#include "r200_ioctl.h"
@@ -54,635 +58,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R200_TIMEOUT 512
#define R200_IDLE_RETRY 16
-
-static void r200WaitForIdle( r200ContextPtr rmesa );
-
-
-/* At this point we were in FlushCmdBufLocked but we had lost our context, so
- * we need to unwire our current cmdbuf, hook the one with the saved state in
- * it, flush it, and then put the current one back. This is so commands at the
- * start of a cmdbuf can rely on the state being kept from the previous one.
- */
-static void r200BackUpAndEmitLostStateLocked( r200ContextPtr rmesa )
-{
- GLuint nr_released_bufs;
- struct r200_store saved_store;
-
- if (rmesa->backup_store.cmd_used == 0)
- return;
-
- if (R200_DEBUG & DEBUG_STATE)
- fprintf(stderr, "Emitting backup state on lost context\n");
-
- rmesa->lost_context = GL_FALSE;
-
- nr_released_bufs = rmesa->dma.nr_released_bufs;
- saved_store = rmesa->store;
- rmesa->dma.nr_released_bufs = 0;
- rmesa->store = rmesa->backup_store;
- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
- rmesa->dma.nr_released_bufs = nr_released_bufs;
- rmesa->store = saved_store;
-}
-
-int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller )
-{
- int ret, i;
- drm_radeon_cmd_buffer_t cmd;
-
- if (rmesa->lost_context)
- r200BackUpAndEmitLostStateLocked( rmesa );
-
- if (R200_DEBUG & DEBUG_IOCTL) {
- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
-
- if (0 & R200_DEBUG & DEBUG_VERBOSE)
- for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
- fprintf(stderr, "%d: %x\n", i/4,
- *(int *)(&rmesa->store.cmd_buf[i]));
- }
-
- if (R200_DEBUG & DEBUG_DMA)
- fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
- rmesa->dma.nr_released_bufs);
-
-
- if (R200_DEBUG & DEBUG_SANITY) {
- if (rmesa->state.scissor.enabled)
- ret = r200SanityCmdBuffer( rmesa,
- rmesa->state.scissor.numClipRects,
- rmesa->state.scissor.pClipRects);
- else
- ret = r200SanityCmdBuffer( rmesa,
- rmesa->numClipRects,
- rmesa->pClipRects);
- if (ret) {
- fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);
- goto out;
- }
- }
-
-
- if (R200_DEBUG & DEBUG_MEMORY) {
- if (! driValidateTextureHeaps( rmesa->texture_heaps, rmesa->nr_heaps,
- & rmesa->swapped ) ) {
- fprintf( stderr, "%s: texture memory is inconsistent - expect "
- "mangled textures\n", __FUNCTION__ );
- }
- }
-
-
- cmd.bufsz = rmesa->store.cmd_used;
- cmd.buf = rmesa->store.cmd_buf;
-
- if (rmesa->state.scissor.enabled) {
- cmd.nbox = rmesa->state.scissor.numClipRects;
- cmd.boxes = (drm_clip_rect_t *)rmesa->state.scissor.pClipRects;
- } else {
- cmd.nbox = rmesa->numClipRects;
- cmd.boxes = (drm_clip_rect_t *)rmesa->pClipRects;
- }
-
- ret = drmCommandWrite( rmesa->dri.fd,
- DRM_RADEON_CMDBUF,
- &cmd, sizeof(cmd) );
-
- if (ret)
- fprintf(stderr, "drmCommandWrite: %d\n", ret);
-
- if (R200_DEBUG & DEBUG_SYNC) {
- fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
- r200WaitForIdleLocked( rmesa );
- }
-
-
- out:
- rmesa->store.primnr = 0;
- rmesa->store.statenr = 0;
- rmesa->store.cmd_used = 0;
- rmesa->dma.nr_released_bufs = 0;
- rmesa->save_on_next_emit = 1;
-
- return ret;
-}
-
-
-/* Note: does not emit any commands to avoid recursion on
- * r200AllocCmdBuf.
- */
-void r200FlushCmdBuf( r200ContextPtr rmesa, const char *caller )
-{
- int ret;
-
- LOCK_HARDWARE( rmesa );
-
- ret = r200FlushCmdBufLocked( rmesa, caller );
-
- UNLOCK_HARDWARE( rmesa );
-
- if (ret) {
- fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret);
- exit(ret);
- }
-}
-
-
-/* =============================================================
- * Hardware vertex buffer handling
- */
-
-
-void r200RefillCurrentDmaRegion( r200ContextPtr rmesa )
-{
- struct r200_dma_buffer *dmabuf;
- int fd = rmesa->dri.fd;
- int index = 0;
- int size = 0;
- drmDMAReq dma;
- int ret;
-
- if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- if (rmesa->dma.flush) {
- rmesa->dma.flush( rmesa );
- }
-
- if (rmesa->dma.current.buf)
- r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
-
- if (rmesa->dma.nr_released_bufs > 4)
- r200FlushCmdBuf( rmesa, __FUNCTION__ );
-
- dma.context = rmesa->dri.hwContext;
- dma.send_count = 0;
- dma.send_list = NULL;
- dma.send_sizes = NULL;
- dma.flags = 0;
- dma.request_count = 1;
- dma.request_size = RADEON_BUFFER_SIZE;
- dma.request_list = &index;
- dma.request_sizes = &size;
- dma.granted_count = 0;
-
- LOCK_HARDWARE(rmesa); /* no need to validate */
-
- while (1) {
- ret = drmDMA( fd, &dma );
- if (ret == 0)
- break;
-
- if (rmesa->dma.nr_released_bufs) {
- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
- }
-
- if (rmesa->do_usleeps) {
- UNLOCK_HARDWARE( rmesa );
- DO_USLEEP( 1 );
- LOCK_HARDWARE( rmesa );
- }
- }
-
- UNLOCK_HARDWARE(rmesa);
-
- if (R200_DEBUG & DEBUG_DMA)
- fprintf(stderr, "Allocated buffer %d\n", index);
-
- dmabuf = CALLOC_STRUCT( r200_dma_buffer );
- dmabuf->buf = &rmesa->r200Screen->buffers->list[index];
- dmabuf->refcount = 1;
-
- rmesa->dma.current.buf = dmabuf;
- rmesa->dma.current.address = dmabuf->buf->address;
- rmesa->dma.current.end = dmabuf->buf->total;
- rmesa->dma.current.start = 0;
- rmesa->dma.current.ptr = 0;
-}
-
-void r200ReleaseDmaRegion( r200ContextPtr rmesa,
- struct r200_dma_region *region,
- const char *caller )
-{
- if (R200_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
-
- if (!region->buf)
- return;
-
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa );
-
- if (--region->buf->refcount == 0) {
- drm_radeon_cmd_header_t *cmd;
-
- if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
- fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
- region->buf->buf->idx);
-
- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sizeof(*cmd),
- __FUNCTION__ );
- cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
- cmd->dma.buf_idx = region->buf->buf->idx;
- FREE(region->buf);
- rmesa->dma.nr_released_bufs++;
- }
-
- region->buf = NULL;
- region->start = 0;
-}
-
-/* Allocates a region from rmesa->dma.current. If there isn't enough
- * space in current, grab a new buffer (and discard what was left of current)
- */
-void r200AllocDmaRegion( r200ContextPtr rmesa,
- struct r200_dma_region *region,
- int bytes,
- int alignment )
-{
- if (R200_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
-
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa );
-
- if (region->buf)
- r200ReleaseDmaRegion( rmesa, region, __FUNCTION__ );
-
- alignment--;
- rmesa->dma.current.start = rmesa->dma.current.ptr =
- (rmesa->dma.current.ptr + alignment) & ~alignment;
-
- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
- r200RefillCurrentDmaRegion( rmesa );
-
- region->start = rmesa->dma.current.start;
- region->ptr = rmesa->dma.current.start;
- region->end = rmesa->dma.current.start + bytes;
- region->address = rmesa->dma.current.address;
- region->buf = rmesa->dma.current.buf;
- region->buf->refcount++;
-
- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
- rmesa->dma.current.start =
- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
-
- assert( rmesa->dma.current.ptr <= rmesa->dma.current.end );
-}
-
-/* ================================================================
- * SwapBuffers with client-side throttling
- */
-
-static uint32_t r200GetLastFrame(r200ContextPtr rmesa)
-{
- drm_radeon_getparam_t gp;
- int ret;
- uint32_t frame;
-
- gp.param = RADEON_PARAM_LAST_FRAME;
- gp.value = (int *)&frame;
- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
- &gp, sizeof(gp) );
- if ( ret ) {
- fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
- exit(1);
- }
-
- return frame;
-}
-
-static void r200EmitIrqLocked( r200ContextPtr rmesa )
-{
- drm_radeon_irq_emit_t ie;
- int ret;
-
- ie.irq_seq = &rmesa->iw.irq_seq;
- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT,
- &ie, sizeof(ie) );
- if ( ret ) {
- fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, ret );
- exit(1);
- }
-}
-
-
-static void r200WaitIrq( r200ContextPtr rmesa )
-{
- int ret;
-
- do {
- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
- &rmesa->iw, sizeof(rmesa->iw) );
- } while (ret && (errno == EINTR || errno == EBUSY));
-
- if ( ret ) {
- fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
- exit(1);
- }
-}
-
-
-static void r200WaitForFrameCompletion( r200ContextPtr rmesa )
-{
- drm_radeon_sarea_t *sarea = rmesa->sarea;
-
- if (rmesa->do_irqs) {
- if (r200GetLastFrame(rmesa) < sarea->last_frame) {
- if (!rmesa->irqsEmitted) {
- while (r200GetLastFrame (rmesa) < sarea->last_frame)
- ;
- }
- else {
- UNLOCK_HARDWARE( rmesa );
- r200WaitIrq( rmesa );
- LOCK_HARDWARE( rmesa );
- }
- rmesa->irqsEmitted = 10;
- }
-
- if (rmesa->irqsEmitted) {
- r200EmitIrqLocked( rmesa );
- rmesa->irqsEmitted--;
- }
- }
- else {
- while (r200GetLastFrame (rmesa) < sarea->last_frame) {
- UNLOCK_HARDWARE( rmesa );
- if (rmesa->do_usleeps)
- DO_USLEEP( 1 );
- LOCK_HARDWARE( rmesa );
- }
- }
-}
-
-
-
-/* Copy the back color buffer to the front color buffer.
- */
-void r200CopyBuffer( __DRIdrawablePrivate *dPriv,
- const drm_clip_rect_t *rect)
-{
- r200ContextPtr rmesa;
- GLint nbox, i, ret;
- GLboolean missed_target;
- int64_t ust;
- __DRIscreenPrivate *psp = dPriv->driScreenPriv;
-
- assert(dPriv);
- assert(dPriv->driContextPriv);
- assert(dPriv->driContextPriv->driverPrivate);
-
- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
-
- if ( R200_DEBUG & DEBUG_IOCTL ) {
- fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *)rmesa->glCtx );
- }
-
- R200_FIREVERTICES( rmesa );
-
- LOCK_HARDWARE( rmesa );
-
-
- /* Throttle the frame rate -- only allow one pending swap buffers
- * request at a time.
- */
- r200WaitForFrameCompletion( rmesa );
- if (!rect)
- {
- UNLOCK_HARDWARE( rmesa );
- driWaitForVBlank( dPriv, & missed_target );
- LOCK_HARDWARE( rmesa );
- }
-
- nbox = dPriv->numClipRects; /* must be in locked region */
-
- for ( i = 0 ; i < nbox ; ) {
- GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
- drm_clip_rect_t *box = dPriv->pClipRects;
- drm_clip_rect_t *b = rmesa->sarea->boxes;
- GLint n = 0;
-
- for ( ; i < nr ; i++ ) {
-
- *b = box[i];
-
- if (rect)
- {
- if (rect->x1 > b->x1)
- b->x1 = rect->x1;
- if (rect->y1 > b->y1)
- b->y1 = rect->y1;
- if (rect->x2 < b->x2)
- b->x2 = rect->x2;
- if (rect->y2 < b->y2)
- b->y2 = rect->y2;
-
- if (b->x1 >= b->x2 || b->y1 >= b->y2)
- continue;
- }
-
- b++;
- n++;
- }
- rmesa->sarea->nbox = n;
-
- if (!n)
- continue;
-
- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
-
- if ( ret ) {
- fprintf( stderr, "DRM_R200_SWAP_BUFFERS: return = %d\n", ret );
- UNLOCK_HARDWARE( rmesa );
- exit( 1 );
- }
- }
-
- UNLOCK_HARDWARE( rmesa );
- if (!rect)
- {
- rmesa->hw.all_dirty = GL_TRUE;
-
- rmesa->swap_count++;
- (*psp->systemTime->getUST)( & ust );
- if ( missed_target ) {
- rmesa->swap_missed_count++;
- rmesa->swap_missed_ust = ust - rmesa->swap_ust;
- }
-
- rmesa->swap_ust = ust;
-
- sched_yield();
- }
-}
-
-void r200PageFlip( __DRIdrawablePrivate *dPriv )
-{
- r200ContextPtr rmesa;
- GLint ret;
- GLboolean missed_target;
- __DRIscreenPrivate *psp = dPriv->driScreenPriv;
-
- assert(dPriv);
- assert(dPriv->driContextPriv);
- assert(dPriv->driContextPriv->driverPrivate);
-
- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
-
- if ( R200_DEBUG & DEBUG_IOCTL ) {
- fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
- rmesa->sarea->pfCurrentPage);
- }
-
- R200_FIREVERTICES( rmesa );
- LOCK_HARDWARE( rmesa );
-
- if (!dPriv->numClipRects) {
- UNLOCK_HARDWARE( rmesa );
- usleep( 10000 ); /* throttle invisible client 10ms */
- return;
- }
-
- /* Need to do this for the perf box placement:
- */
- {
- drm_clip_rect_t *box = dPriv->pClipRects;
- drm_clip_rect_t *b = rmesa->sarea->boxes;
- b[0] = box[0];
- rmesa->sarea->nbox = 1;
- }
-
- /* Throttle the frame rate -- only allow a few pending swap buffers
- * request at a time.
- */
- r200WaitForFrameCompletion( rmesa );
- UNLOCK_HARDWARE( rmesa );
- driWaitForVBlank( dPriv, & missed_target );
- if ( missed_target ) {
- rmesa->swap_missed_count++;
- (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
- }
- LOCK_HARDWARE( rmesa );
-
- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
-
- UNLOCK_HARDWARE( rmesa );
-
- if ( ret ) {
- fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
- exit( 1 );
- }
-
- rmesa->swap_count++;
- (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
-
-#if 000
- if ( rmesa->sarea->pfCurrentPage == 1 ) {
- rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
- rmesa->state.color.drawPitch = rmesa->r200Screen->frontPitch;
- } else {
- rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
- rmesa->state.color.drawPitch = rmesa->r200Screen->backPitch;
- }
-
- R200_STATECHANGE( rmesa, ctx );
- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset
- + rmesa->r200Screen->fbLocation;
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch;
- if (rmesa->sarea->tiling_enabled) {
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
- }
-#else
- /* Get ready for drawing next frame. Update the renderbuffers'
- * flippedOffset/Pitch fields so we draw into the right place.
- */
- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
- rmesa->sarea->pfCurrentPage);
-
-
- r200UpdateDrawBuffer(rmesa->glCtx);
-#endif
-}
-
-
-/* ================================================================
- * Buffer clear
- */
-static void r200Clear( GLcontext *ctx, GLbitfield mask )
+static void r200KernelClear(GLcontext *ctx, GLuint flags)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
- GLuint flags = 0;
- GLuint color_mask = 0;
- GLint ret, i;
- GLint cx, cy, cw, ch;
-
- if ( R200_DEBUG & DEBUG_IOCTL ) {
- fprintf( stderr, "r200Clear\n");
- }
-
- {
- LOCK_HARDWARE( rmesa );
- UNLOCK_HARDWARE( rmesa );
- if ( dPriv->numClipRects == 0 )
- return;
- }
-
- r200Flush( ctx );
-
- if ( mask & BUFFER_BIT_FRONT_LEFT ) {
- flags |= RADEON_FRONT;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
- mask &= ~BUFFER_BIT_FRONT_LEFT;
- }
-
- if ( mask & BUFFER_BIT_BACK_LEFT ) {
- flags |= RADEON_BACK;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
- mask &= ~BUFFER_BIT_BACK_LEFT;
- }
-
- if ( mask & BUFFER_BIT_DEPTH ) {
- flags |= RADEON_DEPTH;
- mask &= ~BUFFER_BIT_DEPTH;
- }
-
- if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
- flags |= RADEON_STENCIL;
- mask &= ~BUFFER_BIT_STENCIL;
- }
-
- if ( mask ) {
- if (R200_DEBUG & DEBUG_FALLBACKS)
- fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
- _swrast_Clear( ctx, mask );
- }
-
- if ( !flags )
- return;
-
- if (rmesa->using_hyperz) {
- flags |= RADEON_USE_COMP_ZBUF;
-/* if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200)
- flags |= RADEON_USE_HIERZ; */
- if (!(rmesa->state.stencil.hwBuffer) ||
- ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
- ((rmesa->state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
- flags |= RADEON_CLEAR_FASTZ;
- }
- }
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
+ GLint cx, cy, cw, ch, ret;
+ GLuint i;
- LOCK_HARDWARE( rmesa );
-
- /* compute region after locking: */
- cx = ctx->DrawBuffer->_Xmin;
- cy = ctx->DrawBuffer->_Ymin;
- cw = ctx->DrawBuffer->_Xmax - cx;
- ch = ctx->DrawBuffer->_Ymax - cy;
-
- /* Flip top to bottom */
- cx += dPriv->x;
- cy = dPriv->y + dPriv->h - cy - ch;
+ LOCK_HARDWARE( &rmesa->radeon );
/* Throttle the number of clear ioctls we do.
*/
@@ -693,7 +76,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
gp.param = RADEON_PARAM_LAST_CLEAR;
gp.value = (int *)&clear;
- ret = drmCommandWriteRead( rmesa->dri.fd,
+ ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
if ( ret ) {
@@ -703,24 +86,34 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
/* Clear throttling needs more thought.
*/
- if ( rmesa->sarea->last_clear - clear <= 25 ) {
+ if ( rmesa->radeon.sarea->last_clear - clear <= 25 ) {
break;
}
-
- if (rmesa->do_usleeps) {
- UNLOCK_HARDWARE( rmesa );
+
+ if (rmesa->radeon.do_usleeps) {
+ UNLOCK_HARDWARE( &rmesa->radeon );
DO_USLEEP( 1 );
- LOCK_HARDWARE( rmesa );
+ LOCK_HARDWARE( &rmesa->radeon );
}
}
/* Send current state to the hardware */
- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+
+
+ /* compute region after locking: */
+ cx = ctx->DrawBuffer->_Xmin;
+ cy = ctx->DrawBuffer->_Ymin;
+ cw = ctx->DrawBuffer->_Xmax - cx;
+ ch = ctx->DrawBuffer->_Ymax - cy;
+ /* Flip top to bottom */
+ cx += dPriv->x;
+ cy = dPriv->y + dPriv->h - cy - ch;
for ( i = 0 ; i < dPriv->numClipRects ; ) {
GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
drm_clip_rect_t *box = dPriv->pClipRects;
- drm_clip_rect_t *b = rmesa->sarea->boxes;
+ drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
drm_radeon_clear_t clear;
drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
GLint n = 0;
@@ -755,17 +148,17 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
}
}
- rmesa->sarea->nbox = n;
+ rmesa->radeon.sarea->nbox = n;
clear.flags = flags;
- clear.clear_color = rmesa->state.color.clear;
- clear.clear_depth = rmesa->state.depth.clear; /* needed for hyperz */
+ clear.clear_color = rmesa->radeon.state.color.clear;
+ clear.clear_depth = rmesa->radeon.state.depth.clear; /* needed for hyperz */
clear.color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
- clear.depth_mask = rmesa->state.stencil.clear;
+ clear.depth_mask = rmesa->radeon.state.stencil.clear;
clear.depth_boxes = depth_boxes;
n--;
- b = rmesa->sarea->boxes;
+ b = rmesa->radeon.sarea->boxes;
for ( ; n >= 0 ; n-- ) {
depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
@@ -774,84 +167,94 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
depth_boxes[n].f[CLEAR_DEPTH] = ctx->Depth.Clear;
}
- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
+ ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
&clear, sizeof(clear));
if ( ret ) {
- UNLOCK_HARDWARE( rmesa );
+ UNLOCK_HARDWARE( &rmesa->radeon );
fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
exit( 1 );
}
}
-
- UNLOCK_HARDWARE( rmesa );
- rmesa->hw.all_dirty = GL_TRUE;
+ UNLOCK_HARDWARE( &rmesa->radeon );
}
+/* ================================================================
+ * Buffer clear
+ */
+static void r200Clear( GLcontext *ctx, GLbitfield mask )
+{
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
+ GLuint flags = 0;
+ GLuint color_mask = 0;
+ GLuint orig_mask = mask;
+ if ( R200_DEBUG & RADEON_IOCTL ) {
+ if (rmesa->radeon.sarea)
+ fprintf( stderr, "r200Clear %x %d\n", mask, rmesa->radeon.sarea->pfCurrentPage);
+ else
+ fprintf( stderr, "r200Clear %x radeon->sarea is NULL\n", mask);
+ }
-void r200WaitForIdleLocked( r200ContextPtr rmesa )
-{
- int ret;
- int i = 0;
-
- do {
- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_CP_IDLE);
- if (ret)
- DO_USLEEP( 1 );
- } while (ret && ++i < 100);
-
- if ( ret < 0 ) {
- UNLOCK_HARDWARE( rmesa );
- fprintf( stderr, "Error: R200 timed out... exiting\n" );
- exit( -1 );
- }
-}
+ {
+ LOCK_HARDWARE( &rmesa->radeon );
+ UNLOCK_HARDWARE( &rmesa->radeon );
+ if ( dPriv->numClipRects == 0 )
+ return;
+ }
+ radeonFlush( ctx );
-static void r200WaitForIdle( r200ContextPtr rmesa )
-{
- LOCK_HARDWARE(rmesa);
- r200WaitForIdleLocked( rmesa );
- UNLOCK_HARDWARE(rmesa);
-}
+ if ( mask & BUFFER_BIT_FRONT_LEFT ) {
+ flags |= RADEON_FRONT;
+ color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+ mask &= ~BUFFER_BIT_FRONT_LEFT;
+ }
+ if ( mask & BUFFER_BIT_BACK_LEFT ) {
+ flags |= RADEON_BACK;
+ color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+ mask &= ~BUFFER_BIT_BACK_LEFT;
+ }
-void r200Flush( GLcontext *ctx )
-{
- r200ContextPtr rmesa = R200_CONTEXT( ctx );
+ if ( mask & BUFFER_BIT_DEPTH ) {
+ flags |= RADEON_DEPTH;
+ mask &= ~BUFFER_BIT_DEPTH;
+ }
- if (R200_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s\n", __FUNCTION__);
+ if ( (mask & BUFFER_BIT_STENCIL) ) {
+ flags |= RADEON_STENCIL;
+ mask &= ~BUFFER_BIT_STENCIL;
+ }
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa );
+ if ( mask ) {
+ if (R200_DEBUG & RADEON_FALLBACKS)
+ fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
+ _swrast_Clear( ctx, mask );
+ }
- r200EmitState( rmesa );
-
- if (rmesa->store.cmd_used)
- r200FlushCmdBuf( rmesa, __FUNCTION__ );
-}
+ if ( !flags )
+ return;
-/* Make sure all commands have been sent to the hardware and have
- * completed processing.
- */
-void r200Finish( GLcontext *ctx )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
- r200Flush( ctx );
+ if (rmesa->using_hyperz) {
+ flags |= RADEON_USE_COMP_ZBUF;
+/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
+ flags |= RADEON_USE_HIERZ; */
+ if (!((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+ ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
+ flags |= RADEON_CLEAR_FASTZ;
+ }
+ }
- if (rmesa->do_irqs) {
- LOCK_HARDWARE( rmesa );
- r200EmitIrqLocked( rmesa );
- UNLOCK_HARDWARE( rmesa );
- r200WaitIrq( rmesa );
+ if (rmesa->radeon.radeonScreen->kernel_mm)
+ radeonUserClear(ctx, orig_mask);
+ else {
+ r200KernelClear(ctx, flags);
+ rmesa->radeon.hw.all_dirty = GL_TRUE;
}
- else
- r200WaitForIdle( rmesa );
}
-
/* This version of AllocateMemoryMESA allocates only GART memory, and
* only does so after the point at which the driver has been
* initialized.
@@ -862,7 +265,7 @@ void r200Finish( GLcontext *ctx )
* device fd.
*/
void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
- GLfloat readfreq, GLfloat writefreq,
+ GLfloat readfreq, GLfloat writefreq,
GLfloat priority)
{
GET_CURRENT_CONTEXT(ctx);
@@ -871,11 +274,11 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
drm_radeon_mem_alloc_t alloc;
int ret;
- if (R200_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq,
+ if (R200_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq,
writefreq, priority);
- if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map)
+ if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map)
return NULL;
if (getenv("R200_NO_ALLOC"))
@@ -886,17 +289,17 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
alloc.size = size;
alloc.region_offset = &region_offset;
- ret = drmCommandWriteRead( rmesa->r200Screen->driScreen->fd,
+ ret = drmCommandWriteRead( rmesa->radeon.radeonScreen->driScreen->fd,
DRM_RADEON_ALLOC,
&alloc, sizeof(alloc));
-
+
if (ret) {
fprintf(stderr, "%s: DRM_RADEON_ALLOC ret %d\n", __FUNCTION__, ret);
return NULL;
}
-
+
{
- char *region_start = (char *)rmesa->r200Screen->gartTextures.map;
+ char *region_start = (char *)rmesa->radeon.radeonScreen->gartTextures.map;
return (void *)(region_start + region_offset);
}
}
@@ -911,31 +314,31 @@ void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer)
drm_radeon_mem_free_t memfree;
int ret;
- if (R200_DEBUG & DEBUG_IOCTL)
+ if (R200_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
- if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) {
+ if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) {
fprintf(stderr, "%s: no context\n", __FUNCTION__);
return;
}
- region_offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
+ region_offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
- if (region_offset < 0 ||
- region_offset > rmesa->r200Screen->gartTextures.size) {
+ if (region_offset < 0 ||
+ region_offset > rmesa->radeon.radeonScreen->gartTextures.size) {
fprintf(stderr, "offset %d outside range 0..%d\n", region_offset,
- rmesa->r200Screen->gartTextures.size);
+ rmesa->radeon.radeonScreen->gartTextures.size);
return;
}
memfree.region = RADEON_MEM_REGION_GART;
memfree.region_offset = region_offset;
-
- ret = drmCommandWrite( rmesa->r200Screen->driScreen->fd,
+
+ ret = drmCommandWrite( rmesa->radeon.radeonScreen->driScreen->fd,
DRM_RADEON_FREE,
&memfree, sizeof(memfree));
-
- if (ret)
+
+ if (ret)
fprintf(stderr, "%s: DRM_RADEON_FREE ret %d\n", __FUNCTION__, ret);
}
@@ -956,32 +359,32 @@ GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer)
card_offset = r200GartOffsetFromVirtual( rmesa, pointer );
- return card_offset - rmesa->r200Screen->gart_base;
+ return card_offset - rmesa->radeon.radeonScreen->gart_base;
}
GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
GLint size )
{
- ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
+ ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
int valid = (size >= 0 &&
offset >= 0 &&
- offset + size < rmesa->r200Screen->gartTextures.size);
+ offset + size < rmesa->radeon.radeonScreen->gartTextures.size);
- if (R200_DEBUG & DEBUG_IOCTL)
+ if (R200_DEBUG & RADEON_IOCTL)
fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid );
-
+
return valid;
}
GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
{
- ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
+ ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
- if (offset < 0 || offset > rmesa->r200Screen->gartTextures.size)
+ if (offset < 0 || offset > rmesa->radeon.radeonScreen->gartTextures.size)
return ~0;
else
- return rmesa->r200Screen->gart_texture_offset + offset;
+ return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
}
@@ -989,7 +392,7 @@ GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
void r200InitIoctlFuncs( struct dd_function_table *functions )
{
functions->Clear = r200Clear;
- functions->Finish = r200Finish;
- functions->Flush = r200Flush;
+ functions->Finish = radeonFinish;
+ functions->Flush = radeonFlush;
}
diff --git a/r200/r200_ioctl.h b/r200/r200_ioctl.h
index f7458e4..8d51aef 100644
--- a/r200/r200_ioctl.h
+++ b/r200/r200_ioctl.h
@@ -37,65 +37,31 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/simple_list.h"
#include "radeon_dri.h"
-#include "r200_lock.h"
+
+#include "radeon_bocs_wrapper.h"
#include "xf86drm.h"
#include "drm.h"
#include "radeon_drm.h"
-extern void r200EmitState( r200ContextPtr rmesa );
+extern void r200EmitMaxVtxIndex(r200ContextPtr rmesa, int count);
extern void r200EmitVertexAOS( r200ContextPtr rmesa,
- GLuint vertex_size,
- GLuint offset );
+ GLuint vertex_size,
+ struct radeon_bo *bo,
+ GLuint offset );
extern void r200EmitVbufPrim( r200ContextPtr rmesa,
GLuint primitive,
GLuint vertex_nr );
-extern void r200FlushElts( r200ContextPtr rmesa );
+extern void r200FlushElts(GLcontext *ctx);
extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
GLuint primitive,
GLuint min_nr );
-extern void r200EmitAOS( r200ContextPtr rmesa,
- struct r200_dma_region **regions,
- GLuint n,
- GLuint offset );
-
-extern void r200EmitBlit( r200ContextPtr rmesa,
- GLuint color_fmt,
- GLuint src_pitch,
- GLuint src_offset,
- GLuint dst_pitch,
- GLuint dst_offset,
- GLint srcx, GLint srcy,
- GLint dstx, GLint dsty,
- GLuint w, GLuint h );
-
-extern void r200EmitWait( r200ContextPtr rmesa, GLuint flags );
-
-extern void r200FlushCmdBuf( r200ContextPtr rmesa, const char * );
-extern int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller );
-
-extern void r200RefillCurrentDmaRegion( r200ContextPtr rmesa );
-
-extern void r200AllocDmaRegion( r200ContextPtr rmesa,
- struct r200_dma_region *region,
- int bytes,
- int alignment );
-
-extern void r200ReleaseDmaRegion( r200ContextPtr rmesa,
- struct r200_dma_region *region,
- const char *caller );
-
-extern void r200CopyBuffer( __DRIdrawablePrivate *drawable,
- const drm_clip_rect_t *rect);
-extern void r200PageFlip( __DRIdrawablePrivate *drawable );
-extern void r200Flush( GLcontext *ctx );
-extern void r200Finish( GLcontext *ctx );
-extern void r200WaitForIdleLocked( r200ContextPtr rmesa );
-extern void r200WaitForVBlank( r200ContextPtr rmesa );
+extern void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset);
+
extern void r200InitIoctlFuncs( struct dd_function_table *functions );
extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq,
@@ -119,8 +85,8 @@ void r200SetUpAtomList( r200ContextPtr rmesa );
*/
#define R200_NEWPRIM( rmesa ) \
do { \
- if ( rmesa->dma.flush ) \
- rmesa->dma.flush( rmesa ); \
+ if ( rmesa->radeon.dma.flush ) \
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \
} while (0)
/* Can accomodate several state changes and primitive changes without
@@ -130,22 +96,32 @@ do { \
do { \
R200_NEWPRIM( rmesa ); \
rmesa->hw.ATOM.dirty = GL_TRUE; \
- rmesa->hw.is_dirty = GL_TRUE; \
+ rmesa->radeon.hw.is_dirty = GL_TRUE; \
} while (0)
+#define R200_SET_STATE( rmesa, ATOM, index, newvalue ) \
+ do { \
+ uint32_t __index = (index); \
+ uint32_t __dword = (newvalue); \
+ if (__dword != (rmesa)->hw.ATOM.cmd[__index]) { \
+ R200_STATECHANGE( (rmesa), ATOM ); \
+ (rmesa)->hw.ATOM.cmd[__index] = __dword; \
+ } \
+ } while(0)
+
#define R200_DB_STATE( ATOM ) \
memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \
rmesa->hw.ATOM.cmd_size * 4)
static INLINE int R200_DB_STATECHANGE(
r200ContextPtr rmesa,
- struct r200_state_atom *atom )
+ struct radeon_state_atom *atom )
{
if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
- int *tmp;
+ GLuint *tmp;
R200_NEWPRIM( rmesa );
atom->dirty = GL_TRUE;
- rmesa->hw.is_dirty = GL_TRUE;
+ rmesa->radeon.hw.is_dirty = GL_TRUE;
tmp = atom->cmd;
atom->cmd = atom->lastcmd;
atom->lastcmd = tmp;
@@ -156,54 +132,47 @@ static INLINE int R200_DB_STATECHANGE(
}
-/* Fire the buffered vertices no matter what.
- */
-#define R200_FIREVERTICES( rmesa ) \
-do { \
- if ( rmesa->store.cmd_used || rmesa->dma.flush ) { \
- r200Flush( rmesa->glCtx ); \
- } \
-} while (0)
-
/* Command lengths. Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
* are available, you will also be adding an rmesa->state.max_state_size because
* r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
*/
-#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int))
-#define VERT_AOS_BUFSZ (5 * sizeof(int))
+#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2))
+#define VERT_AOS_BUFSZ (5)
#define ELTS_BUFSZ(nr) (12 + nr * 2)
-#define VBUF_BUFSZ (3 * sizeof(int))
-
-/* Ensure that a minimum amount of space is available in the command buffer.
- * This is used to ensure atomicity of state updates with the rendering requests
- * that rely on them.
- *
- * An alternative would be to implement a "soft lock" such that when the buffer
- * wraps at an inopportune time, we grab the lock, flush the current buffer,
- * and hang on to the lock until the critical section is finished and we flush
- * the buffer again and unlock.
- */
-static INLINE void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes )
-{
- if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
- r200FlushCmdBuf( rmesa, __FUNCTION__ );
- assert( bytes <= R200_CMD_BUF_SZ );
-}
+#define VBUF_BUFSZ (3)
+#define SCISSOR_BUFSZ (8)
+#define INDEX_BUFSZ (8+2)
-/* Alloc space in the command buffer
- */
-static INLINE char *r200AllocCmdBuf( r200ContextPtr rmesa,
- int bytes, const char *where )
+static inline uint32_t cmdpacket3(int cmd_type)
{
- char * head;
+ drm_radeon_cmd_header_t cmd;
- if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
- r200FlushCmdBuf( rmesa, where );
+ cmd.i = 0;
+ cmd.header.cmd_type = cmd_type;
+
+ return (uint32_t)cmd.i;
- head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
- rmesa->store.cmd_used += bytes;
- assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
- return head;
}
+#define OUT_BATCH_PACKET3(packet, num_extra) do { \
+ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
+ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3)); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } else { \
+ OUT_BATCH(CP_PACKET2); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } \
+ } while(0)
+
+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \
+ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
+ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP)); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } else { \
+ OUT_BATCH(CP_PACKET2); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } \
+ } while(0)
+
+
#endif /* __R200_IOCTL_H__ */
diff --git a/r200/r200_lock.c b/r200/r200_lock.c
deleted file mode 100644
index 99661a4..0000000
--- a/r200/r200_lock.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
-
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "r200_context.h"
-#include "r200_lock.h"
-#include "r200_tex.h"
-#include "r200_state.h"
-#include "r200_ioctl.h"
-
-#include "drirenderbuffer.h"
-
-
-#if DEBUG_LOCKING
-char *prevLockFile = NULL;
-int prevLockLine = 0;
-#endif
-
-/* Turn on/off page flipping according to the flags in the sarea:
- */
-static void
-r200UpdatePageFlipping( r200ContextPtr rmesa )
-{
- rmesa->doPageFlip = rmesa->sarea->pfState;
- if (rmesa->glCtx->WinSysDrawBuffer) {
- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
- rmesa->sarea->pfCurrentPage);
- }
-}
-
-
-
-/* Update the hardware state. This is called if another main/context.has
- * grabbed the hardware lock, which includes the X server. This
- * function also updates the driver's window state after the X server
- * moves, resizes or restacks a window -- the change will be reflected
- * in the drawable position and clip rects. Since the X server grabs
- * the hardware lock when it changes the window state, this routine will
- * automatically be called after such a change.
- */
-void r200GetLock( r200ContextPtr rmesa, GLuint flags )
-{
- __DRIdrawablePrivate *drawable = rmesa->dri.drawable;
- __DRIdrawablePrivate *readable = rmesa->dri.readable;
- __DRIscreenPrivate *sPriv = rmesa->dri.screen;
- drm_radeon_sarea_t *sarea = rmesa->sarea;
- int i;
-
- drmGetLock( rmesa->dri.fd, rmesa->dri.hwContext, flags );
-
- /* The window might have moved, so we might need to get new clip
- * rects.
- *
- * NOTE: This releases and regrabs the hw lock to allow the X server
- * to respond to the DRI protocol request for new drawable info.
- * Since the hardware state depends on having the latest drawable
- * clip rects, all state checking must be done _after_ this call.
- */
- DRI_VALIDATE_DRAWABLE_INFO( sPriv, drawable );
- if (drawable != readable) {
- DRI_VALIDATE_DRAWABLE_INFO( sPriv, readable );
- }
-
- if ( rmesa->lastStamp != drawable->lastStamp ) {
- r200UpdatePageFlipping( rmesa );
- r200SetCliprects( rmesa );
- r200UpdateViewportOffset( rmesa->glCtx );
- driUpdateFramebufferSize(rmesa->glCtx, drawable);
- }
-
- R200_STATECHANGE( rmesa, ctx );
- if (rmesa->sarea->tiling_enabled) {
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
- }
- else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
-
- if ( sarea->ctx_owner != rmesa->dri.hwContext ) {
- sarea->ctx_owner = rmesa->dri.hwContext;
- }
-
- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
- DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] );
- }
-
- rmesa->lost_context = GL_TRUE;
-}
diff --git a/r200/r200_lock.h b/r200/r200_lock.h
deleted file mode 100644
index 4ff9890..0000000
--- a/r200/r200_lock.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
-
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#ifndef __R200_LOCK_H__
-#define __R200_LOCK_H__
-
-extern void r200GetLock( r200ContextPtr rmesa, GLuint flags );
-
-/* Turn DEBUG_LOCKING on to find locking conflicts.
- */
-#define DEBUG_LOCKING 0
-
-#if DEBUG_LOCKING
-extern char *prevLockFile;
-extern int prevLockLine;
-
-#define DEBUG_LOCK() \
- do { \
- prevLockFile = (__FILE__); \
- prevLockLine = (__LINE__); \
- } while (0)
-
-#define DEBUG_RESET() \
- do { \
- prevLockFile = 0; \
- prevLockLine = 0; \
- } while (0)
-
-#define DEBUG_CHECK_LOCK() \
- do { \
- if ( prevLockFile ) { \
- fprintf( stderr, \
- "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \
- prevLockFile, prevLockLine, __FILE__, __LINE__ ); \
- exit( 1 ); \
- } \
- } while (0)
-
-#else
-
-#define DEBUG_LOCK()
-#define DEBUG_RESET()
-#define DEBUG_CHECK_LOCK()
-
-#endif
-
-/*
- * !!! We may want to separate locks from locks with validation. This
- * could be used to improve performance for those things commands that
- * do not do any drawing !!!
- */
-
-
-/* Lock the hardware and validate our state.
- */
-#define LOCK_HARDWARE( rmesa ) \
- do { \
- char __ret = 0; \
- DEBUG_CHECK_LOCK(); \
- DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext, \
- (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret ); \
- if ( __ret ) \
- r200GetLock( rmesa, 0 ); \
- DEBUG_LOCK(); \
- } while (0)
-
-#define UNLOCK_HARDWARE( rmesa ) \
- do { \
- DRM_UNLOCK( rmesa->dri.fd, \
- rmesa->dri.hwLock, \
- rmesa->dri.hwContext ); \
- DEBUG_RESET(); \
- } while (0)
-
-#endif /* __R200_LOCK_H__ */
diff --git a/r200/r200_maos.h b/r200/r200_maos.h
index d3ed06d..16a7047 100644
--- a/r200/r200_maos.h
+++ b/r200/r200_maos.h
@@ -38,6 +38,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_context.h"
extern void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev );
-extern void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs );
#endif
diff --git a/r200/r200_maos_arrays.c b/r200/r200_maos_arrays.c
index 8512b9a..383a0c4 100644
--- a/r200/r200_maos_arrays.c
+++ b/r200/r200_maos_arrays.c
@@ -50,110 +50,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_maos.h"
#include "r200_tcl.h"
-
-#if 0
-/* Usage:
- * - from r200_tcl_render
- * - call r200EmitArrays to ensure uptodate arrays in dma
- * - emit primitives (new type?) which reference the data
- * -- need to use elts for lineloop, quads, quadstrip/flat
- * -- other primitives are all well-formed (need tristrip-1,fake-poly)
- *
- */
-static void emit_ubyte_rgba3( GLcontext *ctx,
- struct r200_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- r200_color_t *out = (r200_color_t *)(rvb->start + rvb->address);
-
- if (R200_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p\n",
- __FUNCTION__, count, stride, (void *)out);
-
- for (i = 0; i < count; i++) {
- out->red = *data;
- out->green = *(data+1);
- out->blue = *(data+2);
- out->alpha = 0xFF;
- out++;
- data += stride;
- }
-}
-
-static void emit_ubyte_rgba4( GLcontext *ctx,
- struct r200_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (R200_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
-
- if (stride == 4) {
- for (i = 0; i < count; i++)
- ((int *)out)[i] = LE32_TO_CPU(((int *)data)[i]);
- } else {
- for (i = 0; i < count; i++) {
- *(int *)out++ = LE32_TO_CPU(*(int *)data);
- data += stride;
- }
- }
-}
-
-
-static void emit_ubyte_rgba( GLcontext *ctx,
- struct r200_dma_region *rvb,
- char *data,
- int size,
- int stride,
- int count )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
- if (R200_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
-
- assert (!rvb->buf);
-
- if (stride == 0) {
- r200AllocDmaRegion( rmesa, rvb, 4, 4 );
- count = 1;
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 0;
- rvb->aos_size = 1;
- }
- else {
- r200AllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 1;
- rvb->aos_size = 1;
- }
-
- /* Emit the data
- */
- switch (size) {
- case 3:
- emit_ubyte_rgba3( ctx, rvb, data, stride, count );
- break;
- case 4:
- emit_ubyte_rgba4( ctx, rvb, data, stride, count );
- break;
- default:
- assert(0);
- exit(1);
- break;
- }
-}
-#endif
-
-
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr ) \
do { \
@@ -174,204 +70,34 @@ do { \
} while (0)
#endif
-
-static void emit_vecfog( GLcontext *ctx,
- struct r200_dma_region *rvb,
- char *data,
- int stride,
- int count )
+static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
+ GLvoid *data, int stride, int count)
{
- int i;
- GLfloat *out;
-
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
- if (R200_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
-
- assert (!rvb->buf);
-
- if (stride == 0) {
- r200AllocDmaRegion( rmesa, rvb, 4, 4 );
- count = 1;
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 0;
- rvb->aos_size = 1;
- }
- else {
- r200AllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 1;
- rvb->aos_size = 1;
- }
-
- /* Emit the data
- */
- out = (GLfloat *)(rvb->address + rvb->start);
- for (i = 0; i < count; i++) {
- out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data );
- out++;
- data += stride;
- }
-
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ uint32_t *out;
+ int i;
+ int size = 1;
+
+ if (stride == 0) {
+ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
+ count = 1;
+ aos->stride = 0;
+ } else {
+ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
+ aos->stride = size;
+ }
+
+ aos->components = size;
+ aos->count = count;
+
+ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
+ for (i = 0; i < count; i++) {
+ out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data );
+ out++;
+ data += stride;
+ }
}
-
-static void emit_vec4( GLcontext *ctx,
- struct r200_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (R200_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
-
- if (stride == 4)
- COPY_DWORDS( out, data, count );
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out++;
- data += stride;
- }
-}
-
-
-static void emit_vec8( GLcontext *ctx,
- struct r200_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (R200_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
-
- if (stride == 8)
- COPY_DWORDS( out, data, count*2 );
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data+4);
- out += 2;
- data += stride;
- }
-}
-
-static void emit_vec12( GLcontext *ctx,
- struct r200_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (R200_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- __FUNCTION__, count, stride, (void *)out, (void *)data);
-
- if (stride == 12)
- COPY_DWORDS( out, data, count*3 );
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data+4);
- out[2] = *(int *)(data+8);
- out += 3;
- data += stride;
- }
-}
-
-static void emit_vec16( GLcontext *ctx,
- struct r200_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (R200_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
-
- if (stride == 16)
- COPY_DWORDS( out, data, count*4 );
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data+4);
- out[2] = *(int *)(data+8);
- out[3] = *(int *)(data+12);
- out += 4;
- data += stride;
- }
-}
-
-
-static void emit_vector( GLcontext *ctx,
- struct r200_dma_region *rvb,
- char *data,
- int size,
- int stride,
- int count )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
- if (R200_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d size %d stride %d\n",
- __FUNCTION__, count, size, stride);
-
- assert (!rvb->buf);
-
- if (stride == 0) {
- r200AllocDmaRegion( rmesa, rvb, size * 4, 4 );
- count = 1;
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 0;
- rvb->aos_size = size;
- }
- else {
- r200AllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = size;
- rvb->aos_size = size;
- }
-
- /* Emit the data
- */
- switch (size) {
- case 1:
- emit_vec4( ctx, rvb, data, stride, count );
- break;
- case 2:
- emit_vec8( ctx, rvb, data, stride, count );
- break;
- case 3:
- emit_vec12( ctx, rvb, data, stride, count );
- break;
- case 4:
- emit_vec16( ctx, rvb, data, stride, count );
- break;
- default:
- assert(0);
- exit(1);
- break;
- }
-
-}
-
-
-
/* Emit any changed arrays to new GART memory, re-emit a packet to
* update the arrays.
*/
@@ -379,12 +105,12 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
{
r200ContextPtr rmesa = R200_CONTEXT( ctx );
struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
- struct r200_dma_region **component = rmesa->tcl.aos_components;
GLuint nr = 0;
GLuint vfmt0 = 0, vfmt1 = 0;
GLuint count = VB->Count;
GLuint i, emitsize;
+ // fprintf(stderr,"emit arrays\n");
for ( i = 0; i < 15; i++ ) {
GLubyte attrib = vimap_rev[i];
if (attrib != 255) {
@@ -416,20 +142,20 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
case 3:
/* special handling to fix up fog. Will get us into trouble with vbos...*/
assert(attrib == VERT_ATTRIB_FOG);
- if (!rmesa->tcl.vertex_data[i].buf) {
+ if (!rmesa->radeon.tcl.aos[i].bo) {
if (ctx->VertexProgram._Enabled)
- emit_vector( ctx,
- &(rmesa->tcl.vertex_data[i]),
- (char *)VB->AttribPtr[attrib]->data,
- 1,
- VB->AttribPtr[attrib]->stride,
- count);
+ rcommon_emit_vector( ctx,
+ &(rmesa->radeon.tcl.aos[nr]),
+ (char *)VB->AttribPtr[attrib]->data,
+ 1,
+ VB->AttribPtr[attrib]->stride,
+ count);
else
- emit_vecfog( ctx,
- &(rmesa->tcl.vertex_data[i]),
- (char *)VB->AttribPtr[attrib]->data,
- VB->AttribPtr[attrib]->stride,
- count);
+ r200_emit_vecfog( ctx,
+ &(rmesa->radeon.tcl.aos[nr]),
+ (char *)VB->AttribPtr[attrib]->data,
+ VB->AttribPtr[attrib]->stride,
+ count);
}
vfmt0 |= R200_VTX_DISCRETE_FOG;
goto after_emit;
@@ -473,17 +199,17 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
default:
assert(0);
}
- if (!rmesa->tcl.vertex_data[i].buf) {
- emit_vector( ctx,
- &(rmesa->tcl.vertex_data[i]),
- (char *)VB->AttribPtr[attrib]->data,
- emitsize,
- VB->AttribPtr[attrib]->stride,
- count );
+ if (!rmesa->radeon.tcl.aos[nr].bo) {
+ rcommon_emit_vector( ctx,
+ &(rmesa->radeon.tcl.aos[nr]),
+ (char *)VB->AttribPtr[attrib]->data,
+ emitsize,
+ VB->AttribPtr[attrib]->stride,
+ count );
}
after_emit:
assert(nr < 12);
- component[nr++] = &rmesa->tcl.vertex_data[i];
+ nr++;
}
}
@@ -494,19 +220,6 @@ after_emit:
rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = vfmt1;
}
- rmesa->tcl.nr_aos_components = nr;
+ rmesa->radeon.tcl.aos_count = nr;
}
-
-void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs )
-{
- r200ContextPtr rmesa = R200_CONTEXT( ctx );
-
- /* only do it for changed inputs ? */
- int i;
- for (i = 0; i < 15; i++) {
- if (newinputs & (1 << i))
- r200ReleaseDmaRegion( rmesa,
- &rmesa->tcl.vertex_data[i], __FUNCTION__ );
- }
-}
diff --git a/r200/r200_pixel.c b/r200/r200_pixel.c
index 2797cbb..9577387 100644
--- a/r200/r200_pixel.c
+++ b/r200/r200_pixel.c
@@ -51,29 +51,29 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format,
const void *pixels, GLint sz, GLint pitch )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- GLuint cpp = rmesa->r200Screen->cpp;
+ GLuint cpp = rmesa->radeon.radeonScreen->cpp;
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s\n", __FUNCTION__);
if ( (pitch & 63) ||
ctx->_ImageTransferState ||
packing->SwapBytes ||
packing->LsbFirst) {
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s: failed 1\n", __FUNCTION__);
return GL_FALSE;
}
- if ( type == GL_UNSIGNED_INT_8_8_8_8_REV &&
- cpp == 4 &&
+ if ( type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+ cpp == 4 &&
format == GL_BGRA ) {
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s: passed 2\n", __FUNCTION__);
return GL_TRUE;
}
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s: failed\n", __FUNCTION__);
return GL_FALSE;
@@ -83,7 +83,7 @@ static GLboolean
check_color_per_fragment_ops( const GLcontext *ctx )
{
int result;
- result = (!( ctx->Color.AlphaEnabled ||
+ result = (!( ctx->Color.AlphaEnabled ||
ctx->Depth.Test ||
ctx->Fog.Enabled ||
ctx->Scissor.Enabled ||
@@ -96,12 +96,12 @@ check_color_per_fragment_ops( const GLcontext *ctx )
ctx->Texture._EnabledUnits
) &&
ctx->Current.RasterPosValid);
-
+
return result;
}
-
+#if 0
static GLboolean
clip_pixelrect( const GLcontext *ctx,
const GLframebuffer *buffer,
@@ -137,11 +137,12 @@ clip_pixelrect( const GLcontext *ctx,
if (*height <= 0)
return GL_FALSE;
- *size = ((*y + *height - 1) * rmesa->r200Screen->frontPitch +
- (*x + *width - 1) * rmesa->r200Screen->cpp);
+ *size = ((*y + *height - 1) * rmesa->radeon.radeonScreen->frontPitch +
+ (*x + *width - 1) * rmesa->radeon.radeonScreen->cpp);
return GL_TRUE;
}
+#endif
static GLboolean
r200TryReadPixels( GLcontext *ctx,
@@ -150,29 +151,30 @@ r200TryReadPixels( GLcontext *ctx,
const struct gl_pixelstore_attrib *pack,
GLvoid *pixels )
{
+ return GL_FALSE;
+#if 0
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLint pitch = pack->RowLength ? pack->RowLength : width;
GLint blit_format;
- GLuint cpp = rmesa->r200Screen->cpp;
+ GLuint cpp = rmesa->radeon.radeonScreen->cpp;
GLint size = width * height * cpp;
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s\n", __FUNCTION__);
/* Only accelerate reading to GART buffers.
*/
- if ( !r200IsGartMemory(rmesa, pixels,
- pitch * height * rmesa->r200Screen->cpp ) ) {
- if (R200_DEBUG & DEBUG_PIXEL)
+ if ( !r200IsGartMemory(rmesa, pixels,
+ pitch * height * rmesa->radeon.radeonScreen->cpp ) ) {
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s: dest not GART\n", __FUNCTION__);
- return GL_FALSE;
}
/* Need GL_PACK_INVERT_MESA to cope with upsidedown results from
* blitter:
*/
if (!pack->Invert) {
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s: MESA_PACK_INVERT not set\n", __FUNCTION__);
return GL_FALSE;
}
@@ -180,7 +182,7 @@ r200TryReadPixels( GLcontext *ctx,
if (!check_color(ctx, type, format, pack, pixels, size, pitch))
return GL_FALSE;
- switch ( rmesa->r200Screen->cpp ) {
+ switch ( rmesa->radeon.radeonScreen->cpp ) {
case 4:
blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
break;
@@ -197,40 +199,40 @@ r200TryReadPixels( GLcontext *ctx,
* a full command buffer expects to be called unlocked. As a
* workaround, immediately flush the buffer on aquiring the lock.
*/
- LOCK_HARDWARE( rmesa );
+ LOCK_HARDWARE( &rmesa->radeon );
if (rmesa->store.cmd_used)
- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height,
&size)) {
- UNLOCK_HARDWARE( rmesa );
- if (R200_DEBUG & DEBUG_PIXEL)
+ UNLOCK_HARDWARE( &rmesa->radeon );
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s totally clipped -- nothing to do\n",
__FUNCTION__);
return GL_TRUE;
}
{
- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
driRenderbuffer *drb = (driRenderbuffer *) ctx->ReadBuffer->_ColorReadBuffer;
int nbox = dPriv->numClipRects;
int src_offset = drb->offset
- + rmesa->r200Screen->fbLocation;
+ + rmesa->radeon.radeonScreen->fbLocation;
int src_pitch = drb->pitch * drb->cpp;
int dst_offset = r200GartOffsetFromVirtual( rmesa, pixels );
- int dst_pitch = pitch * rmesa->r200Screen->cpp;
+ int dst_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
drm_clip_rect_t *box = dPriv->pClipRects;
int i;
- r200EmitWait( rmesa, RADEON_WAIT_3D );
+ r200EmitWait( rmesa, RADEON_WAIT_3D );
y = dPriv->h - y - height;
x += dPriv->x;
y += dPriv->y;
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "readpixel blit src_pitch %d dst_pitch %d\n",
src_pitch, dst_pitch);
@@ -240,7 +242,7 @@ r200TryReadPixels( GLcontext *ctx,
GLint by = box[i].y1;
GLint bw = box[i].x2 - bx;
GLint bh = box[i].y2 - by;
-
+
if (bx < x) bw -= x - bx, bx = x;
if (by < y) bh -= y - by, by = y;
if (bx + bw > x + width) bw = x + width - bx;
@@ -257,12 +259,12 @@ r200TryReadPixels( GLcontext *ctx,
bw, bh );
}
- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
}
- UNLOCK_HARDWARE( rmesa );
-
- r200Finish( ctx ); /* required by GL */
+ UNLOCK_HARDWARE( &rmesa->radeon );
+ radeonFinish( ctx ); /* required by GL */
+#endif
return GL_TRUE;
}
@@ -273,12 +275,12 @@ r200ReadPixels( GLcontext *ctx,
const struct gl_pixelstore_attrib *pack,
GLvoid *pixels )
{
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s\n", __FUNCTION__);
- if (!r200TryReadPixels( ctx, x, y, width, height, format, type, pack,
+ if (!r200TryReadPixels( ctx, x, y, width, height, format, type, pack,
pixels))
- _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack,
+ _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack,
pixels);
}
@@ -291,8 +293,12 @@ static void do_draw_pix( GLcontext *ctx,
const void *pixels,
GLuint planemask)
{
+ if (R200_DEBUG & RADEON_PIXEL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+#if 0
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
drm_clip_rect_t *box = dPriv->pClipRects;
struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0];
driRenderbuffer *drb = (driRenderbuffer *) rb;
@@ -301,12 +307,9 @@ static void do_draw_pix( GLcontext *ctx,
int blit_format;
int size;
int src_offset = r200GartOffsetFromVirtual( rmesa, pixels );
- int src_pitch = pitch * rmesa->r200Screen->cpp;
+ int src_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
- if (R200_DEBUG & DEBUG_PIXEL)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- switch ( rmesa->r200Screen->cpp ) {
+ switch ( rmesa->radeon.radeonScreen->cpp ) {
case 2:
blit_format = R200_CP_COLOR_FORMAT_RGB565;
break;
@@ -318,17 +321,17 @@ static void do_draw_pix( GLcontext *ctx,
}
- LOCK_HARDWARE( rmesa );
+ LOCK_HARDWARE( &rmesa->radeon );
if (rmesa->store.cmd_used)
- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
y -= height; /* cope with pixel zoom */
-
+
if (!clip_pixelrect(ctx, ctx->DrawBuffer,
&x, &y, &width, &height,
&size)) {
- UNLOCK_HARDWARE( rmesa );
+ UNLOCK_HARDWARE( &rmesa->radeon );
return;
}
@@ -357,15 +360,16 @@ static void do_draw_pix( GLcontext *ctx,
blit_format,
src_pitch, src_offset,
drb->pitch * drb->cpp,
- drb->offset + rmesa->r200Screen->fbLocation,
+ drb->offset + rmesa->radeon.radeonScreen->fbLocation,
bx - x, by - y,
bx, by,
bw, bh );
}
- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
- r200WaitForIdleLocked( rmesa ); /* required by GL */
- UNLOCK_HARDWARE( rmesa );
+ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+ radeonWaitForIdleLocked( &rmesa->radeon ); /* required by GL */
+ UNLOCK_HARDWARE( &rmesa->radeon );
+#endif
}
@@ -381,10 +385,10 @@ r200TryDrawPixels( GLcontext *ctx,
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLint pitch = unpack->RowLength ? unpack->RowLength : width;
GLuint planemask;
- GLuint cpp = rmesa->r200Screen->cpp;
+ GLuint cpp = rmesa->radeon.radeonScreen->cpp;
GLint size = height * pitch * cpp;
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s\n", __FUNCTION__);
/* check that we're drawing to exactly one color buffer */
@@ -395,7 +399,7 @@ r200TryDrawPixels( GLcontext *ctx,
case GL_RGB:
case GL_RGBA:
case GL_BGRA:
- planemask = r200PackColor(cpp,
+ planemask = radeonPackColor(cpp,
ctx->Color.ColorMask[RCOMP],
ctx->Color.ColorMask[GCOMP],
ctx->Color.ColorMask[BCOMP],
@@ -407,10 +411,10 @@ r200TryDrawPixels( GLcontext *ctx,
if (planemask != ~0)
return GL_FALSE; /* fix me -- should be possible */
- /* Can't do conversions on GART reads/draws.
+ /* Can't do conversions on GART reads/draws.
*/
if ( !r200IsGartMemory( rmesa, pixels, size ) ) {
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s: not GART memory\n", __FUNCTION__);
return GL_FALSE;
}
@@ -431,7 +435,7 @@ r200TryDrawPixels( GLcontext *ctx,
return GL_FALSE;
}
- if ( r200IsGartMemory(rmesa, pixels, size) )
+ if (0)// r200IsGartMemory(rmesa, pixels, size) )
{
do_draw_pix( ctx, x, y, width, height, pitch, pixels, planemask );
return GL_TRUE;
@@ -453,7 +457,7 @@ r200DrawPixels( GLcontext *ctx,
const struct gl_pixelstore_attrib *unpack,
const GLvoid *pixels )
{
- if (R200_DEBUG & DEBUG_PIXEL)
+ if (R200_DEBUG & RADEON_PIXEL)
fprintf(stderr, "%s\n", __FUNCTION__);
if (!r200TryDrawPixels( ctx, x, y, width, height, format, type,
@@ -471,7 +475,7 @@ r200Bitmap( GLcontext *ctx, GLint px, GLint py,
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- if (rmesa->Fallback)
+ if (rmesa->radeon.Fallback)
_swrast_Bitmap( ctx, px, py, width, height, unpack, bitmap );
else
r200PointsBitmap( ctx, px, py, width, height, unpack, bitmap );
@@ -482,9 +486,9 @@ r200Bitmap( GLcontext *ctx, GLint px, GLint py,
void r200InitPixelFuncs( GLcontext *ctx )
{
if (!getenv("R200_NO_BLITS")) {
- ctx->Driver.ReadPixels = r200ReadPixels;
- ctx->Driver.DrawPixels = r200DrawPixels;
- if (getenv("R200_HW_BITMAP"))
+ ctx->Driver.ReadPixels = r200ReadPixels;
+ ctx->Driver.DrawPixels = r200DrawPixels;
+ if (getenv("R200_HW_BITMAP"))
ctx->Driver.Bitmap = r200Bitmap;
}
}
diff --git a/r200/r200_reg.h b/r200/r200_reg.h
index 5ce287f..526a624 100644
--- a/r200/r200_reg.h
+++ b/r200/r200_reg.h
@@ -463,8 +463,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R200_VSC_UPDATE_USER_COLOR_1_ENABLE 0x00020000
/* gap */
#define R200_SE_TCL_VECTOR_INDX_REG 0x2200
+# define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT 16
+# define RADEON_VEC_INDX_DWORD_COUNT_SHIFT 28
#define R200_SE_TCL_VECTOR_DATA_REG 0x2204
#define R200_SE_TCL_SCALAR_INDX_REG 0x2208
+# define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT 16
#define R200_SE_TCL_SCALAR_DATA_REG 0x220c
/* gap */
#define R200_SE_TCL_MATRIX_SEL_0 0x2230
@@ -949,6 +952,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R200_LOD_BIAS_MASK (0xfff80000)
#define R200_LOD_BIAS_SHIFT 19
#define R200_PP_TXSIZE_0 0x2c0c /* NPOT only */
+#define R200_PP_TX_WIDTHMASK_SHIFT 0
+#define R200_PP_TX_HEIGHTMASK_SHIFT 16
+
#define R200_PP_TXPITCH_0 0x2c10 /* NPOT only */
#define R200_PP_BORDER_COLOR_0 0x2c14
#define R200_PP_CUBIC_FACES_0 0x2c18
diff --git a/r200/r200_sanity.c b/r200/r200_sanity.c
index 36530c2..1241a92 100644
--- a/r200/r200_sanity.c
+++ b/r200/r200_sanity.c
@@ -48,11 +48,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define MORE_VERBOSE 1
#if MORE_VERBOSE
-#define VERBOSE (R200_DEBUG & DEBUG_VERBOSE)
+#define VERBOSE (R200_DEBUG & RADEON_VERBOSE)
#define NORMAL (1)
#else
#define VERBOSE 0
-#define NORMAL (R200_DEBUG & DEBUG_VERBOSE)
+#define NORMAL (R200_DEBUG & RADEON_VERBOSE)
#endif
diff --git a/r200/r200_span.c b/r200/r200_span.c
deleted file mode 100644
index 9783678..0000000
--- a/r200/r200_span.c
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
-
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "main/glheader.h"
-#include "main/imports.h"
-#include "main/colormac.h"
-#include "swrast/swrast.h"
-
-#include "r200_context.h"
-#include "r200_ioctl.h"
-#include "r200_state.h"
-#include "r200_span.h"
-#include "r200_tex.h"
-
-#define DBG 0
-
-/*
- * Note that all information needed to access pixels in a renderbuffer
- * should be obtained through the gl_renderbuffer parameter, not per-context
- * information.
- */
-#define LOCAL_VARS \
- driRenderbuffer *drb = (driRenderbuffer *) rb; \
- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
- const GLuint bottom = dPriv->h - 1; \
- GLubyte *buf = (GLubyte *) drb->flippedData \
- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \
- GLuint p; \
- (void) p;
-
-#define LOCAL_DEPTH_VARS \
- driRenderbuffer *drb = (driRenderbuffer *) rb; \
- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
- const GLuint bottom = dPriv->h - 1; \
- GLuint xo = dPriv->x; \
- GLuint yo = dPriv->y; \
- GLubyte *buf = (GLubyte *) drb->Base.Data;
-
-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
-
-#define Y_FLIP(Y) (bottom - (Y))
-
-#define HW_LOCK()
-
-#define HW_UNLOCK()
-
-
-
-/* ================================================================
- * Color buffer
- */
-
-/* 16 bit, RGB565 color spanline and pixel functions
- */
-#define SPANTMP_PIXEL_FMT GL_RGB
-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
-
-#define TAG(x) r200##x##_RGB565
-#define TAG2(x,y) r200##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
-#include "spantmp2.h"
-
-/* 32 bit, ARGB8888 color spanline and pixel functions
- */
-#define SPANTMP_PIXEL_FMT GL_BGRA
-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
-
-#define TAG(x) r200##x##_ARGB8888
-#define TAG2(x,y) r200##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
-#include "spantmp2.h"
-
-
-/* ================================================================
- * Depth buffer
- */
-
-/* The Radeon family has depth tiling on all the time, so we have to convert
- * the x,y coordinates into the memory bus address (mba) in the same
- * manner as the engine. In each case, the linear block address (ba)
- * is calculated, and then wired with x and y to produce the final
- * memory address.
- * The chip will do address translation on its own if the surface registers
- * are set up correctly. It is not quite enough to get it working with hyperz too...
- */
-
-/* extract bit 'b' of x, result is zero or one */
-#define BIT(x,b) ((x & (1<<b))>>b)
-
-static GLuint
-r200_mba_z32( driRenderbuffer *drb, GLint x, GLint y )
-{
- GLuint pitch = drb->pitch;
- if (drb->depthHasSurface) {
- return 4 * (x + y * pitch);
- }
- else {
- GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x7FF) >> 5);
- GLuint a =
- (BIT(x,0) << 2) |
- (BIT(y,0) << 3) |
- (BIT(x,1) << 4) |
- (BIT(y,1) << 5) |
- (BIT(x,3) << 6) |
- (BIT(x,4) << 7) |
- (BIT(x,2) << 8) |
- (BIT(y,2) << 9) |
- (BIT(y,3) << 10) |
- (((pitch & 0x20) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
- ((b >> 1) << 12);
- return a;
- }
-}
-
-static GLuint
-r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
-{
- GLuint pitch = drb->pitch;
- if (drb->depthHasSurface) {
- return 2 * (x + y * pitch);
- }
- else {
- GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x7FF) >> 6);
- GLuint a =
- (BIT(x,0) << 1) |
- (BIT(y,0) << 2) |
- (BIT(x,1) << 3) |
- (BIT(y,1) << 4) |
- (BIT(x,2) << 5) |
- (BIT(x,4) << 6) |
- (BIT(x,5) << 7) |
- (BIT(x,3) << 8) |
- (BIT(y,2) << 9) |
- (BIT(y,3) << 10) |
- (((pitch & 0x40) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
- ((b >> 1) << 12);
- return a;
- }
-}
-
-
-/* 16-bit depth buffer functions
- */
-#define VALUE_TYPE GLushort
-
-#define WRITE_DEPTH( _x, _y, d ) \
- *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d;
-
-#define READ_DEPTH( d, _x, _y ) \
- d = *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo ));
-
-#define TAG(x) r200##x##_z16
-#include "depthtmp.h"
-
-
-/* 24 bit depth, 8 bit stencil depthbuffer functions
- */
-#define VALUE_TYPE GLuint
-
-#define WRITE_DEPTH( _x, _y, d ) \
-do { \
- GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
- tmp &= 0xff000000; \
- tmp |= ((d) & 0x00ffffff); \
- *(GLuint *)(buf + offset) = tmp; \
-} while (0)
-
-#define READ_DEPTH( d, _x, _y ) \
- d = *(GLuint *)(buf + r200_mba_z32( drb, _x + xo, \
- _y + yo )) & 0x00ffffff;
-
-#define TAG(x) r200##x##_z24_s8
-#include "depthtmp.h"
-
-
-/* ================================================================
- * Stencil buffer
- */
-
-/* 24 bit depth, 8 bit stencil depthbuffer functions
- */
-#define WRITE_STENCIL( _x, _y, d ) \
-do { \
- GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
- tmp &= 0x00ffffff; \
- tmp |= (((d) & 0xff) << 24); \
- *(GLuint *)(buf + offset) = tmp; \
-} while (0)
-
-#define READ_STENCIL( d, _x, _y ) \
-do { \
- GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
- tmp &= 0xff000000; \
- d = tmp >> 24; \
-} while (0)
-
-#define TAG(x) r200##x##_z24_s8
-#include "stenciltmp.h"
-
-
-/* Move locking out to get reasonable span performance (10x better
- * than doing this in HW_LOCK above). WaitForIdle() is the main
- * culprit.
- */
-
-static void r200SpanRenderStart( GLcontext *ctx )
-{
- r200ContextPtr rmesa = R200_CONTEXT( ctx );
-
- R200_FIREVERTICES( rmesa );
- LOCK_HARDWARE( rmesa );
- r200WaitForIdleLocked( rmesa );
-
- /* Read & rewrite the first pixel in the frame buffer. This should
- * be a noop, right? In fact without this conform fails as reading
- * from the framebuffer sometimes produces old results -- the
- * on-card read cache gets mixed up and doesn't notice that the
- * framebuffer has been updated.
- *
- * In the worst case this is buggy too as p might get the wrong
- * value first time, so really need a hidden pixel somewhere for this.
- */
- {
- int p;
- driRenderbuffer *drb =
- (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
- volatile int *buf =
- (volatile int *)(rmesa->dri.screen->pFB + drb->offset);
- p = *buf;
- *buf = p;
- }
-}
-
-static void r200SpanRenderFinish( GLcontext *ctx )
-{
- r200ContextPtr rmesa = R200_CONTEXT( ctx );
- _swrast_flush( ctx );
- UNLOCK_HARDWARE( rmesa );
-}
-
-void r200InitSpanFuncs( GLcontext *ctx )
-{
- struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
- swdd->SpanRenderStart = r200SpanRenderStart;
- swdd->SpanRenderFinish = r200SpanRenderFinish;
-}
-
-
-
-/**
- * Plug in the Get/Put routines for the given driRenderbuffer.
- */
-void
-radeonSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
-{
- if (drb->Base.InternalFormat == GL_RGBA) {
- if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
- r200InitPointers_RGB565(&drb->Base);
- }
- else {
- r200InitPointers_ARGB8888(&drb->Base);
- }
- }
- else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
- r200InitDepthPointers_z16(&drb->Base);
- }
- else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
- r200InitDepthPointers_z24_s8(&drb->Base);
- }
- else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
- r200InitStencilPointers_z24_s8(&drb->Base);
- }
-}
diff --git a/r200/r200_state.c b/r200/r200_state.c
index 2fcc87c..76852e3 100644
--- a/r200/r200_state.c
+++ b/r200/r200_state.c
@@ -47,6 +47,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "tnl/t_pipeline.h"
#include "swrast_setup/swrast_setup.h"
+#include "radeon_common.h"
+#include "radeon_mipmap_tree.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_state.h"
@@ -77,7 +79,7 @@ static void r200AlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
switch ( func ) {
case GL_NEVER:
- pp_misc |= R200_ALPHA_TEST_FAIL;
+ pp_misc |= R200_ALPHA_TEST_FAIL;
break;
case GL_LESS:
pp_misc |= R200_ALPHA_TEST_LESS;
@@ -114,8 +116,8 @@ static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] )
CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
- if (rmesa->r200Screen->drmSupportsBlendColor)
- rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] );
+ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
+ rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = radeonPackColor( 4, color[0], color[1], color[2], color[3] );
}
/**
@@ -213,7 +215,7 @@ static void r200_set_blend_state( GLcontext * ctx )
R200_STATECHANGE( rmesa, ctx );
- if (rmesa->r200Screen->drmSupportsBlendColor) {
+ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
if (ctx->Color.ColorLogicOpEnabled) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ROP_ENABLE;
rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
@@ -278,7 +280,7 @@ static void r200_set_blend_state( GLcontext * ctx )
return;
}
- if (!rmesa->r200Screen->drmSupportsBlendColor) {
+ if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
return;
}
@@ -383,10 +385,10 @@ static void r200ClearDepth( GLcontext *ctx, GLclampd d )
switch ( format ) {
case R200_DEPTH_FORMAT_16BIT_INT_Z:
- rmesa->state.depth.clear = d * 0x0000ffff;
+ rmesa->radeon.state.depth.clear = d * 0x0000ffff;
break;
case R200_DEPTH_FORMAT_24BIT_INT_Z:
- rmesa->state.depth.clear = d * 0x00ffffff;
+ rmesa->radeon.state.depth.clear = d * 0x00ffffff;
break;
}
}
@@ -477,10 +479,10 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
}
}
break;
- case GL_FOG_COLOR:
+ case GL_FOG_COLOR:
R200_STATECHANGE( rmesa, ctx );
UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
- i = r200PackColor( 4, col[0], col[1], col[2], 0 );
+ i = radeonPackColor( 4, col[0], col[1], col[2], 0 );
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
break;
@@ -505,7 +507,7 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
if (out_0 != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0]) {
R200_STATECHANGE( rmesa, vtx );
- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0;
+ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0;
}
break;
@@ -521,102 +523,6 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
}
}
-
-/* =============================================================
- * Scissoring
- */
-
-
-static GLboolean intersect_rect( drm_clip_rect_t *out,
- drm_clip_rect_t *a,
- drm_clip_rect_t *b )
-{
- *out = *a;
- if ( b->x1 > out->x1 ) out->x1 = b->x1;
- if ( b->y1 > out->y1 ) out->y1 = b->y1;
- if ( b->x2 < out->x2 ) out->x2 = b->x2;
- if ( b->y2 < out->y2 ) out->y2 = b->y2;
- if ( out->x1 >= out->x2 ) return GL_FALSE;
- if ( out->y1 >= out->y2 ) return GL_FALSE;
- return GL_TRUE;
-}
-
-
-void r200RecalcScissorRects( r200ContextPtr rmesa )
-{
- drm_clip_rect_t *out;
- int i;
-
- /* Grow cliprect store?
- */
- if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
- while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
- rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */
- rmesa->state.scissor.numAllocedClipRects *= 2;
- }
-
- if (rmesa->state.scissor.pClipRects)
- FREE(rmesa->state.scissor.pClipRects);
-
- rmesa->state.scissor.pClipRects =
- MALLOC( rmesa->state.scissor.numAllocedClipRects *
- sizeof(drm_clip_rect_t) );
-
- if ( rmesa->state.scissor.pClipRects == NULL ) {
- rmesa->state.scissor.numAllocedClipRects = 0;
- return;
- }
- }
-
- out = rmesa->state.scissor.pClipRects;
- rmesa->state.scissor.numClipRects = 0;
-
- for ( i = 0 ; i < rmesa->numClipRects ; i++ ) {
- if ( intersect_rect( out,
- &rmesa->pClipRects[i],
- &rmesa->state.scissor.rect ) ) {
- rmesa->state.scissor.numClipRects++;
- out++;
- }
- }
-}
-
-
-static void r200UpdateScissor( GLcontext *ctx )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
- if ( rmesa->dri.drawable ) {
- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-
- int x = ctx->Scissor.X;
- int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
- int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
- int h = dPriv->h - ctx->Scissor.Y - 1;
-
- rmesa->state.scissor.rect.x1 = x + dPriv->x;
- rmesa->state.scissor.rect.y1 = y + dPriv->y;
- rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
- rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
-
- r200RecalcScissorRects( rmesa );
- }
-}
-
-
-static void r200Scissor( GLcontext *ctx,
- GLint x, GLint y, GLsizei w, GLsizei h )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
- if ( ctx->Scissor.Enabled ) {
- R200_FIREVERTICES( rmesa ); /* don't pipeline cliprect changes */
- r200UpdateScissor( ctx );
- }
-
-}
-
-
/* =============================================================
* Culling
*/
@@ -668,6 +574,10 @@ static void r200FrontFace( GLcontext *ctx, GLenum mode )
R200_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_CULL_FRONT_IS_CCW;
+ /* Winding is inverted when rendering to FBO */
+ if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+ mode = (mode == GL_CW) ? GL_CCW : GL_CW;
+
switch ( mode ) {
case GL_CW:
rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_FFACE_CULL_CW;
@@ -790,7 +700,7 @@ static void r200LineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
r200ContextPtr rmesa = R200_CONTEXT(ctx);
R200_STATECHANGE( rmesa, lin );
- rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] =
+ rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] =
((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern));
}
@@ -803,21 +713,27 @@ static void r200ColorMask( GLcontext *ctx,
GLboolean b, GLboolean a )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- GLuint mask = r200PackColor( rmesa->r200Screen->cpp,
- ctx->Color.ColorMask[RCOMP],
- ctx->Color.ColorMask[GCOMP],
- ctx->Color.ColorMask[BCOMP],
- ctx->Color.ColorMask[ACOMP] );
-
+ GLuint mask;
+ struct radeon_renderbuffer *rrb;
GLuint flag = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] & ~R200_PLANE_MASK_ENABLE;
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ if (!rrb)
+ return;
+ mask = radeonPackColor( rrb->cpp,
+ ctx->Color.ColorMask[RCOMP],
+ ctx->Color.ColorMask[GCOMP],
+ ctx->Color.ColorMask[BCOMP],
+ ctx->Color.ColorMask[ACOMP] );
+
+
if (!(r && g && b && a))
flag |= R200_PLANE_MASK_ENABLE;
- if ( rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] != flag ) {
- R200_STATECHANGE( rmesa, ctx );
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = flag;
- }
+ if ( rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] != flag ) {
+ R200_STATECHANGE( rmesa, ctx );
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = flag;
+ }
if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) {
R200_STATECHANGE( rmesa, msk );
@@ -834,7 +750,8 @@ static void r200PolygonOffset( GLcontext *ctx,
GLfloat factor, GLfloat units )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- float_ui32_type constant = { units * rmesa->state.depth.scale };
+ const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+ float_ui32_type constant = { units * depthScale };
float_ui32_type factoru = { factor };
/* factor *= 2; */
@@ -847,41 +764,16 @@ static void r200PolygonOffset( GLcontext *ctx,
rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32;
}
-static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
- GLuint i;
- drm_radeon_stipple_t stipple;
-
- /* Must flip pattern upside down.
- */
- for ( i = 0 ; i < 32 ; i++ ) {
- rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i];
- }
-
- /* TODO: push this into cmd mechanism
- */
- R200_FIREVERTICES( rmesa );
- LOCK_HARDWARE( rmesa );
-
- /* FIXME: Use window x,y offsets into stipple RAM.
- */
- stipple.mask = rmesa->state.stipple.mask;
- drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE,
- &stipple, sizeof(stipple) );
- UNLOCK_HARDWARE( rmesa );
-}
-
static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
/* Can't generally do unfilled via tcl, but some good special
- * cases work.
+ * cases work.
*/
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, flag);
- if (rmesa->TclFallback) {
+ if (rmesa->radeon.TclFallback) {
r200ChooseRenderState( ctx );
r200ChooseVertexState( ctx );
}
@@ -920,34 +812,34 @@ static void r200UpdateSpecular( GLcontext *ctx )
if (ctx->Light.Enabled &&
ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) {
- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
+ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) |
- (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
+ (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0;
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE;
p |= R200_SPECULAR_ENABLE;
- rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &=
+ rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &=
~R200_DIFFUSE_SPECULAR_COMBINE;
}
else if (ctx->Light.Enabled) {
- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
- ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));
+ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
+ ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE;
} else if (ctx->Fog.ColorSumEnabled ) {
- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
+ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) |
- (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
+ (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
p |= R200_SPECULAR_ENABLE;
} else {
- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
- ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));
+ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
+ ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));
}
if (ctx->Fog.Enabled) {
- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
- ((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
+ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
+ ((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1;
}
@@ -958,7 +850,7 @@ static void r200UpdateSpecular( GLcontext *ctx )
/* Update vertex/render formats
*/
- if (rmesa->TclFallback) {
+ if (rmesa->radeon.TclFallback) {
r200ChooseRenderState( ctx );
r200ChooseVertexState( ctx );
}
@@ -970,7 +862,7 @@ static void r200UpdateSpecular( GLcontext *ctx )
*/
-/* Update on colormaterial, material emmissive/ambient,
+/* Update on colormaterial, material emmissive/ambient,
* lightmodel.globalambient
*/
static void update_global_ambient( GLcontext *ctx )
@@ -984,23 +876,23 @@ static void update_global_ambient( GLcontext *ctx )
*/
if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] &
((3 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
- (3 << R200_FRONT_AMBIENT_SOURCE_SHIFT))) == 0)
+ (3 << R200_FRONT_AMBIENT_SOURCE_SHIFT))) == 0)
{
- COPY_3V( &fcmd[GLT_RED],
+ COPY_3V( &fcmd[GLT_RED],
ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]);
ACC_SCALE_3V( &fcmd[GLT_RED],
ctx->Light.Model.Ambient,
ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]);
- }
+ }
else
{
COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient );
}
-
+
R200_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
}
-/* Update on change to
+/* Update on change to
* - light[p].colors
* - light[p].enabled
*/
@@ -1014,10 +906,10 @@ static void update_light_colors( GLcontext *ctx, GLuint p )
r200ContextPtr rmesa = R200_CONTEXT(ctx);
float *fcmd = (float *)R200_DB_STATE( lit[p] );
- COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );
+ COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );
COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse );
COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular );
-
+
R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
}
}
@@ -1037,7 +929,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
if (ctx->Light.ColorMaterialEnabled) {
GLuint mask = ctx->Light.ColorMaterialBitmask;
-
+
if (mask & MAT_BIT_FRONT_EMISSION) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_FRONT_EMISSIVE_SOURCE_SHIFT);
@@ -1053,7 +945,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
else
light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
R200_FRONT_AMBIENT_SOURCE_SHIFT);
-
+
if (mask & MAT_BIT_FRONT_DIFFUSE) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_FRONT_DIFFUSE_SOURCE_SHIFT);
@@ -1061,7 +953,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
else
light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
R200_FRONT_DIFFUSE_SOURCE_SHIFT);
-
+
if (mask & MAT_BIT_FRONT_SPECULAR) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_FRONT_SPECULAR_SOURCE_SHIFT);
@@ -1070,7 +962,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
R200_FRONT_SPECULAR_SOURCE_SHIFT);
}
-
+
if (mask & MAT_BIT_BACK_EMISSION) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_BACK_EMISSIVE_SOURCE_SHIFT);
@@ -1120,8 +1012,8 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
R200_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = light_model_ctl1;
}
-
-
+
+
}
void r200UpdateMaterial( GLcontext *ctx )
@@ -1131,12 +1023,12 @@ void r200UpdateMaterial( GLcontext *ctx )
GLfloat *fcmd = (GLfloat *)R200_DB_STATE( mtl[0] );
GLfloat *fcmd2 = (GLfloat *)R200_DB_STATE( mtl[1] );
GLuint mask = ~0;
-
+
/* Might be possible and faster to update everything unconditionally? */
if (ctx->Light.ColorMaterialEnabled)
mask &= ~ctx->Light.ColorMaterialBitmask;
- if (R200_DEBUG & DEBUG_STATE)
+ if (R200_DEBUG & RADEON_STATE)
fprintf(stderr, "%s\n", __FUNCTION__);
if (mask & MAT_BIT_FRONT_EMISSION) {
@@ -1217,7 +1109,7 @@ void r200UpdateMaterial( GLcontext *ctx )
*
* which are calculated in light.c and are correct for the current
* lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
- * and _MESA_NEW_NEED_EYE_COORDS.
+ * and _MESA_NEW_NEED_EYE_COORDS.
*/
static void update_light( GLcontext *ctx )
{
@@ -1234,8 +1126,8 @@ static void update_light( GLcontext *ctx )
tmp &= ~R200_LIGHT_IN_MODELSPACE;
else
tmp |= R200_LIGHT_IN_MODELSPACE;
-
- if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0])
+
+ if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0])
{
R200_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = tmp;
@@ -1259,10 +1151,10 @@ static void update_light( GLcontext *ctx )
if (ctx->Light.Light[p].Enabled) {
struct gl_light *l = &ctx->Light.Light[p];
GLfloat *fcmd = (GLfloat *)R200_DB_STATE( lit[p] );
-
+
if (l->EyePosition[3] == 0.0) {
- COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm );
- COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm );
+ COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm );
+ COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm );
fcmd[LIT_POSITION_W] = 0;
fcmd[LIT_DIRECTION_W] = 0;
} else {
@@ -1286,21 +1178,21 @@ static void r200Lightfv( GLcontext *ctx, GLenum light,
GLint p = light - GL_LIGHT0;
struct gl_light *l = &ctx->Light.Light[p];
GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
-
+
switch (pname) {
- case GL_AMBIENT:
+ case GL_AMBIENT:
case GL_DIFFUSE:
case GL_SPECULAR:
update_light_colors( ctx, p );
break;
- case GL_SPOT_DIRECTION:
- /* picked up in update_light */
+ case GL_SPOT_DIRECTION:
+ /* picked up in update_light */
break;
case GL_POSITION: {
- /* positions picked up in update_light, but can do flag here */
+ /* positions picked up in update_light, but can do flag here */
GLuint flag = (p&1)? R200_LIGHT_1_IS_LOCAL : R200_LIGHT_0_IS_LOCAL;
GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
@@ -1416,7 +1308,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
r200ContextPtr rmesa = R200_CONTEXT(ctx);
switch (pname) {
- case GL_LIGHT_MODEL_AMBIENT:
+ case GL_LIGHT_MODEL_AMBIENT:
update_global_ambient( ctx );
break;
@@ -1430,7 +1322,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE;
else
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE);
- if (rmesa->TclFallback) {
+ if (rmesa->radeon.TclFallback) {
r200ChooseRenderState( ctx );
r200ChooseVertexState( ctx );
}
@@ -1675,7 +1567,7 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- rmesa->state.stencil.clear =
+ rmesa->radeon.state.stencil.clear =
((GLuint) (ctx->Stencil.Clear & 0xff) |
(0xff << R200_STENCIL_MASK_SHIFT) |
((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT));
@@ -1700,19 +1592,29 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
void r200UpdateWindow( GLcontext *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
- GLfloat xoffset = (GLfloat)dPriv->x;
- GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
+ GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+ GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
const GLfloat *v = ctx->Viewport._WindowMap.m;
+ const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0);
+ const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+ GLfloat y_scale, y_bias;
+
+ if (render_to_fbo) {
+ y_scale = 1.0;
+ y_bias = 0;
+ } else {
+ y_scale = -1.0;
+ y_bias = yoffset;
+ }
float_ui32_type sx = { v[MAT_SX] };
float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
- float_ui32_type sy = { - v[MAT_SY] };
- float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
- float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
- float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
+ float_ui32_type sy = { v[MAT_SY] * y_scale };
+ float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y };
+ float_ui32_type sz = { v[MAT_SZ] * depthScale };
+ float_ui32_type tz = { v[MAT_TZ] * depthScale };
- R200_FIREVERTICES( rmesa );
R200_STATECHANGE( rmesa, vpt );
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32;
@@ -1723,6 +1625,30 @@ void r200UpdateWindow( GLcontext *ctx )
rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32;
}
+void r200_vtbl_update_scissor( GLcontext *ctx )
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ unsigned x1, y1, x2, y2;
+ struct radeon_renderbuffer *rrb;
+
+ R200_SET_STATE(r200, set, SET_RE_CNTL, R200_SCISSOR_ENABLE | r200->hw.set.cmd[SET_RE_CNTL]);
+
+ if (r200->radeon.state.scissor.enabled) {
+ x1 = r200->radeon.state.scissor.rect.x1;
+ y1 = r200->radeon.state.scissor.rect.y1;
+ x2 = r200->radeon.state.scissor.rect.x2;
+ y2 = r200->radeon.state.scissor.rect.y2;
+ } else {
+ rrb = radeon_get_colorbuffer(&r200->radeon);
+ x1 = 0;
+ y1 = 0;
+ x2 = rrb->base.Width - 1;
+ y2 = rrb->base.Height - 1;
+ }
+
+ R200_SET_STATE(r200, sci, SCI_XY_1, x1 | (y1 << 16));
+ R200_SET_STATE(r200, sci, SCI_XY_2, x2 | (y2 << 16));
+}
static void r200Viewport( GLcontext *ctx, GLint x, GLint y,
@@ -1733,6 +1659,8 @@ static void r200Viewport( GLcontext *ctx, GLint x, GLint y,
* values, or keep the originals hanging around.
*/
r200UpdateWindow( ctx );
+
+ radeon_viewport(ctx, x, y, width, height);
}
static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
@@ -1744,7 +1672,7 @@ static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
void r200UpdateViewportOffset( GLcontext *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
GLfloat xoffset = (GLfloat)dPriv->x;
GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
const GLfloat *v = ctx->Viewport._WindowMap.m;
@@ -1774,8 +1702,8 @@ void r200UpdateViewportOffset( GLcontext *ctx )
R200_STIPPLE_Y_OFFSET_MASK);
/* add magic offsets, then invert */
- stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
- sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
+ stx = 31 - ((dPriv->x - 1) & R200_STIPPLE_COORD_MASK);
+ sty = 31 - ((dPriv->y + dPriv->h - 1)
& R200_STIPPLE_COORD_MASK);
m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) |
@@ -1788,7 +1716,7 @@ void r200UpdateViewportOffset( GLcontext *ctx )
}
}
- r200UpdateScissor( ctx );
+ radeonUpdateScissor( ctx );
}
@@ -1801,11 +1729,16 @@ static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLubyte color[4];
+ struct radeon_renderbuffer *rrb;
+
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ if (!rrb)
+ return;
CLAMPED_FLOAT_TO_UBYTE(color[0], c[0]);
CLAMPED_FLOAT_TO_UBYTE(color[1], c[1]);
CLAMPED_FLOAT_TO_UBYTE(color[2], c[2]);
CLAMPED_FLOAT_TO_UBYTE(color[3], c[3]);
- rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp,
+ rmesa->radeon.state.color.clear = radeonPackColor( rrb->cpp,
color[0], color[1],
color[2], color[3] );
}
@@ -1848,96 +1781,6 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode )
rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = r200_rop_tab[rop];
}
-
-/*
- * Set up the cliprects for either front or back-buffer drawing.
- */
-void r200SetCliprects( r200ContextPtr rmesa )
-{
- __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
- __DRIdrawablePrivate *const readable = rmesa->dri.readable;
- GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
- GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
-
- if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BIT_BACK_LEFT) {
- /* Can't ignore 2d windows if we are page flipping.
- */
- if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
- rmesa->numClipRects = drawable->numClipRects;
- rmesa->pClipRects = drawable->pClipRects;
- }
- else {
- rmesa->numClipRects = drawable->numBackClipRects;
- rmesa->pClipRects = drawable->pBackClipRects;
- }
- }
- else {
- /* front buffer (or none, or multiple buffers) */
- rmesa->numClipRects = drawable->numClipRects;
- rmesa->pClipRects = drawable->pClipRects;
- }
-
- if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
- _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
- drawable->w, drawable->h);
- draw_fb->Initialized = GL_TRUE;
- }
-
- if (drawable != readable) {
- if ((read_fb->Width != readable->w) ||
- (read_fb->Height != readable->h)) {
- _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
- readable->w, readable->h);
- read_fb->Initialized = GL_TRUE;
- }
- }
-
- if (rmesa->state.scissor.enabled)
- r200RecalcScissorRects( rmesa );
-
- rmesa->lastStamp = drawable->lastStamp;
-}
-
-
-static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
- if (R200_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s %s\n", __FUNCTION__,
- _mesa_lookup_enum_by_nr( mode ));
-
- R200_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */
-
- if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
- /* 0 (GL_NONE) buffers or multiple color drawing buffers */
- FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
- return;
- }
-
- switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
- case BUFFER_FRONT_LEFT:
- case BUFFER_BACK_LEFT:
- FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
- break;
- default:
- FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
- return;
- }
-
- r200SetCliprects( rmesa );
-
- /* We'll set the drawing engine's offset/pitch parameters later
- * when we update other state.
- */
-}
-
-
-static void r200ReadBuffer( GLcontext *ctx, GLenum mode )
-{
- /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
-}
-
/* =============================================================
* State enable/disable
*/
@@ -1947,7 +1790,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint p, flag;
- if ( R200_DEBUG & DEBUG_STATE )
+ if ( R200_DEBUG & RADEON_STATE )
fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
_mesa_lookup_enum_by_nr( cap ),
state ? "GL_TRUE" : "GL_FALSE" );
@@ -1979,7 +1822,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
case GL_CLIP_PLANE2:
case GL_CLIP_PLANE3:
case GL_CLIP_PLANE4:
- case GL_CLIP_PLANE5:
+ case GL_CLIP_PLANE5:
p = cap-GL_CLIP_PLANE0;
R200_STATECHANGE( rmesa, tcl );
if (state) {
@@ -2013,10 +1856,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
R200_STATECHANGE(rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE;
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
}
break;
@@ -2031,7 +1874,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
}
r200UpdateSpecular( ctx ); /* for PK_SPEC */
- if (rmesa->TclFallback)
+ if (rmesa->radeon.TclFallback)
r200ChooseVertexState( ctx );
_mesa_allow_light_in_model( ctx, !state );
break;
@@ -2046,13 +1889,13 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
case GL_LIGHT7:
R200_STATECHANGE(rmesa, tcl);
p = cap - GL_LIGHT0;
- if (p&1)
+ if (p&1)
flag = (R200_LIGHT_1_ENABLE |
- R200_LIGHT_1_ENABLE_AMBIENT |
+ R200_LIGHT_1_ENABLE_AMBIENT |
R200_LIGHT_1_ENABLE_SPECULAR);
else
flag = (R200_LIGHT_0_ENABLE |
- R200_LIGHT_0_ENABLE_AMBIENT |
+ R200_LIGHT_0_ENABLE_AMBIENT |
R200_LIGHT_0_ENABLE_SPECULAR);
if (state)
@@ -2060,7 +1903,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
else
rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
- /*
+ /*
*/
update_light_colors( ctx, p );
break;
@@ -2068,7 +1911,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
case GL_LIGHTING:
r200UpdateSpecular(ctx);
/* for reflection map fixup - might set recheck_texgen for all units too */
- rmesa->NewGLState |= _NEW_TEXTURE;
+ rmesa->radeon.NewGLState |= _NEW_TEXTURE;
break;
case GL_LINE_SMOOTH:
@@ -2181,21 +2024,30 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
}
case GL_SCISSOR_TEST:
- R200_FIREVERTICES( rmesa );
- rmesa->state.scissor.enabled = state;
- r200UpdateScissor( ctx );
+ radeon_firevertices(&rmesa->radeon);
+ rmesa->radeon.state.scissor.enabled = state;
+ radeonUpdateScissor( ctx );
break;
case GL_STENCIL_TEST:
- if ( rmesa->state.stencil.hwBuffer ) {
- R200_STATECHANGE( rmesa, ctx );
- if ( state ) {
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_STENCIL_ENABLE;
+ {
+ GLboolean hw_stencil = GL_FALSE;
+ if (ctx->DrawBuffer) {
+ struct radeon_renderbuffer *rrbStencil
+ = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+ hw_stencil = (rrbStencil && rrbStencil->bo);
+ }
+
+ if (hw_stencil) {
+ R200_STATECHANGE( rmesa, ctx );
+ if ( state ) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_STENCIL_ENABLE;
+ } else {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE;
+ }
} else {
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE;
+ FALLBACK( rmesa, R200_FALLBACK_STENCIL, state );
}
- } else {
- FALLBACK( rmesa, R200_FALLBACK_STENCIL, state );
}
break;
@@ -2205,7 +2057,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
case GL_TEXTURE_GEN_T:
/* Picked up in r200UpdateTextureState.
*/
- rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
+ rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
break;
case GL_COLOR_SUM_EXT:
@@ -2322,7 +2174,7 @@ void r200LightingSpaceChange( GLcontext *ctx )
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLboolean tmp;
- if (R200_DEBUG & DEBUG_STATE)
+ if (R200_DEBUG & RADEON_STATE)
fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]);
@@ -2338,7 +2190,7 @@ void r200LightingSpaceChange( GLcontext *ctx )
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS;
}
- if (R200_DEBUG & DEBUG_STATE)
+ if (R200_DEBUG & RADEON_STATE)
fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]);
}
@@ -2381,7 +2233,7 @@ static void update_texturematrix( GLcontext *ctx )
GLuint compsel = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL];
int unit;
- if (R200_DEBUG & DEBUG_STATE)
+ if (R200_DEBUG & RADEON_STATE)
fprintf(stderr, "%s before COMPSEL: %x\n", __FUNCTION__,
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]);
@@ -2389,7 +2241,7 @@ static void update_texturematrix( GLcontext *ctx )
rmesa->TexMatCompSel = 0;
for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
- if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+ if (!ctx->Texture.Unit[unit]._ReallyEnabled)
continue;
if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
@@ -2399,21 +2251,21 @@ static void update_texturematrix( GLcontext *ctx )
rmesa->TexMatCompSel |= R200_OUTPUT_TEX_0 << unit;
if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) {
- /* Need to preconcatenate any active texgen
+ /* Need to preconcatenate any active texgen
* obj/eyeplane matrices:
*/
_math_matrix_mul_matrix( &rmesa->tmpmat,
- ctx->TextureMatrixStack[unit].Top,
+ ctx->TextureMatrixStack[unit].Top,
&rmesa->TexGenMatrix[unit] );
upload_matrix( rmesa, rmesa->tmpmat.m, R200_MTX_TEX0+unit );
- }
+ }
else {
- upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m,
+ upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m,
R200_MTX_TEX0+unit );
}
}
else if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) {
- upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m,
+ upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m,
R200_MTX_TEX0+unit );
}
}
@@ -2432,69 +2284,84 @@ static void update_texturematrix( GLcontext *ctx )
}
}
-
-
-/**
- * Tell the card where to render (offset, pitch).
- * Effected by glDrawBuffer, etc
- */
-void
-r200UpdateDrawBuffer(GLcontext *ctx)
+static GLboolean r200ValidateBuffers(GLcontext *ctx)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- driRenderbuffer *drb;
+ struct radeon_renderbuffer *rrb;
+ struct radeon_dma_bo *dma_bo;
+ int i, ret;
- if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
- /* draw to front */
- drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
- }
- else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
- /* draw to back */
- drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+ radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ /* color buffer */
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
+ 0, RADEON_GEM_DOMAIN_VRAM);
}
- else {
- /* drawing to multiple buffers, or none */
- return;
+
+ /* depth buffer */
+ rrb = radeon_get_depthbuffer(&rmesa->radeon);
+ /* color buffer */
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
+ 0, RADEON_GEM_DOMAIN_VRAM);
}
- assert(drb);
- assert(drb->flippedPitch);
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+ radeonTexObj *t;
- R200_STATECHANGE( rmesa, ctx );
+ if (!ctx->Texture.Unit[i]._ReallyEnabled)
+ continue;
- /* Note: we used the (possibly) page-flipped values */
- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
- = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
- & R200_COLOROFFSET_MASK);
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
- if (rmesa->sarea->tiling_enabled) {
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+ if (t->image_override && t->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+ else if (t->mt->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->mt->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
-}
-
+ dma_bo = first_elem(&rmesa->radeon.dma.reserved);
+ {
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, dma_bo->bo, RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+ }
+ return GL_TRUE;
+}
-void r200ValidateState( GLcontext *ctx )
+GLboolean r200ValidateState( GLcontext *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- GLuint new_state = rmesa->NewGLState;
+ GLuint new_state = rmesa->radeon.NewGLState;
- if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
- r200UpdateDrawBuffer(ctx);
+ if (new_state & _NEW_BUFFERS) {
+ _mesa_update_framebuffer(ctx);
+ /* this updates the DrawBuffer's Width/Height if it's a FBO */
+ _mesa_update_draw_buffer_bounds(ctx);
+
+ R200_STATECHANGE(rmesa, ctx);
}
- if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
+ if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) {
r200UpdateTextureState( ctx );
- new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
+ new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
r200UpdateLocalViewer( ctx );
}
+ /* we need to do a space check here */
+ if (!r200ValidateBuffers(ctx))
+ return GL_FALSE;
+
/* FIXME: don't really need most of these when vertex progs are enabled */
/* Need an event driven matrix update?
*/
- if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
+ if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, R200_MTX_MVP );
/* Need these for lighting (shouldn't upload otherwise)
@@ -2518,11 +2385,12 @@ void r200ValidateState( GLcontext *ctx )
/* emit all active clip planes if projection matrix changes.
*/
if (new_state & (_NEW_PROJECTION)) {
- if (ctx->Transform.ClipPlanesEnabled)
+ if (ctx->Transform.ClipPlanesEnabled)
r200UpdateClipPlanes( ctx );
}
if (new_state & (_NEW_PROGRAM|
+ _NEW_PROGRAM_CONSTANTS |
/* need to test for pretty much anything due to possible parameter bindings */
_NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM|
_NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX|
@@ -2533,7 +2401,8 @@ void r200ValidateState( GLcontext *ctx )
else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0);
}
- rmesa->NewGLState = 0;
+ rmesa->radeon.NewGLState = 0;
+ return GL_TRUE;
}
@@ -2544,7 +2413,7 @@ static void r200InvalidateState( GLcontext *ctx, GLuint new_state )
_vbo_InvalidateState( ctx, new_state );
_tnl_InvalidateState( ctx, new_state );
_ae_invalidate_state( ctx, new_state );
- R200_CONTEXT(ctx)->NewGLState |= new_state;
+ R200_CONTEXT(ctx)->radeon.NewGLState |= new_state;
}
/* A hack. The r200 can actually cope just fine with materials
@@ -2573,12 +2442,13 @@ static void r200WrapRunPipeline( GLcontext *ctx )
GLboolean has_material;
if (0)
- fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
+ fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
/* Validate state:
*/
- if (rmesa->NewGLState)
- r200ValidateState( ctx );
+ if (rmesa->radeon.NewGLState)
+ if (!r200ValidateState( ctx ))
+ FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx );
@@ -2587,7 +2457,7 @@ static void r200WrapRunPipeline( GLcontext *ctx )
}
/* Run the pipeline.
- */
+ */
_tnl_run_pipeline( ctx );
if (has_material) {
@@ -2596,15 +2466,30 @@ static void r200WrapRunPipeline( GLcontext *ctx )
}
+static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ GLint i;
+
+ radeon_firevertices(&r200->radeon);
+
+ R200_STATECHANGE(r200, stp);
+
+ /* Must flip pattern upside down.
+ */
+ for ( i = 31 ; i >= 0; i--) {
+ r200->hw.stp.cmd[3 + i] = ((GLuint *) mask)[i];
+ }
+}
/* Initialize the driver's state functions.
*/
-void r200InitStateFuncs( struct dd_function_table *functions )
+void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 )
{
functions->UpdateState = r200InvalidateState;
functions->LightingSpaceChange = r200LightingSpaceChange;
- functions->DrawBuffer = r200DrawBuffer;
- functions->ReadBuffer = r200ReadBuffer;
+ functions->DrawBuffer = radeonDrawBuffer;
+ functions->ReadBuffer = radeonReadBuffer;
functions->AlphaFunc = r200AlphaFunc;
functions->BlendColor = r200BlendColor;
@@ -2632,11 +2517,14 @@ void r200InitStateFuncs( struct dd_function_table *functions )
functions->LogicOpcode = r200LogicOpCode;
functions->PolygonMode = r200PolygonMode;
functions->PolygonOffset = r200PolygonOffset;
- functions->PolygonStipple = r200PolygonStipple;
+ if (dri2)
+ functions->PolygonStipple = r200PolygonStipple;
+ else
+ functions->PolygonStipple = radeonPolygonStipplePreKMS;
functions->PointParameterfv = r200PointParameter;
functions->PointSize = r200PointSize;
functions->RenderMode = r200RenderMode;
- functions->Scissor = r200Scissor;
+ functions->Scissor = radeonScissor;
functions->ShadeModel = r200ShadeModel;
functions->StencilFuncSeparate = r200StencilFuncSeparate;
functions->StencilMaskSeparate = r200StencilMaskSeparate;
diff --git a/r200/r200_state.h b/r200/r200_state.h
index a917163..9c62f0a 100644
--- a/r200/r200_state.h
+++ b/r200/r200_state.h
@@ -38,28 +38,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_context.h"
extern void r200InitState( r200ContextPtr rmesa );
-extern void r200InitStateFuncs( struct dd_function_table *functions );
+extern void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 );
extern void r200InitTnlFuncs( GLcontext *ctx );
extern void r200UpdateMaterial( GLcontext *ctx );
-extern void r200SetCliprects( r200ContextPtr rmesa );
-extern void r200RecalcScissorRects( r200ContextPtr rmesa );
extern void r200UpdateViewportOffset( GLcontext *ctx );
extern void r200UpdateWindow( GLcontext *ctx );
extern void r200UpdateDrawBuffer(GLcontext *ctx);
-extern void r200ValidateState( GLcontext *ctx );
-
-extern void r200PrintDirty( r200ContextPtr rmesa,
- const char *msg );
+extern GLboolean r200ValidateState( GLcontext *ctx );
+extern void r200_vtbl_update_scissor( GLcontext *ctx );
extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
#define FALLBACK( rmesa, bit, mode ) do { \
if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
__FUNCTION__, bit, mode ); \
- r200Fallback( rmesa->glCtx, bit, mode ); \
+ r200Fallback( rmesa->radeon.glCtx, bit, mode ); \
} while (0)
extern void r200LightingSpaceChange( GLcontext *ctx );
diff --git a/r200/r200_state_init.c b/r200/r200_state_init.c
index 9e4677e..7697306 100644
--- a/r200/r200_state_init.c
+++ b/r200/r200_state_init.c
@@ -43,40 +43,141 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "tnl/t_pipeline.h"
#include "swrast_setup/swrast_setup.h"
+#include "radeon_common.h"
+#include "radeon_mipmap_tree.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_state.h"
#include "r200_tcl.h"
#include "r200_tex.h"
#include "r200_swtcl.h"
+#include "radeon_queryobj.h"
#include "xmlpool.h"
+/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
+ * 1.3 cmdbuffers allow all previous state to be updated as well as
+ * the tcl scalar and vector areas.
+ */
+static struct {
+ int start;
+ int len;
+ const char *name;
+} packet[RADEON_MAX_STATE_PACKETS] = {
+ {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
+ {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
+ {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
+ {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
+ {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
+ {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
+ {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
+ {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
+ {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
+ {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
+ {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
+ {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
+ {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
+ {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
+ {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
+ {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
+ {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
+ {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
+ {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
+ {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
+ {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
+ "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
+ {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
+ {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
+ {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
+ {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
+ {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
+ {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
+ {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
+ {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
+ {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
+ {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
+ {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
+ {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
+ {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
+ {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
+ {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
+ {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
+ {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
+ {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
+ {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
+ {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
+ {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
+ {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
+ {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
+ {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
+ {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
+ {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
+ {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
+ {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
+ {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
+ "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
+ {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
+ {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
+ {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
+ {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
+ {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
+ {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
+ {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
+ {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
+ {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
+ {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
+ {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
+ "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
+ {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */
+ {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
+ {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
+ {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
+ {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
+ {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
+ {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
+ {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
+ {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
+ {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
+ {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
+ {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
+ {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
+ {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
+ {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
+ {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
+ {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
+ {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
+ {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
+ {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
+ {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
+ {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
+ {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
+ {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
+ {R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */
+ {R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
+ {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
+ {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
+ {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
+ {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
+ {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
+ {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
+ {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
+ {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
+};
+
/* =============================================================
* State initialization
*/
-
-void r200PrintDirty( r200ContextPtr rmesa, const char *msg )
+static int cmdpkt( r200ContextPtr rmesa, int id )
{
- struct r200_state_atom *l;
-
- fprintf(stderr, msg);
- fprintf(stderr, ": ");
+ drm_radeon_cmd_header_t h;
- foreach(l, &rmesa->hw.atomlist) {
- if (l->dirty || rmesa->hw.all_dirty)
- fprintf(stderr, "%s, ", l->name);
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ return CP_PACKET0(packet[id].start, packet[id].len - 1);
+ } else {
+ h.i = 0;
+ h.packet.cmd_type = RADEON_CMD_PACKET;
+ h.packet.packet_id = id;
}
-
- fprintf(stderr, "\n");
-}
-
-static int cmdpkt( int id )
-{
- drm_radeon_cmd_header_t h;
- h.i = 0;
- h.packet.cmd_type = RADEON_CMD_PACKET;
- h.packet.packet_id = id;
return h.i;
}
@@ -126,151 +227,607 @@ static int cmdscl2( int offset, int stride, int count )
return h.i;
}
-#define CHECK( NM, FLAG ) \
-static GLboolean check_##NM( GLcontext *ctx, int idx ) \
+/**
+ * Check functions are used to check if state is active.
+ * If it is active check function returns maximum emit size.
+ */
+#define CHECK( NM, FLAG, ADD ) \
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
{ \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
- (void) idx; \
(void) rmesa; \
- return FLAG; \
+ return (FLAG) ? atom->cmd_size + (ADD) : 0; \
}
-#define TCL_CHECK( NM, FLAG ) \
-static GLboolean check_##NM( GLcontext *ctx, int idx ) \
-{ \
- r200ContextPtr rmesa = R200_CONTEXT(ctx); \
- (void) idx; \
- return !rmesa->TclFallback && !ctx->VertexProgram._Enabled && (FLAG); \
+#define TCL_CHECK( NM, FLAG, ADD ) \
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
+{ \
+ r200ContextPtr rmesa = R200_CONTEXT(ctx); \
+ return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
-#define TCL_OR_VP_CHECK( NM, FLAG ) \
-static GLboolean check_##NM( GLcontext *ctx, int idx ) \
+#define TCL_OR_VP_CHECK( NM, FLAG, ADD ) \
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
{ \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
- (void) idx; \
- return !rmesa->TclFallback && (FLAG); \
+ return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
-#define VP_CHECK( NM, FLAG ) \
-static GLboolean check_##NM( GLcontext *ctx, int idx ) \
-{ \
- r200ContextPtr rmesa = R200_CONTEXT(ctx); \
- (void) idx; \
- return !rmesa->TclFallback && ctx->VertexProgram._Enabled && (FLAG); \
+#define VP_CHECK( NM, FLAG, ADD ) \
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
+{ \
+ r200ContextPtr rmesa = R200_CONTEXT(ctx); \
+ (void) atom; \
+ return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
+}
+
+CHECK( always, GL_TRUE, 0 )
+CHECK( always_add4, GL_TRUE, 4 )
+CHECK( never, GL_FALSE, 0 )
+CHECK( tex_any, ctx->Texture._EnabledUnits, 0 )
+CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled), 0 );
+CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled, 0 )
+ CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 )
+CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 )
+CHECK( afs, ctx->ATIFragmentShader._Enabled, 0 )
+CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( tex_cube_cs, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 2 + 4*5 - CUBE_STATE_SIZE )
+TCL_CHECK( tcl_fog, ctx->Fog.Enabled, 0 )
+TCL_CHECK( tcl_fog_add4, ctx->Fog.Enabled, 4 )
+TCL_CHECK( tcl, GL_TRUE, 0 )
+TCL_CHECK( tcl_add8, GL_TRUE, 8 )
+TCL_CHECK( tcl_add4, GL_TRUE, 4 )
+TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded, 0 )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 0 )
+TCL_CHECK( tcl_tex_add4, rmesa->state.texture.unit[atom->idx].unitneeded, 4 )
+TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_lighting_add6, ctx->Light.Enabled, 6 )
+TCL_CHECK( tcl_light_add8, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 8 )
+TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 0 )
+TCL_OR_VP_CHECK( tcl_ucp_add4, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 4 )
+TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE, 0 )
+TCL_OR_VP_CHECK( tcl_or_vp_add2, GL_TRUE, 2 )
+VP_CHECK( tcl_vp, GL_TRUE, 0 )
+VP_CHECK( tcl_vp_add4, GL_TRUE, 4 )
+VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 0 )
+VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 0 )
+VP_CHECK( tcl_vp_size_add4, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 4 )
+VP_CHECK( tcl_vpp_size_add4, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 4 )
+
+#define OUT_VEC(hdr, data) do { \
+ drm_radeon_cmd_header_t h; \
+ h.i = hdr; \
+ OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \
+ OUT_BATCH(0); \
+ OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \
+ OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
+ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \
+ OUT_BATCH_TABLE((data), h.vectors.count); \
+ } while(0)
+
+#define OUT_VECLINEAR(hdr, data) do { \
+ drm_radeon_cmd_header_t h; \
+ uint32_t _start, _sz; \
+ h.i = hdr; \
+ _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8); \
+ _sz = h.veclinear.count * 4; \
+ if (r200->radeon.radeonScreen->kernel_mm && _sz) { \
+ BEGIN_BATCH_NO_AUTOSTATE(dwords); \
+ OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \
+ OUT_BATCH(0); \
+ OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \
+ OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
+ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1)); \
+ OUT_BATCH_TABLE((data), _sz); \
+ END_BATCH(); \
+ } \
+ } while(0)
+
+#define OUT_SCL(hdr, data) do { \
+ drm_radeon_cmd_header_t h; \
+ h.i = hdr; \
+ OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \
+ OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
+ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \
+ OUT_BATCH_TABLE((data), h.scalars.count); \
+ } while(0)
+
+#define OUT_SCL2(hdr, data) do { \
+ drm_radeon_cmd_header_t h; \
+ h.i = hdr; \
+ OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \
+ OUT_BATCH((h.scalars.offset + 0x100) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
+ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \
+ OUT_BATCH_TABLE((data), h.scalars.count); \
+ } while(0)
+static int check_rrb(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb;
+ rrb = radeon_get_colorbuffer(&r200->radeon);
+ if (!rrb || !rrb->bo)
+ return 0;
+ return atom->cmd_size;
+}
+
+static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));
+ OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));
+ END_BATCH();
}
+static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
+ OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
+ END_BATCH();
+}
-CHECK( always, GL_TRUE )
-CHECK( never, GL_FALSE )
-CHECK( tex_any, ctx->Texture._EnabledUnits )
-CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) );
-CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) )
-CHECK( tex, rmesa->state.texture.unit[idx].unitneeded )
-CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled )
-CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) )
-CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) )
-CHECK( afs, ctx->ATIFragmentShader._Enabled )
-CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT )
-TCL_CHECK( tcl_fog, ctx->Fog.Enabled )
-TCL_CHECK( tcl, GL_TRUE )
-TCL_CHECK( tcl_tex, rmesa->state.texture.unit[idx].unitneeded )
-TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
-TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[idx].Enabled )
-TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) )
-TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE )
-VP_CHECK( tcl_vp, GL_TRUE )
-VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 )
-VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 )
+static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);
+ OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);
+ END_BATCH();
+}
+static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
-/* Initialize the context's hardware state.
- */
-void r200InitState( r200ContextPtr rmesa )
+ OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
+}
+
+static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
- GLcontext *ctx = rmesa->glCtx;
- GLuint color_fmt, depth_fmt, i;
- GLint drawPitch, drawOffset;
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
- switch ( rmesa->r200Screen->cpp ) {
- case 2:
- color_fmt = R200_COLOR_FORMAT_RGB565;
- break;
- case 4:
- color_fmt = R200_COLOR_FORMAT_ARGB8888;
- break;
- default:
- fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
- exit( -1 );
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_SCL(atom->cmd[0], atom->cmd+1);
+ END_BATCH();
+}
+
+
+static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_VEC(atom->cmd[0], atom->cmd+1);
+ END_BATCH();
+}
+
+static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ struct radeon_renderbuffer *rrb;
+ uint32_t cbpitch;
+ uint32_t zbpitch, depth_fmt;
+ uint32_t dwords = atom->check(ctx, atom);
+
+ /* output the first 7 bytes of context */
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(atom->cmd, 5);
+
+ rrb = radeon_get_depthbuffer(&r200->radeon);
+ if (!rrb) {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else {
+ zbpitch = (rrb->pitch / rrb->cpp);
+ if (r200->using_hyperz)
+ zbpitch |= RADEON_DEPTH_HYPERZ;
+ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH(zbpitch);
+ if (rrb->cpp == 4)
+ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+ else
+ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
+ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
+ }
+
+ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
+ OUT_BATCH(atom->cmd[CTX_CMD_1]);
+ OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
+
+ rrb = radeon_get_colorbuffer(&r200->radeon);
+ if (!rrb || !rrb->bo) {
+ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+ OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
+ } else {
+ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
+ if (rrb->cpp == 4)
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
+ else
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
+
+ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
}
- rmesa->state.color.clear = 0x00000000;
+ OUT_BATCH(atom->cmd[CTX_CMD_2]);
- switch ( ctx->Visual.depthBits ) {
- case 16:
- rmesa->state.depth.clear = 0x0000ffff;
- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
- depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z;
- rmesa->state.stencil.clear = 0x00000000;
- break;
- case 24:
- rmesa->state.depth.clear = 0x00ffffff;
- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
- depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z;
- rmesa->state.stencil.clear = 0xffff0000;
- break;
- default:
- fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
- ctx->Visual.depthBits );
- exit( -1 );
+ if (!rrb || !rrb->bo) {
+ OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
+ } else {
+ cbpitch = (rrb->pitch / rrb->cpp);
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ cbpitch |= R200_COLOR_TILE_ENABLE;
+ OUT_BATCH(cbpitch);
+ }
+
+ if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
+ OUT_BATCH_TABLE((atom->cmd + 14), 4);
+
+ END_BATCH();
+}
+
+static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb, *drb;
+ uint32_t dwords;
+
+ rrb = radeon_get_colorbuffer(&r200->radeon);
+ if (!rrb || !rrb->bo) {
+ return 0;
}
- /* Only have hw stencil when depth buffer is 24 bits deep */
- rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
- ctx->Visual.depthBits == 24 );
+ drb = radeon_get_depthbuffer(&r200->radeon);
- rmesa->Fallback = 0;
+ dwords = 10;
+ if (drb)
+ dwords += 6;
+ if (rrb)
+ dwords += 8;
+ if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
+ dwords += 4;
- if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
- drawOffset = rmesa->r200Screen->backOffset;
- drawPitch = rmesa->r200Screen->backPitch;
- } else {
- drawOffset = rmesa->r200Screen->frontOffset;
- drawPitch = rmesa->r200Screen->frontPitch;
+
+ return dwords;
+}
+
+static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ struct radeon_renderbuffer *rrb, *drb;
+ uint32_t cbpitch = 0;
+ uint32_t zbpitch = 0;
+ uint32_t dwords = atom->check(ctx, atom);
+ uint32_t depth_fmt;
+
+ rrb = radeon_get_colorbuffer(&r200->radeon);
+ if (!rrb || !rrb->bo) {
+ return;
}
-#if 000
- if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
- rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
- rmesa->state.color.drawPitch = rmesa->r200Screen->backPitch;
+
+ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
+ if (rrb->cpp == 4)
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
+ else switch (rrb->base._ActualFormat) {
+ case GL_RGB5:
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
+ break;
+ case GL_RGBA4:
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
+ break;
+ case GL_RGB5_A1:
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
+ break;
+ }
+
+ cbpitch = (rrb->pitch / rrb->cpp);
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ cbpitch |= R200_COLOR_TILE_ENABLE;
+
+ drb = radeon_get_depthbuffer(&r200->radeon);
+ if (drb) {
+ zbpitch = (drb->pitch / drb->cpp);
+ if (drb->cpp == 4)
+ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+ else
+ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
+ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
+ }
+
+ /* output the first 7 bytes of context */
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+
+ /* In the CS case we need to split this up */
+ OUT_BATCH(CP_PACKET0(packet[0].start, 3));
+ OUT_BATCH_TABLE((atom->cmd + 1), 4);
+
+ if (drb) {
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
+ OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
+ OUT_BATCH(zbpitch);
+ }
+
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
+ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
+ OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
+ OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
+ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+
+
+ if (rrb) {
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
+ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
+ OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ }
+
+ if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
+ OUT_BATCH_TABLE((atom->cmd + 14), 4);
+ }
+
+ END_BATCH();
+}
+
+static int get_tex_size(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ uint32_t dwords = atom->cmd_size + 2;
+ int i = atom->idx;
+ radeonTexObj *t = r200->state.texture.unit[i].texobj;
+ if (!(t && t->mt && !t->image_override))
+ dwords -= 2;
+
+ return dwords;
+}
+
+static int check_tex_pair(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ /** XOR is bit flip operation so use it for finding pair */
+ if (!(r200->state.texture.unit[atom->idx].unitneeded | r200->state.texture.unit[atom->idx ^ 1].unitneeded))
+ return 0;
+
+ return get_tex_size(ctx, atom);
+}
+
+static int check_tex(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ if (!(r200->state.texture.unit[atom->idx].unitneeded))
+ return 0;
+
+ return get_tex_size(ctx, atom);
+}
+
+
+static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+ int i = atom->idx;
+ radeonTexObj *t = r200->state.texture.unit[i].texobj;
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ /* is this ok even with drm older than 1.18? */
+ OUT_BATCH_TABLE(atom->cmd, 10);
+
+ if (t && t->mt && !t->image_override) {
+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ } else if (!t) {
+ /* workaround for old CS mechanism */
+ OUT_BATCH(r200->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
} else {
- rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
- rmesa->state.color.drawPitch = rmesa->r200Screen->frontPitch;
+ OUT_BATCH(t->override_offset);
}
- rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
- rmesa->state.pixel.readPitch = rmesa->state.color.drawPitch;
-#endif
+ END_BATCH();
+}
+
+static int get_tex_mm_size(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ uint32_t dwords = atom->cmd_size + 2;
+ int hastexture = 1;
+ int i = atom->idx;
+ radeonTexObj *t = r200->state.texture.unit[i].texobj;
+ if (!t)
+ hastexture = 0;
+ else {
+ if (!t->mt && !t->bo)
+ hastexture = 0;
+ }
+
+ if (!hastexture)
+ dwords -= 4;
+ return dwords;
+}
+
+static int check_tex_pair_mm(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ /** XOR is bit flip operation so use it for finding pair */
+ if (!(r200->state.texture.unit[atom->idx].unitneeded | r200->state.texture.unit[atom->idx ^ 1].unitneeded))
+ return 0;
+
+ return get_tex_mm_size(ctx, atom);
+}
- rmesa->hw.max_state_size = 0;
+static int check_tex_mm(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ if (!(r200->state.texture.unit[atom->idx].unitneeded))
+ return 0;
+
+ return get_tex_mm_size(ctx, atom);
+}
+
+
+static void tex_emit_mm(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+ int i = atom->idx;
+ radeonTexObj *t = r200->state.texture.unit[i].texobj;
+ if (!r200->state.texture.unit[i].unitneeded)
+ dwords -= 4;
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+
+ OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7));
+ OUT_BATCH_TABLE((atom->cmd + 1), 8);
+
+ if (dwords > atom->cmd_size) {
+ OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0));
+ if (t->mt && !t->image_override) {
+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ } else {
+ if (t->bo)
+ OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ }
+ }
+ END_BATCH();
+}
+
+
+static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+ int i = atom->idx, j;
+ radeonTexObj *t = r200->state.texture.unit[i].texobj;
+ radeon_mipmap_level *lvl;
+
+ if (!(t && !t->image_override))
+ dwords = 2;
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ /* XXX that size won't really match with image_override... */
+ OUT_BATCH_TABLE(atom->cmd, 2);
+
+ if (t && !t->image_override) {
+ lvl = &t->mt->levels[0];
+ OUT_BATCH_TABLE((atom->cmd + 2), 1);
+ for (j = 1; j <= 5; j++) {
+ OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
+ RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ }
+ }
+ END_BATCH();
+}
+
+static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+ int i = atom->idx, j;
+ radeonTexObj *t = r200->state.texture.unit[i].texobj;
+ radeon_mipmap_level *lvl;
+ if (!(t && !t->image_override))
+ dwords = 2;
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(atom->cmd, 2);
+
+ if (t && !t->image_override) {
+ lvl = &t->mt->levels[0];
+ for (j = 1; j <= 5; j++) {
+ OUT_BATCH(CP_PACKET0(R200_PP_CUBIC_OFFSET_F1_0 + (24*i) + (4 * (j-1)), 0));
+ OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ }
+ }
+ END_BATCH();
+}
+
+/* Initialize the context's hardware state.
+ */
+void r200InitState( r200ContextPtr rmesa )
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+ GLuint i;
+
+ rmesa->radeon.state.color.clear = 0x00000000;
+
+ switch ( ctx->Visual.depthBits ) {
+ case 16:
+ rmesa->radeon.state.depth.clear = 0x0000ffff;
+ rmesa->radeon.state.stencil.clear = 0x00000000;
+ break;
+ case 24:
+ default:
+ rmesa->radeon.state.depth.clear = 0x00ffffff;
+ rmesa->radeon.state.stencil.clear = 0xffff0000;
+ break;
+ }
+
+ rmesa->radeon.Fallback = 0;
+
+ rmesa->radeon.hw.max_state_size = 0;
#define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX ) \
do { \
rmesa->hw.ATOM.cmd_size = SZ; \
- rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \
- rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \
+ rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
+ rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
rmesa->hw.ATOM.name = NM; \
rmesa->hw.ATOM.idx = IDX; \
- rmesa->hw.ATOM.check = check_##CHK; \
+ if (check_##CHK != check_never) { \
+ rmesa->hw.ATOM.check = check_##CHK; \
+ rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \
+ } else { \
+ rmesa->hw.ATOM.check = NULL; \
+ } \
rmesa->hw.ATOM.dirty = GL_FALSE; \
- rmesa->hw.max_state_size += SZ * sizeof(int); \
} while (0)
/* Allocate state buffers:
*/
- if (rmesa->r200Screen->drmSupportsBlendColor)
- ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
+ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
+ ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
+ else
+ ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 );
+
+ if (rmesa->radeon.radeonScreen->kernel_mm)
+ {
+ rmesa->hw.ctx.emit = ctx_emit_cs;
+ rmesa->hw.ctx.check = check_always_ctx;
+ }
else
- ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 );
+ {
+ rmesa->hw.ctx.emit = ctx_emit;
+ }
ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
@@ -282,52 +839,75 @@ void r200InitState( r200ContextPtr rmesa )
ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
- if (rmesa->r200Screen->drmSupportsFragShader) {
- if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
- /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
- ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
- ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+ {
+ int state_size = TEX_STATE_SIZE_NEWDRM;
+ if (!rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ state_size = TEX_STATE_SIZE_OLDDRM;
}
- else {
- ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
- ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+ /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
+ ALLOC_STATE( tex[0], tex_pair_mm, state_size, "TEX/tex-0", 0 );
+ ALLOC_STATE( tex[1], tex_pair_mm, state_size, "TEX/tex-1", 1 );
+ ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+ }
+ else {
+ ALLOC_STATE( tex[0], tex_mm, state_size, "TEX/tex-0", 0 );
+ ALLOC_STATE( tex[1], tex_mm, state_size, "TEX/tex-1", 1 );
+ ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+ }
+ ALLOC_STATE( tex[2], tex_mm, state_size, "TEX/tex-2", 2 );
+ ALLOC_STATE( tex[3], tex_mm, state_size, "TEX/tex-3", 3 );
+ ALLOC_STATE( tex[4], tex_mm, state_size, "TEX/tex-4", 4 );
+ ALLOC_STATE( tex[5], tex_mm, state_size, "TEX/tex-5", 5 );
+ if (!rmesa->radeon.radeonScreen->kernel_mm)
+ {
+ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+ rmesa->hw.tex[0].check = check_tex_pair;
+ rmesa->hw.tex[1].check = check_tex_pair;
+ } else {
+ rmesa->hw.tex[0].check = check_tex;
+ rmesa->hw.tex[1].check = check_tex;
+ }
+ rmesa->hw.tex[2].check = check_tex;
+ rmesa->hw.tex[3].check = check_tex;
+ rmesa->hw.tex[4].check = check_tex;
+ rmesa->hw.tex[5].check = check_tex;
+ }
+ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
+ ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+ ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
+ } else {
+ ALLOC_STATE( atf, never, ATF_STATE_SIZE, "ATF/tfactor", 0 );
+ ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+ ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
+ }
}
- ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-2", 2 );
- ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-3", 3 );
- ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-4", 4 );
- ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-5", 5 );
- ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
- ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
- ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
}
- else {
- if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
- ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
- ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
- }
- else {
- ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
- ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
- }
- ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-2", 2 );
- ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-3", 3 );
- ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-4", 4 );
- ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-5", 5 );
- ALLOC_STATE( atf, never, ATF_STATE_SIZE, "TF/tfactor", 0 );
- ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
- ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
- }
- if (rmesa->r200Screen->drmSupportsCubeMapsR200) {
+ /* polygon stipple is done with irq for non-kms */
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 );
+ }
+
+ for (i = 0; i < 6; i++)
+ if (rmesa->radeon.radeonScreen->kernel_mm)
+ rmesa->hw.tex[i].emit = tex_emit_mm;
+ else
+ rmesa->hw.tex[i].emit = tex_emit;
+ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) {
ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 );
ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
+ for (i = 0; i < 6; i++)
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ rmesa->hw.cube[i].emit = cube_emit_cs;
+ rmesa->hw.cube[i].check = check_tex_cube_cs;
+ } else
+ rmesa->hw.cube[i].emit = cube_emit;
}
else {
ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
@@ -337,12 +917,20 @@ void r200InitState( r200ContextPtr rmesa )
ALLOC_STATE( cube[4], never, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
ALLOC_STATE( cube[5], never, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
}
- if (rmesa->r200Screen->drmSupportsVertexProgram) {
+
+ if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) {
ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
- ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
- ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
- ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
- ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE( vpi[0], tcl_vp_add4, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
+ ALLOC_STATE( vpi[1], tcl_vp_size_add4, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
+ ALLOC_STATE( vpp[0], tcl_vp_add4, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
+ ALLOC_STATE( vpp[1], tcl_vpp_size_add4, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
+ } else {
+ ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
+ ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
+ ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
+ ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
+ }
}
else {
ALLOC_STATE( pvs, never, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
@@ -355,50 +943,87 @@ void r200InitState( r200ContextPtr rmesa )
ALLOC_STATE( tcl, tcl_or_vp, TCL_STATE_SIZE, "TCL/tcl", 0 );
ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 );
ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 );
- ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 );
- ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 );
- ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 );
- ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 );
- ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 );
- ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
- ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 );
- ALLOC_STATE( mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
- ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 );
- ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 );
- ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 );
- ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex, MAT_STATE_SIZE, "MAT/texmat2", 2 );
- ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex, MAT_STATE_SIZE, "MAT/texmat3", 3 );
- ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex, MAT_STATE_SIZE, "MAT/texmat4", 4 );
- ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex, MAT_STATE_SIZE, "MAT/texmat5", 5 );
- ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
- ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
- ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
- ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
- ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
- ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
- ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 );
- ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 );
- ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 );
- ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 );
- ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 );
- ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 );
- ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 );
- ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 );
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE( mtl[0], tcl_lighting_add6, MTL_STATE_SIZE, "MTL0/material0", 0 );
+ ALLOC_STATE( mtl[1], tcl_lighting_add6, MTL_STATE_SIZE, "MTL1/material1", 1 );
+ ALLOC_STATE( grd, tcl_or_vp_add2, GRD_STATE_SIZE, "GRD/guard-band", 0 );
+ ALLOC_STATE( fog, tcl_fog_add4, FOG_STATE_SIZE, "FOG/fog", 0 );
+ ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 0 );
+ ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
+ ALLOC_STATE( mat[R200_MTX_MV], tcl_add4, MAT_STATE_SIZE, "MAT/modelview", 0 );
+ ALLOC_STATE( mat[R200_MTX_IMV], tcl_add4, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
+ ALLOC_STATE( mat[R200_MTX_MVP], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 0 );
+ ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat0", 0 );
+ ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+ ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat2", 2 );
+ ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat3", 3 );
+ ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat4", 4 );
+ ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat5", 5 );
+ ALLOC_STATE( ucp[0], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
+ ALLOC_STATE( ucp[1], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+ ALLOC_STATE( ucp[2], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
+ ALLOC_STATE( ucp[3], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
+ ALLOC_STATE( ucp[4], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
+ ALLOC_STATE( ucp[5], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
+ ALLOC_STATE( lit[0], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-0", 0 );
+ ALLOC_STATE( lit[1], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-1", 1 );
+ ALLOC_STATE( lit[2], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-2", 2 );
+ ALLOC_STATE( lit[3], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-3", 3 );
+ ALLOC_STATE( lit[4], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-4", 4 );
+ ALLOC_STATE( lit[5], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-5", 5 );
+ ALLOC_STATE( lit[6], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-6", 6 );
+ ALLOC_STATE( lit[7], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-7", 7 );
+ ALLOC_STATE( sci, rrb, SCI_STATE_SIZE, "SCI/scissor", 0 );
+ } else {
+ ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 );
+ ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 );
+ ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 );
+ ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 );
+ ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 );
+ ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
+ ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 );
+ ALLOC_STATE( mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
+ ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 );
+ ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 );
+ ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+ ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex, MAT_STATE_SIZE, "MAT/texmat2", 2 );
+ ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex, MAT_STATE_SIZE, "MAT/texmat3", 3 );
+ ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex, MAT_STATE_SIZE, "MAT/texmat4", 4 );
+ ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex, MAT_STATE_SIZE, "MAT/texmat5", 5 );
+ ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
+ ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+ ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
+ ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
+ ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
+ ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
+ ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 );
+ ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 );
+ ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 );
+ ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 );
+ ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 );
+ ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 );
+ ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 );
+ ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 );
+ ALLOC_STATE( sci, never, SCI_STATE_SIZE, "SCI/scissor", 0 );
+ }
ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 );
ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 );
ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 );
ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 );
- if (rmesa->r200Screen->drmSupportsTriPerf) {
+ if (rmesa->radeon.radeonScreen->drmSupportsTriPerf) {
ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
}
else {
ALLOC_STATE( prf, never, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
}
- if (rmesa->r200Screen->drmSupportsPointSprites) {
+ if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) {
ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
- ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 );
+ if (rmesa->radeon.radeonScreen->kernel_mm)
+ ALLOC_STATE( ptp, tcl_add8, PTP_STATE_SIZE, "PTP/pointparams", 0 );
+ else
+ ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 );
}
else {
ALLOC_STATE (spr, never, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
@@ -409,87 +1034,125 @@ void r200InitState( r200ContextPtr rmesa )
/* Fill in the packet headers:
*/
- rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
- rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
- rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
- if (rmesa->r200Screen->drmSupportsBlendColor)
- rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(R200_EMIT_RB3D_BLENDCOLOR);
- rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
- rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
- rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
- rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
- rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
- rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
- rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(R200_EMIT_PP_CNTL_X);
- rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(R200_EMIT_RB3D_DEPTHXY_OFFSET);
- rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(R200_EMIT_RE_AUX_SCISSOR_CNTL);
- rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(R200_EMIT_RE_SCISSOR_TL_0);
- rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(R200_EMIT_SE_VAP_CNTL_STATUS);
- rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE);
- rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
- rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3);
- rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0);
- if (rmesa->r200Screen->drmSupportsFragShader) {
- rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR);
- rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0);
- rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
- rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1);
- rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
- rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2);
- rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
- rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3);
- rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
- rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4);
- rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
- rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5);
- rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
+ rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
+ rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
+ rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
+ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
+ rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR);
+ rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
+ rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
+ rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
+ rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
+ rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
+ rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
+ rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X);
+ rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET);
+ rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL);
+ rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(rmesa, R200_EMIT_RE_SCISSOR_TL_0);
+ rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS);
+ rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE);
+ rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
+ rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3);
+ rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0);
+ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR);
+ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0);
+ rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
+ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1);
+ rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
+ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2);
+ rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
+ rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3);
+ rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
+ rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4);
+ rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
+ rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5);
+ rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
} else {
- rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
- rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
- rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
- rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
- rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
- rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
- rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
- rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
- rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
- rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
- rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
- rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
- }
- rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0);
- rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1);
- rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(R200_EMIT_VAP_PVS_CNTL);
- rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0);
- rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0);
- rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1);
- rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_1);
- rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_2);
- rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_2);
- rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_3);
- rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_3);
- rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_4);
- rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_4);
- rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_5);
- rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_5);
- rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_0);
- rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_1);
- rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_2);
- rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_3);
- rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_4);
- rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_5);
- rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
- rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
- rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
- rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(R200_EMIT_TEX_PROC_CTL_2);
- rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(R200_EMIT_MATRIX_SELECT_0);
- rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(R200_EMIT_VAP_CTL);
- rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(R200_EMIT_VTX_FMT_0);
- rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(R200_EMIT_OUTPUT_VTX_COMP_SEL);
- rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(R200_EMIT_SE_VTX_STATE_CNTL);
- rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(R200_EMIT_VTE_CNTL);
- rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(R200_EMIT_PP_TRI_PERF_CNTL);
- rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(R200_EMIT_TCL_POINT_SPRITE_CNTL);
+ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_0);
+ rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
+ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_1);
+ rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
+ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_2);
+ rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
+ rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_3);
+ rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
+ rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_4);
+ rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
+ rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_5);
+ rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
+ }
+ rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0);
+ rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1);
+ rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL);
+ rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0);
+ rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0);
+ rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1);
+ rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1);
+ rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2);
+ rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_2);
+ rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3);
+ rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3);
+ rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4);
+ rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4);
+ rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5);
+ rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5);
+ rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0);
+ rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1);
+ rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2);
+ rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3);
+ rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4);
+ rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5);
+ rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
+ rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
+ rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
+ rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2);
+ rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0);
+ rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL);
+ rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0);
+ rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL);
+ rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL);
+ rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL);
+ rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL);
+ rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL);
+
+ rmesa->hw.sci.cmd[SCI_CMD_0] = CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0);
+ rmesa->hw.sci.cmd[SCI_CMD_1] = CP_PACKET0(R200_RE_TOP_LEFT, 0);
+ rmesa->hw.sci.cmd[SCI_CMD_2] = CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0);
+
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+
+ rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
+ rmesa->hw.stp.cmd[STP_DATA_0] = 0;
+ rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
+
+ rmesa->hw.mtl[0].emit = mtl_emit;
+ rmesa->hw.mtl[1].emit = mtl_emit;
+
+ rmesa->hw.vpi[0].emit = veclinear_emit;
+ rmesa->hw.vpi[1].emit = veclinear_emit;
+ rmesa->hw.vpp[0].emit = veclinear_emit;
+ rmesa->hw.vpp[1].emit = veclinear_emit;
+
+ rmesa->hw.grd.emit = scl_emit;
+ rmesa->hw.fog.emit = vec_emit;
+ rmesa->hw.glt.emit = vec_emit;
+ rmesa->hw.eye.emit = vec_emit;
+
+ for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++)
+ rmesa->hw.mat[i].emit = vec_emit;
+
+ for (i = 0; i < 8; i++)
+ rmesa->hw.lit[i].emit = lit_emit;
+
+ for (i = 0; i < 6; i++)
+ rmesa->hw.ucp[i].emit = vec_emit;
+
+ rmesa->hw.ptp.emit = ptp_emit;
+ }
+
+
+
rmesa->hw.mtl[0].cmd[MTL_CMD_0] =
cmdvec( R200_VS_MAT_0_EMISS, 1, 16 );
rmesa->hw.mtl[0].cmd[MTL_CMD_1] =
@@ -567,7 +1230,7 @@ void r200InitState( r200ContextPtr rmesa )
(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
- if (rmesa->r200Screen->drmSupportsBlendColor) {
+ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000;
rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
@@ -578,18 +1241,17 @@ void r200InitState( r200ContextPtr rmesa )
}
rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
- rmesa->r200Screen->depthOffset + rmesa->r200Screen->fbLocation;
+ rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] =
- ((rmesa->r200Screen->depthPitch &
+ ((rmesa->radeon.radeonScreen->depthPitch &
R200_DEPTHPITCH_MASK) |
R200_DEPTH_ENDIAN_NO_SWAP);
if (rmesa->using_hyperz)
rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ;
- rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
- R200_Z_TEST_LESS |
+ rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (R200_Z_TEST_LESS |
R200_STENCIL_TEST_ALWAYS |
R200_STENCIL_FAIL_KEEP |
R200_STENCIL_ZPASS_KEEP |
@@ -599,15 +1261,14 @@ void r200InitState( r200ContextPtr rmesa )
if (rmesa->using_hyperz) {
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
R200_Z_DECOMPRESSION_ENABLE;
-/* if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200)
+/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
}
rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE
| R200_TEX_BLEND_0_ENABLE);
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = color_fmt;
- switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
+ switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
case DRI_CONF_DITHER_XERRORDIFFRESET:
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT;
break;
@@ -615,41 +1276,19 @@ void r200InitState( r200ContextPtr rmesa )
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE;
break;
}
- if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
+ if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
DRI_CONF_ROUND_ROUND )
- rmesa->state.color.roundEnable = R200_ROUND_ENABLE;
+ rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE;
else
- rmesa->state.color.roundEnable = 0;
- if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
+ rmesa->radeon.state.color.roundEnable = 0;
+ if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
DRI_CONF_COLOR_REDUCTION_DITHER )
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
else
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
-
-#if 000
- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset +
- rmesa->r200Screen->fbLocation)
- & R200_COLOROFFSET_MASK);
-
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch &
- R200_COLORPITCH_MASK) |
- R200_COLOR_ENDIAN_NO_SWAP);
-#else
- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
- rmesa->r200Screen->fbLocation)
- & R200_COLOROFFSET_MASK);
-
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
- R200_COLORPITCH_MASK) |
- R200_COLOR_ENDIAN_NO_SWAP);
-#endif
- /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
- if (rmesa->sarea->tiling_enabled) {
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
- }
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK *
- driQueryOptionf (&rmesa->optionCache,"texture_blend_quality");
+ driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality");
rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0;
rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW |
@@ -704,7 +1343,7 @@ void r200InitState( r200ContextPtr rmesa )
R200_VC_NO_SWAP;
#endif
- if (!(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) {
+ if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
/* Bypass TCL */
rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8);
}
@@ -743,28 +1382,28 @@ void r200InitState( r200ContextPtr rmesa )
rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
(/* R200_TEXCOORD_PROJ | */
0x100000); /* Small default bias */
- if (rmesa->r200Screen->drmSupportsFragShader) {
+ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
}
else {
rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] =
- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
}
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] =
- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] =
- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] =
- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] =
- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] =
(R200_TXC_ARG_A_ZERO |
@@ -967,5 +1606,13 @@ void r200InitState( r200ContextPtr rmesa )
r200LightingSpaceChange( ctx );
- rmesa->hw.all_dirty = GL_TRUE;
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ radeon_init_query_stateobj(&rmesa->radeon, R200_QUERYOBJ_CMDSIZE);
+ rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0);
+ rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_DATA_0] = 0;
+ }
+
+ rmesa->radeon.hw.all_dirty = GL_TRUE;
+
+ rcommonInitCmdBuf(&rmesa->radeon);
}
diff --git a/r200/r200_swtcl.c b/r200/r200_swtcl.c
index b25f028..240fb45 100644
--- a/r200/r200_swtcl.c
+++ b/r200/r200_swtcl.c
@@ -39,6 +39,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/image.h"
#include "main/imports.h"
#include "main/macros.h"
+#include "main/simple_list.h"
#include "swrast/s_context.h"
#include "swrast/s_fog.h"
@@ -55,27 +56,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_tcl.h"
-static void flush_last_swtcl_prim( r200ContextPtr rmesa );
-
-
/***********************************************************************
- * Initialization
+ * Initialization
***********************************************************************/
#define EMIT_ATTR( ATTR, STYLE, F0 ) \
do { \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \
- rmesa->swtcl.vertex_attr_count++; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
+ rmesa->radeon.swtcl.vertex_attr_count++; \
fmt_0 |= F0; \
} while (0)
#define EMIT_PAD( N ) \
do { \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \
- rmesa->swtcl.vertex_attr_count++; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
+ rmesa->radeon.swtcl.vertex_attr_count++; \
} while (0)
static void r200SetVertexFormat( GLcontext *ctx )
@@ -100,7 +98,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
}
assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
- rmesa->swtcl.vertex_attr_count = 0;
+ rmesa->radeon.swtcl.vertex_attr_count = 0;
/* EMIT_ATTR's must be in order as they tell t_vertex.c how to
* build up a hardware vertex.
@@ -121,7 +119,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
}
rmesa->swtcl.coloroffset = offset;
-#if MESA_LITTLE_ENDIAN
+#if MESA_LITTLE_ENDIAN
EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) );
#else
EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) );
@@ -132,7 +130,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
-#if MESA_LITTLE_ENDIAN
+#if MESA_LITTLE_ENDIAN
if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
rmesa->swtcl.specoffset = offset;
EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) );
@@ -185,7 +183,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA;
}
- if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
+ if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
(rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) ||
(rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
R200_NEWPRIM(rmesa);
@@ -193,26 +191,42 @@ static void r200SetVertexFormat( GLcontext *ctx )
rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0;
rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1;
- rmesa->swtcl.vertex_size =
+ rmesa->radeon.swtcl.vertex_size =
_tnl_install_attrs( ctx,
- rmesa->swtcl.vertex_attrs,
- rmesa->swtcl.vertex_attr_count,
+ rmesa->radeon.swtcl.vertex_attrs,
+ rmesa->radeon.swtcl.vertex_attr_count,
NULL, 0 );
- rmesa->swtcl.vertex_size /= 4;
- RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
+ rmesa->radeon.swtcl.vertex_size /= 4;
+ RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
+ }
+}
+
+static void r200_predict_emit_size( r200ContextPtr rmesa )
+{
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s\n", __func__);
+ const int vertex_array_size = 7;
+ const int prim_size = 3;
+ if (!rmesa->radeon.swtcl.emit_prediction) {
+ const int state_size = radeonCountStateEmitSize(&rmesa->radeon);
+ if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
+ state_size +
+ vertex_array_size + prim_size,
+ __FUNCTION__))
+ rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon);
+ else
+ rmesa->radeon.swtcl.emit_prediction = state_size;
+ rmesa->radeon.swtcl.emit_prediction += vertex_array_size + prim_size
+ + rmesa->radeon.cmdbuf.cs->cdw;
}
}
static void r200RenderStart( GLcontext *ctx )
{
- r200ContextPtr rmesa = R200_CONTEXT( ctx );
-
r200SetVertexFormat( ctx );
-
- if (rmesa->dma.flush != 0 &&
- rmesa->dma.flush != flush_last_swtcl_prim)
- rmesa->dma.flush( rmesa );
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s\n", __func__);
}
@@ -232,7 +246,7 @@ void r200ChooseVertexState( GLcontext *ctx )
* rasterization fallback. As this function will be called again when we
* leave a rasterization fallback, we can just skip it for now.
*/
- if (rmesa->Fallback != 0)
+ if (rmesa->radeon.Fallback != 0)
return;
vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
@@ -273,78 +287,32 @@ void r200ChooseVertexState( GLcontext *ctx )
}
}
-
-/* Flush vertices in the current dma region.
- */
-static void flush_last_swtcl_prim( r200ContextPtr rmesa )
-{
- if (R200_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- rmesa->dma.flush = NULL;
-
- if (rmesa->dma.current.buf) {
- struct r200_dma_region *current = &rmesa->dma.current;
- GLuint current_offset = (rmesa->r200Screen->gart_buffer_offset +
- current->buf->buf->idx * RADEON_BUFFER_SIZE +
- current->start);
-
- assert (!(rmesa->swtcl.hw_primitive & R200_VF_PRIM_WALK_IND));
-
- assert (current->start +
- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
- current->ptr);
-
- if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
- r200EnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
- rmesa->hw.max_state_size + VBUF_BUFSZ );
- r200EmitVertexAOS( rmesa,
- rmesa->swtcl.vertex_size,
- current_offset);
-
- r200EmitVbufPrim( rmesa,
- rmesa->swtcl.hw_primitive,
- rmesa->swtcl.numverts);
- }
-
- rmesa->swtcl.numverts = 0;
- current->start = current->ptr;
- }
-}
-
-
-/* Alloc space in the current dma region.
- */
-static INLINE void *
-r200AllocDmaLowVerts( r200ContextPtr rmesa, int nverts, int vsize )
+void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
{
- GLuint bytes = vsize * nverts;
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s\n", __func__);
- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
- r200RefillCurrentDmaRegion( rmesa );
- if (!rmesa->dma.flush) {
- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
- rmesa->dma.flush = flush_last_swtcl_prim;
- }
+ radeonEmitState(&rmesa->radeon);
+ r200EmitVertexAOS( rmesa,
+ rmesa->radeon.swtcl.vertex_size,
+ first_elem(&rmesa->radeon.dma.reserved)->bo,
+ current_offset);
- ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
- ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
- ASSERT( rmesa->dma.current.start +
- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
- rmesa->dma.current.ptr );
+ r200EmitVbufPrim( rmesa,
+ rmesa->radeon.swtcl.hw_primitive,
+ rmesa->radeon.swtcl.numverts);
+ if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n",
+ rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
- {
- GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
- rmesa->dma.current.ptr += bytes;
- rmesa->swtcl.numverts += nverts;
- return head;
- }
+ rmesa->radeon.swtcl.emit_prediction = 0;
}
-
/**************************************************************************/
@@ -389,17 +357,27 @@ static void r200ResetLineStipple( GLcontext *ctx );
#define HAVE_POLYGONS 1
#define HAVE_ELTS 0
+static void* r200_alloc_verts( r200ContextPtr rmesa, GLuint n, GLuint size)
+{
+ void *rv;
+ do {
+ r200_predict_emit_size( rmesa );
+ rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 );
+ } while(!rv);
+ return rv;
+}
+
#undef LOCAL_VARS
#undef ALLOC_VERTS
#define CTX_ARG r200ContextPtr rmesa
-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
-#define ALLOC_VERTS( n, size ) r200AllocDmaLowVerts( rmesa, n, size * 4 )
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) r200_alloc_verts(rmesa, n, size)
#define LOCAL_VARS \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
- const char *r200verts = (char *)rmesa->swtcl.verts;
-#define VERT(x) (r200Vertex *)(r200verts + ((x) * vertsize * sizeof(int)))
-#define VERTEX r200Vertex
-#define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS))
+ const char *r200verts = (char *)rmesa->radeon.swtcl.verts;
+#define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int)))
+#define VERTEX radeonVertex
+#define DO_DEBUG_VERTS (1 && (R200_DEBUG & RADEON_VERTS))
#undef TAG
#define TAG(x) r200_##x
@@ -456,11 +434,11 @@ static struct {
#define VERT_Y(_v) _v->v.y
#define VERT_Z(_v) _v->v.z
#define AREA_IS_CCW( a ) (a < 0)
-#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
#define VERT_SET_RGBA( v, c ) \
do { \
- r200_color_t *color = (r200_color_t *)&((v)->ui[coloroffset]); \
+ radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
@@ -472,7 +450,7 @@ do { \
#define VERT_SET_SPEC( v, c ) \
do { \
if (specoffset) { \
- r200_color_t *spec = (r200_color_t *)&((v)->ui[specoffset]); \
+ radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]); \
UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]); \
UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]); \
UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]); \
@@ -481,8 +459,8 @@ do { \
#define VERT_COPY_SPEC( v0, v1 ) \
do { \
if (specoffset) { \
- r200_color_t *spec0 = (r200_color_t *)&((v0)->ui[specoffset]); \
- r200_color_t *spec1 = (r200_color_t *)&((v1)->ui[specoffset]); \
+ radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]); \
+ radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]); \
spec0->red = spec1->red; \
spec0->green = spec1->green; \
spec0->blue = spec1->blue; \
@@ -503,7 +481,7 @@ do { \
#define LOCAL_VARS(n) \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
- GLuint color[n], spec[n]; \
+ GLuint color[n] = {0}, spec[n] = {0}; \
GLuint coloroffset = rmesa->swtcl.coloroffset; \
GLuint specoffset = rmesa->swtcl.specoffset; \
(void) color; (void) spec; (void) coloroffset; (void) specoffset;
@@ -513,7 +491,7 @@ do { \
***********************************************************************/
#define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, x) )
-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
#undef TAG
#define TAG(x) x
#include "tnl_dd/t_dd_unfilled.h"
@@ -569,8 +547,8 @@ static void init_rast_tab( void )
#undef LOCAL_VARS
#define LOCAL_VARS \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
- const GLuint vertsize = rmesa->swtcl.vertex_size; \
- const char *r200verts = (char *)rmesa->swtcl.verts; \
+ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
+ const char *r200verts = (char *)rmesa->radeon.swtcl.verts; \
const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
const GLboolean stipple = ctx->Line.StippleFlag; \
(void) elt; (void) stipple;
@@ -599,13 +577,13 @@ void r200ChooseRenderState( GLcontext *ctx )
GLuint index = 0;
GLuint flags = ctx->_TriangleCaps;
- if (!rmesa->TclFallback || rmesa->Fallback)
+ if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback)
return;
if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R200_TWOSIDE_BIT;
if (flags & DD_TRI_UNFILLED) index |= R200_UNFILLED_BIT;
- if (index != rmesa->swtcl.RenderIndex) {
+ if (index != rmesa->radeon.swtcl.RenderIndex) {
tnl->Driver.Render.Points = rast_tab[index].points;
tnl->Driver.Render.Line = rast_tab[index].line;
tnl->Driver.Render.ClippedLine = rast_tab[index].line;
@@ -622,7 +600,7 @@ void r200ChooseRenderState( GLcontext *ctx )
tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
}
- rmesa->swtcl.RenderIndex = index;
+ rmesa->radeon.swtcl.RenderIndex = index;
}
}
@@ -636,7 +614,7 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- if (rmesa->swtcl.hw_primitive != hwprim) {
+ if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
/* need to disable perspective-correct texturing for point sprites */
if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) {
if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) {
@@ -649,15 +627,15 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE;
}
R200_NEWPRIM( rmesa );
- rmesa->swtcl.hw_primitive = hwprim;
+ rmesa->radeon.swtcl.hw_primitive = hwprim;
}
}
static void r200RenderPrimitive( GLcontext *ctx, GLenum prim )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- rmesa->swtcl.render_primitive = prim;
- if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED))
+ rmesa->radeon.swtcl.render_primitive = prim;
+ if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED))
r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) );
}
@@ -701,23 +679,23 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
- GLuint oldfallback = rmesa->Fallback;
+ GLuint oldfallback = rmesa->radeon.Fallback;
if (mode) {
- rmesa->Fallback |= bit;
+ rmesa->radeon.Fallback |= bit;
if (oldfallback == 0) {
- R200_FIREVERTICES( rmesa );
+ radeon_firevertices(&rmesa->radeon);
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE );
_swsetup_Wakeup( ctx );
- rmesa->swtcl.RenderIndex = ~0;
- if (R200_DEBUG & DEBUG_FALLBACKS) {
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n",
bit, getFallbackString(bit));
}
}
}
else {
- rmesa->Fallback &= ~bit;
+ rmesa->radeon.Fallback &= ~bit;
if (oldfallback == bit) {
_swrast_flush( ctx );
@@ -731,18 +709,18 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE );
- if (rmesa->TclFallback) {
- /* These are already done if rmesa->TclFallback goes to
+ if (rmesa->radeon.TclFallback) {
+ /* These are already done if rmesa->radeon.TclFallback goes to
* zero above. But not if it doesn't (R200_NO_TCL for
* example?)
*/
_tnl_invalidate_vertex_state( ctx, ~0 );
_tnl_invalidate_vertices( ctx, ~0 );
- RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
+ RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
r200ChooseVertexState( ctx );
r200ChooseRenderState( ctx );
}
- if (R200_DEBUG & DEBUG_FALLBACKS) {
+ if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "R200 end rasterization fallback: 0x%x %s\n",
bit, getFallbackString(bit));
}
@@ -755,7 +733,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
/**
* Cope with depth operations by drawing individual pixels as points.
- *
+ *
* \todo
* The way the vertex state is set in this routine is hokey. It seems to
* work, but it's very hackish. This whole routine is pretty hackish. If
@@ -770,14 +748,14 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
const GLubyte *bitmap )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- const GLfloat *rc = ctx->Current.RasterColor;
+ const GLfloat *rc = ctx->Current.RasterColor;
GLint row, col;
- r200Vertex vert;
+ radeonVertex vert;
GLuint orig_vte;
GLuint h;
- /* Turn off tcl.
+ /* Turn off tcl.
*/
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 1 );
@@ -794,7 +772,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
vte |= R200_VTX_W0_FMT;
vap &= ~R200_VAP_FORCE_W_TO_ONE;
- rmesa->swtcl.vertex_size = 5;
+ rmesa->radeon.swtcl.vertex_size = 5;
if ( (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0)
|| (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
@@ -828,7 +806,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
R200_VPORT_Z_SCALE_ENA |
R200_VPORT_X_OFFSET_ENA |
R200_VPORT_Y_OFFSET_ENA |
- R200_VPORT_Z_OFFSET_ENA);
+ R200_VPORT_Z_OFFSET_ENA);
/* Turn off other stuff: Stipple?, texture?, blending?, etc.
*/
@@ -871,16 +849,16 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
/* Update window height
*/
- LOCK_HARDWARE( rmesa );
- UNLOCK_HARDWARE( rmesa );
- h = rmesa->dri.drawable->h + rmesa->dri.drawable->y;
- px += rmesa->dri.drawable->x;
+ LOCK_HARDWARE( &rmesa->radeon );
+ UNLOCK_HARDWARE( &rmesa->radeon );
+ h = radeon_get_drawable(&rmesa->radeon)->h + radeon_get_drawable(&rmesa->radeon)->y;
+ px += radeon_get_drawable(&rmesa->radeon)->x;
/* Clipping handled by existing mechansims in r200_ioctl.c?
*/
for (row=0; row<height; row++) {
- const GLubyte *src = (const GLubyte *)
- _mesa_image_address2d(unpack, bitmap, width, height,
+ const GLubyte *src = (const GLubyte *)
+ _mesa_image_address2d(unpack, bitmap, width, height,
GL_COLOR_INDEX, GL_BITMAP, row, 0 );
if (unpack->LsbFirst) {
@@ -929,7 +907,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
/* Need to restore vertexformat?
*/
- if (rmesa->TclFallback)
+ if (rmesa->radeon.TclFallback)
r200ChooseVertexState( ctx );
}
@@ -949,6 +927,7 @@ void r200InitSwtcl( GLcontext *ctx )
init_rast_tab();
firsttime = 0;
}
+ rmesa->radeon.swtcl.emit_prediction = 0;
tnl->Driver.Render.Start = r200RenderStart;
tnl->Driver.Render.Finish = r200RenderFinish;
@@ -959,20 +938,12 @@ void r200InitSwtcl( GLcontext *ctx )
tnl->Driver.Render.Interp = _tnl_interp;
/* FIXME: what are these numbers? */
- _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+ _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
36 * sizeof(GLfloat) );
-
- rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
- rmesa->swtcl.RenderIndex = ~0;
- rmesa->swtcl.render_primitive = GL_TRIANGLES;
- rmesa->swtcl.hw_primitive = 0;
-}
-
-void r200DestroySwtcl( GLcontext *ctx )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
- if (rmesa->swtcl.indexed_verts.buf)
- r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ );
+ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+ rmesa->radeon.swtcl.hw_primitive = 0;
}
+
diff --git a/r200/r200_swtcl.h b/r200/r200_swtcl.h
index 8c29fd0..b090587 100644
--- a/r200/r200_swtcl.h
+++ b/r200/r200_swtcl.h
@@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_context.h"
extern void r200InitSwtcl( GLcontext *ctx );
-extern void r200DestroySwtcl( GLcontext *ctx );
extern void r200ChooseRenderState( GLcontext *ctx );
extern void r200ChooseVertexState( GLcontext *ctx );
@@ -52,15 +51,11 @@ extern void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
extern void r200PrintSetupFlags(char *msg, GLuint flags );
-extern void r200_emit_indexed_verts( GLcontext *ctx,
- GLuint start,
- GLuint count );
-
extern void r200_translate_vertex( GLcontext *ctx,
- const r200Vertex *src,
+ const radeonVertex *src,
SWvertex *dst );
-extern void r200_print_vertex( GLcontext *ctx, const r200Vertex *v );
+extern void r200_print_vertex( GLcontext *ctx, const radeonVertex *v );
extern void r200_import_float_colors( GLcontext *ctx );
extern void r200_import_float_spec_colors( GLcontext *ctx );
@@ -70,5 +65,5 @@ extern void r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap );
-
+void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
#endif
diff --git a/r200/r200_tcl.c b/r200/r200_tcl.c
index 99aecfe..c702910 100644
--- a/r200/r200_tcl.c
+++ b/r200/r200_tcl.c
@@ -51,6 +51,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_swtcl.h"
#include "r200_maos.h"
+#include "radeon_common_context.h"
+
#define HAVE_POINTS 1
@@ -109,7 +111,7 @@ static GLboolean discrete_prim[0x10] = {
#define ELT_INIT(prim, hw_prim) \
r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND )
-#define GET_MESA_ELTS() rmesa->tcl.Elts
+#define GET_MESA_ELTS() TNL_CONTEXT(ctx)->vb.Elts
/* Don't really know how many elts will fit in what's left of cmdbuf,
@@ -123,7 +125,7 @@ static GLboolean discrete_prim[0x10] = {
#define RESET_STIPPLE() do { \
R200_STATECHANGE( rmesa, lin ); \
- r200EmitState( rmesa ); \
+ radeonEmitState(&rmesa->radeon); \
} while (0)
#define AUTO_STIPPLE( mode ) do { \
@@ -134,7 +136,7 @@ static GLboolean discrete_prim[0x10] = {
else \
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
~R200_LINE_PATTERN_AUTO_RESET; \
- r200EmitState( rmesa ); \
+ radeonEmitState(&rmesa->radeon); \
} while (0)
@@ -142,27 +144,24 @@ static GLboolean discrete_prim[0x10] = {
static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr )
{
- if (rmesa->dma.flush == r200FlushElts &&
- rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) {
+ if (rmesa->radeon.dma.flush == r200FlushElts &&
+ rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) {
- GLushort *dest = (GLushort *)(rmesa->store.cmd_buf +
- rmesa->store.cmd_used);
+ GLushort *dest = (GLushort *)(rmesa->radeon.tcl.elt_dma_bo->ptr +
+ rmesa->radeon.tcl.elt_dma_offset + rmesa->tcl.elt_used);
- rmesa->store.cmd_used += nr*2;
+ rmesa->tcl.elt_used += nr*2;
return dest;
}
else {
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa );
-
- r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
- rmesa->hw.max_state_size + ELTS_BUFSZ(nr) );
+ if (rmesa->radeon.dma.flush)
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
r200EmitAOS( rmesa,
- rmesa->tcl.aos_components,
- rmesa->tcl.nr_aos_components, 0 );
+ rmesa->radeon.tcl.aos_count, 0 );
+ r200EmitMaxVtxIndex(rmesa, rmesa->radeon.tcl.aos[0].count);
return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr );
}
}
@@ -188,13 +187,11 @@ static void r200EmitPrim( GLcontext *ctx,
r200ContextPtr rmesa = R200_CONTEXT( ctx );
r200TclPrimitive( ctx, prim, hwprim );
- r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
- rmesa->hw.max_state_size + VBUF_BUFSZ );
+ // fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count);
r200EmitAOS( rmesa,
- rmesa->tcl.aos_components,
- rmesa->tcl.nr_aos_components,
- start );
+ rmesa->radeon.tcl.aos_count,
+ start );
/* Why couldn't this packet have taken an offset param?
*/
@@ -207,6 +204,7 @@ static void r200EmitPrim( GLcontext *ctx,
r200EmitPrim( ctx, prim, hwprim, start, count ); \
(void) rmesa; } while (0)
+#define MAX_CONVERSION_SIZE 40
/* Try & join small primitives
*/
#if 0
@@ -369,6 +367,66 @@ r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
}
}
+/**
+ * Predict total emit size for next rendering operation so there is no flush in middle of rendering
+ * Prediction has to aim towards the best possible value that is worse than worst case scenario
+ */
+static GLuint r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev )
+{
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint space_required;
+ GLuint state_size;
+ GLuint nr_aos = 0;
+ int i;
+ /* predict number of aos to emit */
+ for (i = 0; i < 15; ++i)
+ {
+ if (vimap_rev[i] != 255)
+ {
+ ++nr_aos;
+ }
+ }
+
+ {
+ /* count the prediction for state size */
+ space_required = 0;
+ state_size = radeonCountStateEmitSize( &rmesa->radeon );
+ /* vtx may be changed in r200EmitArrays so account for it if not dirty */
+ if (!rmesa->hw.vtx.dirty)
+ state_size += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx);
+ /* predict size for elements */
+ for (i = 0; i < VB->PrimitiveCount; ++i)
+ {
+ if (!VB->Primitive[i].count)
+ continue;
+ /* If primitive.count is less than MAX_CONVERSION_SIZE
+ rendering code may decide convert to elts.
+ In that case we have to make pessimistic prediction.
+ and use larger of 2 paths. */
+ const GLuint elts = ELTS_BUFSZ(nr_aos);
+ const GLuint index = INDEX_BUFSZ;
+ const GLuint vbuf = VBUF_BUFSZ;
+ if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+ || vbuf > index + elts)
+ space_required += vbuf;
+ else
+ space_required += index + elts;
+ space_required += AOS_BUFSZ(nr_aos);
+ }
+ }
+
+ radeon_print(RADEON_RENDER,RADEON_VERBOSE,
+ "%s space %u, aos %d\n",
+ __func__, space_required, AOS_BUFSZ(nr_aos) );
+ /* flush the buffer in case we need more than is left. */
+ if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __FUNCTION__))
+ return space_required + radeonCountStateEmitSize( &rmesa->radeon );
+ else
+ return space_required + state_size;
+}
+
/**********************************************************************/
/* Render pipeline stage */
@@ -394,19 +452,19 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
/* TODO: separate this from the swtnl pipeline
*/
- if (rmesa->TclFallback)
+ if (rmesa->radeon.TclFallback)
return GL_TRUE; /* fallback to software t&l */
- if (R200_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s\n", __FUNCTION__);
+ radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s\n", __FUNCTION__);
if (VB->Count == 0)
return GL_FALSE;
/* Validate state:
*/
- if (rmesa->NewGLState)
- r200ValidateState( ctx );
+ if (rmesa->radeon.NewGLState)
+ if (!r200ValidateState( ctx ))
+ return GL_TRUE; /* fallback to sw t&l */
if (!ctx->VertexProgram._Enabled) {
/* NOTE: inputs != tnl->render_inputs - these are the untransformed
@@ -481,11 +539,11 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
/* Do the actual work:
*/
- r200ReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
+ radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
+ GLuint emit_end = r200EnsureEmitSize( ctx, vimap_rev )
+ + rmesa->radeon.cmdbuf.cs->cdw;
r200EmitArrays( ctx, vimap_rev );
- rmesa->tcl.Elts = VB->Elts;
-
for (i = 0 ; i < VB->PrimitiveCount ; i++)
{
GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
@@ -495,11 +553,14 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
if (!length)
continue;
- if (rmesa->tcl.Elts)
+ if (VB->Elts)
r200EmitEltPrimitive( ctx, start, start+length, prim );
else
r200EmitPrimitive( ctx, start, start+length, prim );
}
+ if ( emit_end < rmesa->radeon.cmdbuf.cs->cdw )
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
return GL_FALSE; /* finished the pipe */
}
@@ -545,7 +606,7 @@ static void transition_to_swtnl( GLcontext *ctx )
tnl->Driver.NotifyMaterialChange =
_mesa_validate_all_lighting_tables;
- r200ReleaseArrays( ctx, ~0 );
+ radeonReleaseArrays( ctx, ~0 );
/* Still using the D3D based hardware-rasterizer from the radeon;
* need to put the card into D3D mode to make it work:
@@ -565,15 +626,11 @@ static void transition_to_hwtnl( GLcontext *ctx )
tnl->Driver.NotifyMaterialChange = r200UpdateMaterial;
- if ( rmesa->dma.flush )
- rmesa->dma.flush( rmesa );
+ if ( rmesa->radeon.dma.flush )
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- rmesa->dma.flush = NULL;
+ rmesa->radeon.dma.flush = NULL;
- if (rmesa->swtcl.indexed_verts.buf)
- r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
- __FUNCTION__ );
-
R200_STATECHANGE( rmesa, vap );
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE;
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE;
@@ -594,7 +651,7 @@ static void transition_to_hwtnl( GLcontext *ctx )
rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VTX_XY_FMT|R200_VTX_Z_FMT);
rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] |= R200_VTX_W0_FMT;
- if (R200_DEBUG & DEBUG_FALLBACKS)
+ if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "R200 end tcl fallback\n");
}
@@ -631,21 +688,21 @@ static char *getFallbackString(GLuint bit)
void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- GLuint oldfallback = rmesa->TclFallback;
+ GLuint oldfallback = rmesa->radeon.TclFallback;
if (mode) {
- rmesa->TclFallback |= bit;
+ rmesa->radeon.TclFallback |= bit;
if (oldfallback == 0) {
- if (R200_DEBUG & DEBUG_FALLBACKS)
+ if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "R200 begin tcl fallback %s\n",
getFallbackString( bit ));
transition_to_swtnl( ctx );
}
}
else {
- rmesa->TclFallback &= ~bit;
+ rmesa->radeon.TclFallback &= ~bit;
if (oldfallback == bit) {
- if (R200_DEBUG & DEBUG_FALLBACKS)
+ if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "R200 end tcl fallback %s\n",
getFallbackString( bit ));
transition_to_hwtnl( ctx );
diff --git a/r200/r200_tex.c b/r200/r200_tex.c
index 259f35a..36d9e37 100644
--- a/r200/r200_tex.c
+++ b/r200/r200_tex.c
@@ -43,8 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/teximage.h"
#include "main/texobj.h"
-#include "texmem.h"
-
+#include "radeon_mipmap_tree.h"
#include "r200_context.h"
#include "r200_state.h"
#include "r200_ioctl.h"
@@ -63,10 +62,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* \param twrap Wrap mode for the \a t texture coordinate
*/
-static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
+static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
{
GLboolean is_clamp = GL_FALSE;
GLboolean is_clamp_to_border = GL_FALSE;
+ struct gl_texture_object *tObj = &t->base;
t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D);
@@ -103,7 +103,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
_mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
}
- if (t->base.tObj->Target != GL_TEXTURE_1D) {
+ if (tObj->Target != GL_TEXTURE_1D) {
switch ( twrap ) {
case GL_REPEAT:
t->pp_txfilter |= R200_CLAMP_T_WRAP;
@@ -180,7 +180,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
t->border_fallback = (is_clamp && is_clamp_to_border);
}
-static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
+static void r200SetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max )
{
t->pp_txfilter &= ~R200_MAX_ANISO_MASK;
@@ -205,10 +205,13 @@ static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
* \param magf Texture magnification mode
*/
-static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
+static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
{
GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK);
+ /* Force revalidation to account for switches from/to mipmapping. */
+ t->validated = GL_FALSE;
+
t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK);
t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK;
@@ -267,700 +270,15 @@ static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
}
}
-static void r200SetTexBorderColor( r200TexObjPtr t, const GLfloat color[4] )
+static void r200SetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] )
{
GLubyte c[4];
CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
- t->pp_border_color = r200PackColor( 4, c[0], c[1], c[2], c[3] );
-}
-
-
-/**
- * Allocate space for and load the mesa images into the texture memory block.
- * This will happen before drawing with a new texture, or drawing with a
- * texture after it was swapped out or teximaged again.
- */
-
-static r200TexObjPtr r200AllocTexObj( struct gl_texture_object *texObj )
-{
- r200TexObjPtr t;
-
- t = CALLOC_STRUCT( r200_tex_obj );
- texObj->DriverData = t;
- if ( t != NULL ) {
- if ( R200_DEBUG & DEBUG_TEXTURE ) {
- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj,
- (void *)t );
- }
-
- /* Initialize non-image-dependent parts of the state:
- */
- t->base.tObj = texObj;
- t->border_fallback = GL_FALSE;
-
- make_empty_list( & t->base );
-
- r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR );
- r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
- r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
- r200SetTexBorderColor( t, texObj->BorderColor );
- }
-
- return t;
-}
-
-/* try to find a format which will only need a memcopy */
-static const struct gl_texture_format *
-r200Choose8888TexFormat( GLenum srcFormat, GLenum srcType )
-{
- const GLuint ui = 1;
- const GLubyte littleEndian = *((const GLubyte *) &ui);
-
- if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
- return &_mesa_texformat_rgba8888;
- }
- else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
- return &_mesa_texformat_rgba8888_rev;
- }
- else return _dri_texformat_argb8888;
-}
-
-static const struct gl_texture_format *
-r200ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
- GLenum format, GLenum type )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
- const GLboolean do32bpt =
- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
- const GLboolean force16bpt =
- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
- (void) format;
-
- switch ( internalFormat ) {
- case 4:
- case GL_RGBA:
- case GL_COMPRESSED_RGBA:
- switch ( type ) {
- case GL_UNSIGNED_INT_10_10_10_2:
- case GL_UNSIGNED_INT_2_10_10_10_REV:
- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
- case GL_UNSIGNED_SHORT_4_4_4_4:
- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
- return _dri_texformat_argb4444;
- case GL_UNSIGNED_SHORT_5_5_5_1:
- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
- return _dri_texformat_argb1555;
- default:
- return do32bpt ?
- r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444;
- }
-
- case 3:
- case GL_RGB:
- case GL_COMPRESSED_RGB:
- switch ( type ) {
- case GL_UNSIGNED_SHORT_4_4_4_4:
- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
- return _dri_texformat_argb4444;
- case GL_UNSIGNED_SHORT_5_5_5_1:
- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
- return _dri_texformat_argb1555;
- case GL_UNSIGNED_SHORT_5_6_5:
- case GL_UNSIGNED_SHORT_5_6_5_REV:
- return _dri_texformat_rgb565;
- default:
- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
- }
-
- case GL_RGBA8:
- case GL_RGB10_A2:
- case GL_RGBA12:
- case GL_RGBA16:
- return !force16bpt ?
- r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444;
-
- case GL_RGBA4:
- case GL_RGBA2:
- return _dri_texformat_argb4444;
-
- case GL_RGB5_A1:
- return _dri_texformat_argb1555;
-
- case GL_RGB8:
- case GL_RGB10:
- case GL_RGB12:
- case GL_RGB16:
- return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
-
- case GL_RGB5:
- case GL_RGB4:
- case GL_R3_G3_B2:
- return _dri_texformat_rgb565;
-
- case GL_ALPHA:
- case GL_ALPHA4:
- case GL_ALPHA8:
- case GL_ALPHA12:
- case GL_ALPHA16:
- case GL_COMPRESSED_ALPHA:
- /* can't use a8 format since interpreting hw I8 as a8 would result
- in wrong rgb values (same as alpha value instead of 0). */
- return _dri_texformat_al88;
-
- case 1:
- case GL_LUMINANCE:
- case GL_LUMINANCE4:
- case GL_LUMINANCE8:
- case GL_LUMINANCE12:
- case GL_LUMINANCE16:
- case GL_COMPRESSED_LUMINANCE:
- return _dri_texformat_l8;
-
- case 2:
- case GL_LUMINANCE_ALPHA:
- case GL_LUMINANCE4_ALPHA4:
- case GL_LUMINANCE6_ALPHA2:
- case GL_LUMINANCE8_ALPHA8:
- case GL_LUMINANCE12_ALPHA4:
- case GL_LUMINANCE12_ALPHA12:
- case GL_LUMINANCE16_ALPHA16:
- case GL_COMPRESSED_LUMINANCE_ALPHA:
- return _dri_texformat_al88;
-
- case GL_INTENSITY:
- case GL_INTENSITY4:
- case GL_INTENSITY8:
- case GL_INTENSITY12:
- case GL_INTENSITY16:
- case GL_COMPRESSED_INTENSITY:
- return _dri_texformat_i8;
-
- case GL_YCBCR_MESA:
- if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
- type == GL_UNSIGNED_BYTE)
- return &_mesa_texformat_ycbcr;
- else
- return &_mesa_texformat_ycbcr_rev;
-
- case GL_RGB_S3TC:
- case GL_RGB4_S3TC:
- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
- return &_mesa_texformat_rgb_dxt1;
-
- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
- return &_mesa_texformat_rgba_dxt1;
-
- case GL_RGBA_S3TC:
- case GL_RGBA4_S3TC:
- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
- return &_mesa_texformat_rgba_dxt3;
-
- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
- return &_mesa_texformat_rgba_dxt5;
-
- default:
- _mesa_problem(ctx,
- "unexpected internalFormat 0x%x in r200ChooseTextureFormat",
- (int) internalFormat);
- return NULL;
- }
-
- return NULL; /* never get here */
-}
-
-
-static GLboolean
-r200ValidateClientStorage( GLcontext *ctx, GLenum target,
- GLint internalFormat,
- GLint srcWidth, GLint srcHeight,
- GLenum format, GLenum type, const void *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
-
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
- if ( R200_DEBUG & DEBUG_TEXTURE )
- fprintf(stderr, "intformat %s format %s type %s\n",
- _mesa_lookup_enum_by_nr( internalFormat ),
- _mesa_lookup_enum_by_nr( format ),
- _mesa_lookup_enum_by_nr( type ));
-
- if (!ctx->Unpack.ClientStorage)
- return 0;
-
- if (ctx->_ImageTransferState ||
- texImage->IsCompressed ||
- texObj->GenerateMipmap)
- return 0;
-
-
- /* This list is incomplete, may be different on ppc???
- */
- switch ( internalFormat ) {
- case GL_RGBA:
- if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
- texImage->TexFormat = _dri_texformat_argb8888;
- }
- else
- return 0;
- break;
-
- case GL_RGB:
- if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
- texImage->TexFormat = _dri_texformat_rgb565;
- }
- else
- return 0;
- break;
-
- case GL_YCBCR_MESA:
- if ( format == GL_YCBCR_MESA &&
- type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) {
- texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
- }
- else if ( format == GL_YCBCR_MESA &&
- (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
- type == GL_UNSIGNED_BYTE)) {
- texImage->TexFormat = &_mesa_texformat_ycbcr;
- }
- else
- return 0;
- break;
-
- default:
- return 0;
- }
-
- /* Could deal with these packing issues, but currently don't:
- */
- if (packing->SkipPixels ||
- packing->SkipRows ||
- packing->SwapBytes ||
- packing->LsbFirst) {
- return 0;
- }
-
- {
- GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
- format, type);
-
-
- if ( R200_DEBUG & DEBUG_TEXTURE )
- fprintf(stderr, "%s: srcRowStride %d/%x\n",
- __FUNCTION__, srcRowStride, srcRowStride);
-
- /* Could check this later in upload, pitch restrictions could be
- * relaxed, but would need to store the image pitch somewhere,
- * as packing details might change before image is uploaded:
- */
- if (!r200IsGartMemory( rmesa, pixels, srcHeight * srcRowStride ) ||
- (srcRowStride & 63))
- return 0;
-
-
- /* Have validated that _mesa_transfer_teximage would be a straight
- * memcpy at this point. NOTE: future calls to TexSubImage will
- * overwrite the client data. This is explicitly mentioned in the
- * extension spec.
- */
- texImage->Data = (void *)pixels;
- texImage->IsClientData = GL_TRUE;
- texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
-
- return 1;
- }
-}
-
-
-static void r200TexImage1D( GLcontext *ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint border,
- GLenum format, GLenum type, const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
- if ( t ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) r200AllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
- return;
- }
- }
-
- /* Note, this will call ChooseTextureFormat */
- _mesa_store_teximage1d(ctx, target, level, internalFormat,
- width, border, format, type, pixels,
- &ctx->Unpack, texObj, texImage);
-
- t->dirty_images[0] |= (1 << level);
-}
-
-
-static void r200TexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
- GLint xoffset,
- GLsizei width,
- GLenum format, GLenum type,
- const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
- assert( t ); /* this _should_ be true */
- if ( t ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) r200AllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
- return;
- }
- }
-
- _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
- format, type, pixels, packing, texObj,
- texImage);
-
- t->dirty_images[0] |= (1 << level);
-}
-
-
-static void r200TexImage2D( GLcontext *ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint border,
- GLenum format, GLenum type, const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- if ( t != NULL ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) r200AllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
- return;
- }
- }
-
- texImage->IsClientData = GL_FALSE;
-
- if (r200ValidateClientStorage( ctx, target,
- internalFormat,
- width, height,
- format, type, pixels,
- packing, texObj, texImage)) {
- if (R200_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using client storage\n", __FUNCTION__);
- }
- else {
- if (R200_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
-
- /* Normal path: copy (to cached memory) and eventually upload
- * via another copy to GART memory and then a blit... Could
- * eliminate one copy by going straight to (permanent) GART.
- *
- * Note, this will call r200ChooseTextureFormat.
- */
- _mesa_store_teximage2d(ctx, target, level, internalFormat,
- width, height, border, format, type, pixels,
- &ctx->Unpack, texObj, texImage);
-
- t->dirty_images[face] |= (1 << level);
- }
-}
-
-
-static void r200TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
- GLint xoffset, GLint yoffset,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- assert( t ); /* this _should_ be true */
- if ( t ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) r200AllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
- return;
- }
- }
-
- _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
- height, format, type, pixels, packing, texObj,
- texImage);
-
- t->dirty_images[face] |= (1 << level);
-}
-
-
-static void r200CompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint border,
- GLsizei imageSize, const GLvoid *data,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- if ( t != NULL ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) r200AllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
- return;
- }
- }
-
- texImage->IsClientData = GL_FALSE;
-/* can't call this, different parameters. Would never evaluate to true anyway currently
- if (r200ValidateClientStorage( ctx, target,
- internalFormat,
- width, height,
- format, type, pixels,
- packing, texObj, texImage)) {
- if (R200_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using client storage\n", __FUNCTION__);
- }
- else */{
- if (R200_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
-
- /* Normal path: copy (to cached memory) and eventually upload
- * via another copy to GART memory and then a blit... Could
- * eliminate one copy by going straight to (permanent) GART.
- *
- * Note, this will call r200ChooseTextureFormat.
- */
- _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
- height, border, imageSize, data, texObj, texImage);
-
- t->dirty_images[face] |= (1 << level);
- }
-}
-
-
-static void r200CompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
- GLint xoffset, GLint yoffset,
- GLsizei width, GLsizei height,
- GLenum format,
- GLsizei imageSize, const GLvoid *data,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- assert( t ); /* this _should_ be true */
- if ( t ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) r200AllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
- return;
- }
- }
-
- _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
- height, format, imageSize, data, texObj, texImage);
-
- t->dirty_images[face] |= (1 << level);
-}
-
-
-#if ENABLE_HW_3D_TEXTURE
-static void r200TexImage3D( GLcontext *ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint depth,
- GLint border,
- GLenum format, GLenum type, const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
- if ( t ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) r200AllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
- return;
- }
- }
-
- texImage->IsClientData = GL_FALSE;
-
-#if 0
- if (r200ValidateClientStorage( ctx, target,
- internalFormat,
- width, height,
- format, type, pixels,
- packing, texObj, texImage)) {
- if (R200_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using client storage\n", __FUNCTION__);
- }
- else
-#endif
- {
- if (R200_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
-
- /* Normal path: copy (to cached memory) and eventually upload
- * via another copy to GART memory and then a blit... Could
- * eliminate one copy by going straight to (permanent) GART.
- *
- * Note, this will call r200ChooseTextureFormat.
- */
- _mesa_store_teximage3d(ctx, target, level, internalFormat,
- width, height, depth, border,
- format, type, pixels,
- &ctx->Unpack, texObj, texImage);
-
- t->dirty_images[0] |= (1 << level);
- }
+ t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
}
-#endif
-
-
-#if ENABLE_HW_3D_TEXTURE
-static void
-r200TexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLsizei height, GLsizei depth,
- GLenum format, GLenum type,
- const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-/* fprintf(stderr, "%s\n", __FUNCTION__); */
-
- assert( t ); /* this _should_ be true */
- if ( t ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) r200AllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
- return;
- }
- texObj->DriverData = t;
- }
-
- _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
- width, height, depth,
- format, type, pixels, packing, texObj, texImage);
-
- t->dirty_images[0] |= (1 << level);
-}
-#endif
-
-
static void r200TexEnv( GLcontext *ctx, GLenum target,
GLenum pname, const GLfloat *param )
@@ -969,7 +287,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
GLuint unit = ctx->Texture.CurrentUnit;
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- if ( R200_DEBUG & DEBUG_STATE ) {
+ if ( R200_DEBUG & RADEON_STATE ) {
fprintf( stderr, "%s( %s )\n",
__FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
}
@@ -983,7 +301,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
GLubyte c[4];
GLuint envColor;
UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
- envColor = r200PackColor( 4, c[0], c[1], c[2], c[3] );
+ envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) {
R200_STATECHANGE( rmesa, tf );
rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor;
@@ -1002,7 +320,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
* NOTE: Add a small bias to the bias for conform mipsel.c test.
*/
bias = *param + .01;
- min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
+ min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
0.0 : -16.0;
bias = CLAMP( bias, min, 16.0 );
b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK;
@@ -1039,9 +357,9 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
struct gl_texture_object *texObj,
GLenum pname, const GLfloat *params )
{
- r200TexObjPtr t = (r200TexObjPtr) texObj->DriverData;
+ radeonTexObj* t = radeon_tex_obj(texObj);
- if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+ if ( R200_DEBUG & (RADEON_STATE|RADEON_TEXTURE) ) {
fprintf( stderr, "%s( %s )\n", __FUNCTION__,
_mesa_lookup_enum_by_nr( pname ) );
}
@@ -1073,59 +391,46 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
* we just have to rely on loading the right subset of mipmap levels
* to simulate a clamped LOD.
*/
- driSwapOutTextureObject( (driTextureObject *) t );
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = 0;
+ t->validated = GL_FALSE;
+ }
break;
default:
return;
}
-
- /* Mark this texobj as dirty (one bit per tex unit)
- */
- t->dirty_state = TEX_ALL;
}
-
-static void r200BindTexture( GLcontext *ctx, GLenum target,
- struct gl_texture_object *texObj )
-{
- if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
- fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
- ctx->Texture.CurrentUnit );
- }
-
- if ( (target == GL_TEXTURE_1D)
- || (target == GL_TEXTURE_2D)
-#if ENABLE_HW_3D_TEXTURE
- || (target == GL_TEXTURE_3D)
-#endif
- || (target == GL_TEXTURE_CUBE_MAP)
- || (target == GL_TEXTURE_RECTANGLE_NV) ) {
- assert( texObj->DriverData != NULL );
- }
-}
-
-
-static void r200DeleteTexture( GLcontext *ctx,
- struct gl_texture_object *texObj )
+static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
- if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
- fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
- _mesa_lookup_enum_by_nr( texObj->Target ) );
+ radeonTexObj* t = radeon_tex_obj(texObj);
+
+ if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
+ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+ (void *)texObj,
+ _mesa_lookup_enum_by_nr(texObj->Target));
+ }
+
+ if (rmesa) {
+ int i;
+ radeon_firevertices(&rmesa->radeon);
+ for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
+ if ( t == rmesa->state.texture.unit[i].texobj ) {
+ rmesa->state.texture.unit[i].texobj = NULL;
+ rmesa->hw.tex[i].dirty = GL_FALSE;
+ rmesa->hw.cube[i].dirty = GL_FALSE;
+ }
+ }
}
-
- if ( t != NULL ) {
- if ( rmesa ) {
- R200_FIREVERTICES( rmesa );
- }
-
- driDestroyTextureObject( t );
+
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = 0;
}
- /* Free mipmap images and the texture object itself */
_mesa_delete_texture_object(ctx, texObj);
}
@@ -1155,46 +460,59 @@ static void r200TexGen( GLcontext *ctx,
* Called via ctx->Driver.NewTextureObject.
* Note: this function will be called during context creation to
* allocate the default texture objects.
- * Note: we could use containment here to 'derive' the driver-specific
- * texture object from the core mesa gl_texture_object. Not done at this time.
* Fixup MaxAnisotropy according to user preference.
*/
-static struct gl_texture_object *
-r200NewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx,
+ GLuint name,
+ GLenum target)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- struct gl_texture_object *obj;
- obj = _mesa_new_texture_object(ctx, name, target);
- if (!obj)
- return NULL;
- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
- r200AllocTexObj( obj );
- return obj;
+ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
+
+
+ if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
+ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+ t, _mesa_lookup_enum_by_nr(target));
+ }
+
+ _mesa_initialize_texture_object(&t->base, name, target);
+ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+
+ /* Initialize hardware state */
+ r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR );
+ r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
+ r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter);
+ r200SetTexBorderColor(t, t->base.BorderColor);
+
+ return &t->base;
}
+
void r200InitTextureFuncs( struct dd_function_table *functions )
{
/* Note: we only plug in the functions we implement in the driver
* since _mesa_init_driver_functions() was already called.
*/
- functions->ChooseTextureFormat = r200ChooseTextureFormat;
- functions->TexImage1D = r200TexImage1D;
- functions->TexImage2D = r200TexImage2D;
+ functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
+ functions->TexImage1D = radeonTexImage1D;
+ functions->TexImage2D = radeonTexImage2D;
#if ENABLE_HW_3D_TEXTURE
- functions->TexImage3D = r200TexImage3D;
+ functions->TexImage3D = radeonTexImage3D;
#else
functions->TexImage3D = _mesa_store_teximage3d;
#endif
- functions->TexSubImage1D = r200TexSubImage1D;
- functions->TexSubImage2D = r200TexSubImage2D;
+ functions->TexSubImage1D = radeonTexSubImage1D;
+ functions->TexSubImage2D = radeonTexSubImage2D;
#if ENABLE_HW_3D_TEXTURE
- functions->TexSubImage3D = r200TexSubImage3D;
+ functions->TexSubImage3D = radeonTexSubImage3D;
#else
functions->TexSubImage3D = _mesa_store_texsubimage3d;
#endif
+ functions->GetTexImage = radeonGetTexImage;
+ functions->GetCompressedTexImage = radeonGetCompressedTexImage;
functions->NewTextureObject = r200NewTextureObject;
- functions->BindTexture = r200BindTexture;
+ // functions->BindTexture = r200BindTexture;
functions->DeleteTexture = r200DeleteTexture;
functions->IsTextureResident = driIsTextureResident;
@@ -1202,22 +520,16 @@ void r200InitTextureFuncs( struct dd_function_table *functions )
functions->TexParameter = r200TexParameter;
functions->TexGen = r200TexGen;
- functions->CompressedTexImage2D = r200CompressedTexImage2D;
- functions->CompressedTexSubImage2D = r200CompressedTexSubImage2D;
+ functions->CompressedTexImage2D = radeonCompressedTexImage2D;
+ functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
- driInitTextureFormats();
+ functions->GenerateMipmap = radeonGenerateMipmap;
-#if 000
- /* moved or obsolete code */
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
- driInitTextureObjects( ctx, & rmesa->swapped,
- DRI_TEXMGR_DO_TEXTURE_1D
- | DRI_TEXMGR_DO_TEXTURE_2D );
+ functions->NewTextureImage = radeonNewTextureImage;
+ functions->FreeTexImageData = radeonFreeTexImageData;
+ functions->MapTexture = radeonMapTexture;
+ functions->UnmapTexture = radeonUnmapTexture;
+
+ driInitTextureFormats();
- /* Hack: r200NewTextureObject is not yet installed when the
- * default textures are created. Therefore set MaxAnisotropy of the
- * default 2D texture now. */
- ctx->Shared->Default2D->MaxAnisotropy = driQueryOptionf (&rmesa->optionCache,
- "def_max_anisotropy");
-#endif
}
diff --git a/r200/r200_tex.h b/r200/r200_tex.h
index 10ff8e8..e122de6 100644
--- a/r200/r200_tex.h
+++ b/r200/r200_tex.h
@@ -35,15 +35,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#ifndef __R200_TEX_H__
#define __R200_TEX_H__
+extern void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv);
+extern void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
+ __DRIdrawable *dPriv);
extern void r200SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
unsigned long long offset, GLint depth,
GLuint pitch);
extern void r200UpdateTextureState( GLcontext *ctx );
-extern int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face );
+extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint face );
-extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t );
+extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t );
extern void r200InitTextureFuncs( struct dd_function_table *functions );
diff --git a/r200/r200_texmem.c b/r200/r200_texmem.c
deleted file mode 100644
index 3b81ac0..0000000
--- a/r200/r200_texmem.c
+++ /dev/null
@@ -1,530 +0,0 @@
-/**************************************************************************
-
-Copyright (C) Tungsten Graphics 2002. All Rights Reserved.
-The Weather Channel, Inc. funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86
-license. This notice must be preserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation on the rights to use, copy, modify, merge, publish,
-distribute, sub license, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Kevin E. Martin <martin@valinux.com>
- * Gareth Hughes <gareth@valinux.com>
- *
- */
-
-#include <errno.h>
-
-#include "main/glheader.h"
-#include "main/imports.h"
-#include "main/context.h"
-#include "main/colormac.h"
-#include "main/macros.h"
-#include "r200_context.h"
-#include "r200_ioctl.h"
-#include "r200_tex.h"
-#include "radeon_reg.h"
-
-#include <unistd.h> /* for usleep() */
-
-
-/**
- * Destroy any device-dependent state associated with the texture. This may
- * include NULLing out hardware state that points to the texture.
- */
-void
-r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t )
-{
- if ( R200_DEBUG & DEBUG_TEXTURE ) {
- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__,
- (void *)t, (void *)t->base.tObj );
- }
-
- if ( rmesa != NULL ) {
- unsigned i;
-
-
- for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
- if ( t == rmesa->state.texture.unit[i].texobj ) {
- rmesa->state.texture.unit[i].texobj = NULL;
- rmesa->hw.tex[i].dirty = GL_FALSE;
- rmesa->hw.cube[i].dirty = GL_FALSE;
- }
- }
- }
-}
-
-
-/* ------------------------------------------------------------
- * Texture image conversions
- */
-
-
-static void r200UploadGARTClientSubImage( r200ContextPtr rmesa,
- r200TexObjPtr t,
- struct gl_texture_image *texImage,
- GLint hwlevel,
- GLint x, GLint y,
- GLint width, GLint height )
-{
- const struct gl_texture_format *texFormat = texImage->TexFormat;
- GLuint srcPitch, dstPitch;
- int blit_format;
- int srcOffset;
-
- /*
- * XXX it appears that we always upload the full image, not a subimage.
- * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever
- * changed, the src pitch will have to change.
- */
- switch ( texFormat->TexelBytes ) {
- case 1:
- blit_format = R200_CP_COLOR_FORMAT_CI8;
- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
- break;
- case 2:
- blit_format = R200_CP_COLOR_FORMAT_RGB565;
- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
- break;
- case 4:
- blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
- break;
- default:
- return;
- }
-
- t->image[0][hwlevel].data = texImage->Data;
- srcOffset = r200GartOffsetFromVirtual( rmesa, texImage->Data );
-
- assert( srcOffset != ~0 );
-
- /* Don't currently need to cope with small pitches?
- */
- width = texImage->Width;
- height = texImage->Height;
-
- r200EmitWait( rmesa, RADEON_WAIT_3D );
-
- r200EmitBlit( rmesa, blit_format,
- srcPitch,
- srcOffset,
- dstPitch,
- t->bufAddr,
- x,
- y,
- t->image[0][hwlevel].x + x,
- t->image[0][hwlevel].y + y,
- width,
- height );
-
- r200EmitWait( rmesa, RADEON_WAIT_2D );
-}
-
-static void r200UploadRectSubImage( r200ContextPtr rmesa,
- r200TexObjPtr t,
- struct gl_texture_image *texImage,
- GLint x, GLint y,
- GLint width, GLint height )
-{
- const struct gl_texture_format *texFormat = texImage->TexFormat;
- int blit_format, dstPitch, done;
-
- switch ( texFormat->TexelBytes ) {
- case 1:
- blit_format = R200_CP_COLOR_FORMAT_CI8;
- break;
- case 2:
- blit_format = R200_CP_COLOR_FORMAT_RGB565;
- break;
- case 4:
- blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
- break;
- default:
- return;
- }
-
- t->image[0][0].data = texImage->Data;
-
- /* Currently don't need to cope with small pitches.
- */
- width = texImage->Width;
- height = texImage->Height;
- dstPitch = t->pp_txpitch + 32;
-
- if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
- /* In this case, could also use GART texturing. This is
- * currently disabled, but has been tested & works.
- */
- if ( !t->image_override )
- t->pp_txoffset = r200GartOffsetFromVirtual( rmesa, texImage->Data );
- t->pp_txpitch = texImage->RowStride * texFormat->TexelBytes - 32;
-
- if (R200_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr,
- "Using GART texturing for rectangular client texture\n");
-
- /* Release FB memory allocated for this image:
- */
- /* FIXME This may not be correct as driSwapOutTextureObject sets
- * FIXME dirty_images. It may be fine, though.
- */
- if ( t->base.memBlock ) {
- driSwapOutTextureObject( (driTextureObject *) t );
- }
- }
- else if (texImage->IsClientData) {
- /* Data already in GART memory, with usable pitch.
- */
- GLuint srcPitch;
- srcPitch = texImage->RowStride * texFormat->TexelBytes;
- r200EmitBlit( rmesa,
- blit_format,
- srcPitch,
- r200GartOffsetFromVirtual( rmesa, texImage->Data ),
- dstPitch, t->bufAddr,
- 0, 0,
- 0, 0,
- width, height );
- }
- else {
- /* Data not in GART memory, or bad pitch.
- */
- for (done = 0; done < height ; ) {
- struct r200_dma_region region;
- int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
- int src_pitch;
- char *tex;
-
- src_pitch = texImage->RowStride * texFormat->TexelBytes;
-
- tex = (char *)texImage->Data + done * src_pitch;
-
- memset(&region, 0, sizeof(region));
- r200AllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
-
- /* Copy texdata to dma:
- */
- if (0)
- fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
- __FUNCTION__, src_pitch, dstPitch);
-
- if (src_pitch == dstPitch) {
- memcpy( region.address + region.start, tex, lines * src_pitch );
- }
- else {
- char *buf = region.address + region.start;
- int i;
- for (i = 0 ; i < lines ; i++) {
- memcpy( buf, tex, src_pitch );
- buf += dstPitch;
- tex += src_pitch;
- }
- }
-
- r200EmitWait( rmesa, RADEON_WAIT_3D );
-
- /* Blit to framebuffer
- */
- r200EmitBlit( rmesa,
- blit_format,
- dstPitch, GET_START( &region ),
- dstPitch | (t->tile_bits >> 16),
- t->bufAddr,
- 0, 0,
- 0, done,
- width, lines );
-
- r200EmitWait( rmesa, RADEON_WAIT_2D );
-
- r200ReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
- done += lines;
- }
- }
-}
-
-
-/**
- * Upload the texture image associated with texture \a t at the specified
- * level at the address relative to \a start.
- */
-static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t,
- GLint hwlevel,
- GLint x, GLint y, GLint width, GLint height,
- GLuint face )
-{
- struct gl_texture_image *texImage = NULL;
- GLuint offset;
- GLint imageWidth, imageHeight;
- GLint ret;
- drm_radeon_texture_t tex;
- drm_radeon_tex_image_t tmp;
- const int level = hwlevel + t->base.firstLevel;
-
- if ( R200_DEBUG & DEBUG_TEXTURE ) {
- fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
- __FUNCTION__, (void *)t, (void *)t->base.tObj,
- level, width, height, face );
- }
-
- ASSERT(face < 6);
-
- /* Ensure we have a valid texture to upload */
- if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
- return;
- }
-
- texImage = t->base.tObj->Image[face][level];
-
- if ( !texImage ) {
- if ( R200_DEBUG & DEBUG_TEXTURE )
- fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
- return;
- }
- if ( !texImage->Data ) {
- if ( R200_DEBUG & DEBUG_TEXTURE )
- fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
- return;
- }
-
-
- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- assert(level == 0);
- assert(hwlevel == 0);
- if ( R200_DEBUG & DEBUG_TEXTURE )
- fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
- r200UploadRectSubImage( rmesa, t, texImage, x, y, width, height );
- return;
- }
- else if (texImage->IsClientData) {
- if ( R200_DEBUG & DEBUG_TEXTURE )
- fprintf( stderr, "%s: image data is in GART client storage\n",
- __FUNCTION__);
- r200UploadGARTClientSubImage( rmesa, t, texImage, hwlevel,
- x, y, width, height );
- return;
- }
- else if ( R200_DEBUG & DEBUG_TEXTURE )
- fprintf( stderr, "%s: image data is in normal memory\n",
- __FUNCTION__);
-
-
- imageWidth = texImage->Width;
- imageHeight = texImage->Height;
-
- offset = t->bufAddr + t->base.totalSize / 6 * face;
-
- if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
- GLint imageX = 0;
- GLint imageY = 0;
- GLint blitX = t->image[face][hwlevel].x;
- GLint blitY = t->image[face][hwlevel].y;
- GLint blitWidth = t->image[face][hwlevel].width;
- GLint blitHeight = t->image[face][hwlevel].height;
- fprintf( stderr, " upload image: %d,%d at %d,%d\n",
- imageWidth, imageHeight, imageX, imageY );
- fprintf( stderr, " upload blit: %d,%d at %d,%d\n",
- blitWidth, blitHeight, blitX, blitY );
- fprintf( stderr, " blit ofs: 0x%07x level: %d/%d\n",
- (GLuint)offset, hwlevel, level );
- }
-
- t->image[face][hwlevel].data = texImage->Data;
-
- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
- * NOTE: we're always use a 1KB-wide blit and I8 texture format.
- * We used to use 1, 2 and 4-byte texels and used to use the texture
- * width to dictate the blit width - but that won't work for compressed
- * textures. (Brian)
- * NOTE: can't do that with texture tiling. (sroland)
- */
- tex.offset = offset;
- tex.image = &tmp;
- /* copy (x,y,width,height,data) */
- memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
-
- if (texImage->TexFormat->TexelBytes) {
- /* use multi-byte upload scheme */
- tex.height = imageHeight;
- tex.width = imageWidth;
- tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK;
- if (tex.format == R200_TXFORMAT_ABGR8888) {
- /* drm will refuse abgr8888 textures. */
- tex.format = R200_TXFORMAT_ARGB8888;
- }
- tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
- tex.offset += tmp.x & ~1023;
- tmp.x = tmp.x % 1024;
- if (t->tile_bits & R200_TXO_MICRO_TILE) {
- /* need something like "tiled coordinates" ? */
- tmp.y = tmp.x / (tex.pitch * 128) * 2;
- tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
- tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
- }
- else {
- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
- }
- if ((t->tile_bits & R200_TXO_MACRO_TILE) &&
- (texImage->Width * texImage->TexFormat->TexelBytes >= 256) &&
- ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) ||
- (texImage->Height >= 16))) {
- /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
- OR if height is smaller than 8 automatically, but if micro tiling is active
- the limit is height 16 instead ? */
- tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
- }
- }
- else {
- /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
- needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
- so the kernel module reads the right amount of data. */
- tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */
- tex.pitch = (BLIT_WIDTH_BYTES / 64);
- tex.height = (imageHeight + 3) / 4;
- tex.width = (imageWidth + 3) / 4;
- switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) {
- case R200_TXFORMAT_DXT1:
- tex.width *= 8;
- break;
- case R200_TXFORMAT_DXT23:
- case R200_TXFORMAT_DXT45:
- tex.width *= 16;
- break;
- default:
- fprintf(stderr, "unknown compressed tex format in uploadSubImage\n");
- }
- }
-
- LOCK_HARDWARE( rmesa );
- do {
- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
- &tex, sizeof(drm_radeon_texture_t) );
- if (ret) {
- if (R200_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n");
- usleep(1);
- }
- } while ( ret == -EAGAIN );
-
- UNLOCK_HARDWARE( rmesa );
-
- if ( ret ) {
- fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
- fprintf( stderr, " offset=0x%08x\n",
- offset );
- fprintf( stderr, " image width=%d height=%d\n",
- imageWidth, imageHeight );
- fprintf( stderr, " blit width=%d height=%d data=%p\n",
- t->image[face][hwlevel].width, t->image[face][hwlevel].height,
- t->image[face][hwlevel].data );
- exit( 1 );
- }
-}
-
-
-/**
- * Upload the texture images associated with texture \a t. This might
- * require the allocation of texture memory.
- *
- * \param rmesa Context pointer
- * \param t Texture to be uploaded
- * \param face Cube map face to be uploaded. Zero for non-cube maps.
- */
-
-int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face )
-{
- const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
- if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
- fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
- (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
- t->base.firstLevel, t->base.lastLevel );
- }
-
- if ( !t || t->base.totalSize == 0 || t->image_override )
- return 0;
-
- if (R200_DEBUG & DEBUG_SYNC) {
- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
- r200Finish( rmesa->glCtx );
- }
-
- LOCK_HARDWARE( rmesa );
-
- if ( t->base.memBlock == NULL ) {
- int heap;
-
- heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
- (driTextureObject *) t );
- if ( heap == -1 ) {
- UNLOCK_HARDWARE( rmesa );
- return -1;
- }
-
- /* Set the base offset of the texture image */
- t->bufAddr = rmesa->r200Screen->texOffset[heap]
- + t->base.memBlock->ofs;
- t->pp_txoffset = t->bufAddr;
-
- if (!(t->base.tObj->Image[0][0]->IsClientData)) {
- /* hope it's safe to add that here... */
- t->pp_txoffset |= t->tile_bits;
- }
-
- /* Mark this texobj as dirty on all units:
- */
- t->dirty_state = TEX_ALL;
- }
-
- /* Let the world know we've used this memory recently.
- */
- driUpdateTextureLRU( (driTextureObject *) t );
- UNLOCK_HARDWARE( rmesa );
-
- /* Upload any images that are new */
- if (t->base.dirty_images[face]) {
- int i;
- for ( i = 0 ; i < numLevels ; i++ ) {
- if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
- uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
- t->image[face][i].height, face );
- }
- }
- t->base.dirty_images[face] = 0;
- }
-
-
- if (R200_DEBUG & DEBUG_SYNC) {
- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
- r200Finish( rmesa->glCtx );
- }
-
- return 0;
-}
diff --git a/r200/r200_texstate.c b/r200/r200_texstate.c
index 0ad5651..c948347 100644
--- a/r200/r200_texstate.c
+++ b/r200/r200_texstate.c
@@ -37,9 +37,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/context.h"
#include "main/macros.h"
#include "main/texformat.h"
+#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
+#include "radeon_common.h"
+#include "radeon_mipmap_tree.h"
#include "r200_context.h"
#include "r200_state.h"
#include "r200_ioctl.h"
@@ -139,257 +142,6 @@ static const struct tx_table tx_table_le[] =
#undef _ALPHA
#undef _INVALID
-/**
- * This function computes the number of bytes of storage needed for
- * the given texture object (all mipmap levels, all cube faces).
- * The \c image[face][level].x/y/width/height parameters for upload/blitting
- * are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here
- * too.
- *
- * \param rmesa Context pointer
- * \param tObj GL texture object whose images are to be posted to
- * hardware state.
- */
-static void r200SetTexImages( r200ContextPtr rmesa,
- struct gl_texture_object *tObj )
-{
- r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
- const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
- GLint curOffset, blitWidth;
- GLint i, texelBytes;
- GLint numLevels;
- GLint log2Width, log2Height, log2Depth;
-
- /* Set the hardware texture format
- */
- if ( !t->image_override ) {
- if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
- const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
- tx_table_be;
-
- t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
- R200_TXFORMAT_ALPHA_IN_MAP);
- t->pp_txfilter &= ~R200_YUV_TO_RGB;
-
- t->pp_txformat |= table[ baseImage->TexFormat->MesaFormat ].format;
- t->pp_txfilter |= table[ baseImage->TexFormat->MesaFormat ].filter;
- }
- else {
- _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
- return;
- }
- }
-
- texelBytes = baseImage->TexFormat->TexelBytes;
-
- /* Compute which mipmap levels we really want to send to the hardware.
- */
-
- driCalculateTextureFirstLastLevel( (driTextureObject *) t );
- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
-
- numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
-
- /* Calculate mipmap offsets and dimensions for blitting (uploading)
- * The idea is that we lay out the mipmap levels within a block of
- * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
- */
- curOffset = 0;
- blitWidth = BLIT_WIDTH_BYTES;
- t->tile_bits = 0;
-
- /* figure out if this texture is suitable for tiling. */
- if (texelBytes) {
- if (rmesa->texmicrotile && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
- /* texrect might be able to use micro tiling too in theory? */
- (baseImage->Height > 1)) {
- /* allow 32 (bytes) x 1 mip (which will use two times the space
- the non-tiled version would use) max if base texture is large enough */
- if ((numLevels == 1) ||
- (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
- (baseImage->Width * texelBytes > 64)) ||
- ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
- t->tile_bits |= R200_TXO_MICRO_TILE;
- }
- }
- if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
- /* we can set macro tiling even for small textures, they will be untiled anyway */
- t->tile_bits |= R200_TXO_MACRO_TILE;
- }
- }
-
- for (i = 0; i < numLevels; i++) {
- const struct gl_texture_image *texImage;
- GLuint size;
-
- texImage = tObj->Image[0][i + t->base.firstLevel];
- if ( !texImage )
- break;
-
- /* find image size in bytes */
- if (texImage->IsCompressed) {
- /* need to calculate the size AFTER padding even though the texture is
- submitted without padding.
- Only handle pot textures currently - don't know if npot is even possible,
- size calculation would certainly need (trivial) adjustments.
- Align (and later pad) to 32byte, not sure what that 64byte blit width is
- good for? */
- if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) {
- /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
- if ((texImage->Width + 3) < 8) /* width one block */
- size = texImage->CompressedSize * 4;
- else if ((texImage->Width + 3) < 16)
- size = texImage->CompressedSize * 2;
- else size = texImage->CompressedSize;
- }
- else /* DXT3/5, 16 bytes per block */
- if ((texImage->Width + 3) < 8)
- size = texImage->CompressedSize * 2;
- else size = texImage->CompressedSize;
- }
- else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
- }
- else if (t->tile_bits & R200_TXO_MICRO_TILE) {
- /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
- though the actual offset may be different (if texture is less than
- 32 bytes width) to the untiled case */
- int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
- size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
- }
- else {
- int w = (texImage->Width * texelBytes + 31) & ~31;
- size = w * texImage->Height * texImage->Depth;
- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
- }
- assert(size > 0);
-
- /* Align to 32-byte offset. It is faster to do this unconditionally
- * (no branch penalty).
- */
-
- curOffset = (curOffset + 0x1f) & ~0x1f;
-
- if (texelBytes) {
- t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
- t->image[0][i].y = 0;
- t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
- t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
- }
- else {
- t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
- t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
- t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
- t->image[0][i].height = size / t->image[0][i].width;
- }
-
-#if 0
- /* for debugging only and only applicable to non-rectangle targets */
- assert(size % t->image[0][i].width == 0);
- assert(t->image[0][i].x == 0
- || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
-#endif
-
- if (0)
- fprintf(stderr,
- "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
- i, texImage->Width, texImage->Height,
- t->image[0][i].x, t->image[0][i].y,
- t->image[0][i].width, t->image[0][i].height, size, curOffset);
-
- curOffset += size;
-
- }
-
- /* Align the total size of texture memory block.
- */
- t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
-
- /* Setup remaining cube face blits, if needed */
- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
- const GLuint faceSize = t->base.totalSize;
- GLuint face;
- /* reuse face 0 x/y/width/height - just update the offset when uploading */
- for (face = 1; face < 6; face++) {
- for (i = 0; i < numLevels; i++) {
- t->image[face][i].x = t->image[0][i].x;
- t->image[face][i].y = t->image[0][i].y;
- t->image[face][i].width = t->image[0][i].width;
- t->image[face][i].height = t->image[0][i].height;
- }
- }
- t->base.totalSize = 6 * faceSize; /* total texmem needed */
- }
-
-
- /* Hardware state:
- */
- t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
- t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT;
-
- t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
- R200_TXFORMAT_HEIGHT_MASK |
- R200_TXFORMAT_CUBIC_MAP_ENABLE |
- R200_TXFORMAT_F5_WIDTH_MASK |
- R200_TXFORMAT_F5_HEIGHT_MASK);
- t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
- (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
-
- t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
- if (tObj->Target == GL_TEXTURE_3D) {
- t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
- t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
- }
- else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
- ASSERT(log2Width == log2Height);
- t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
- (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
-/* don't think we need this bit, if it exists at all - fglrx does not set it */
- (R200_TXFORMAT_CUBIC_MAP_ENABLE));
- t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
- t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
- (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
- (log2Width << R200_FACE_WIDTH_2_SHIFT) |
- (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
- (log2Width << R200_FACE_WIDTH_3_SHIFT) |
- (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
- (log2Width << R200_FACE_WIDTH_4_SHIFT) |
- (log2Height << R200_FACE_HEIGHT_4_SHIFT));
- }
- else {
- /* If we don't in fact send enough texture coordinates, q will be 1,
- * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
- */
- t->pp_txformat_x |= R200_TEXCOORD_PROJ;
- }
-
- t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
- ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
-
- /* Only need to round to nearest 32 for textures, but the blitter
- * requires 64-byte aligned pitches, and we may/may not need the
- * blitter. NPOT only!
- */
- if ( !t->image_override ) {
- if (baseImage->IsCompressed)
- t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
- else
- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
- t->pp_txpitch -= 32;
- }
-
- t->dirty_state = TEX_ALL;
-
- /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */
-}
-
-
-
/* ================================================================
* Texture combine functions
*/
@@ -569,7 +321,7 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
assert( (texUnit->_ReallyEnabled == 0)
|| (texUnit->_Current != NULL) );
- if ( R200_DEBUG & DEBUG_TEXTURE ) {
+ if ( R200_DEBUG & RADEON_TEXTURE ) {
fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
}
@@ -981,20 +733,19 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
{
r200ContextPtr rmesa = pDRICtx->driverPrivate;
struct gl_texture_object *tObj =
- _mesa_lookup_texture(rmesa->glCtx, texname);
- r200TexObjPtr t;
+ _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+ radeonTexObjPtr t = radeon_tex_obj(tObj);
if (!tObj)
return;
- t = (r200TexObjPtr) tObj->DriverData;
-
t->image_override = GL_TRUE;
if (!offset)
return;
- t->pp_txoffset = offset;
+ t->bo = NULL;
+ t->override_offset = offset;
t->pp_txpitch = pitch - 32;
switch (depth) {
@@ -1014,6 +765,125 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
}
}
+void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
+ __DRIdrawable *dPriv)
+{
+ struct gl_texture_unit *texUnit;
+ struct gl_texture_object *texObj;
+ struct gl_texture_image *texImage;
+ struct radeon_renderbuffer *rb;
+ radeon_texture_image *rImage;
+ radeonContextPtr radeon;
+ r200ContextPtr rmesa;
+ struct radeon_framebuffer *rfb;
+ radeonTexObjPtr t;
+ uint32_t pitch_val;
+ uint32_t internalFormat, type, format;
+
+ type = GL_BGRA;
+ format = GL_UNSIGNED_BYTE;
+ internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4);
+
+ radeon = pDRICtx->driverPrivate;
+ rmesa = pDRICtx->driverPrivate;
+
+ rfb = dPriv->driverPrivate;
+ texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+ texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+ texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+
+ rImage = get_radeon_texture_image(texImage);
+ t = radeon_tex_obj(texObj);
+ if (t == NULL) {
+ return;
+ }
+
+ radeon_update_renderbuffers(pDRICtx, dPriv);
+ /* back & depth buffer are useless free them right away */
+ rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
+ }
+ rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
+ }
+ rb = rfb->color_rb[0];
+ if (rb->bo == NULL) {
+ /* Failed to BO for the buffer */
+ return;
+ }
+
+ _mesa_lock_texture(radeon->glCtx, texObj);
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+ if (rImage->bo) {
+ radeon_bo_unref(rImage->bo);
+ rImage->bo = NULL;
+ }
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = NULL;
+ }
+ if (rImage->mt) {
+ radeon_miptree_unreference(rImage->mt);
+ rImage->mt = NULL;
+ }
+ _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+ rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+ texImage->RowStride = rb->pitch / rb->cpp;
+ texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
+ internalFormat,
+ type, format, 0);
+ rImage->bo = rb->bo;
+ radeon_bo_ref(rImage->bo);
+ t->bo = rb->bo;
+ radeon_bo_ref(t->bo);
+ t->tile_bits = 0;
+ t->image_override = GL_TRUE;
+ t->override_offset = 0;
+ t->pp_txpitch &= (1 << 13) -1;
+ pitch_val = rb->pitch;
+ switch (rb->cpp) {
+ case 4:
+ if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT)
+ t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format;
+ else
+ t->pp_txformat = tx_table_le[MESA_FORMAT_ARGB8888].format;
+ t->pp_txfilter |= tx_table_le[MESA_FORMAT_ARGB8888].filter;
+ break;
+ case 3:
+ default:
+ t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format;
+ t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB888].filter;
+ break;
+ case 2:
+ t->pp_txformat = tx_table_le[MESA_FORMAT_RGB565].format;
+ t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB565].filter;
+ break;
+ }
+ t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
+ | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
+ t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
+ t->pp_txpitch = pitch_val;
+ t->pp_txpitch -= 32;
+
+ t->validated = GL_TRUE;
+ _mesa_unlock_texture(radeon->glCtx, texObj);
+ return;
+}
+
+
+void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{
+ r200SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
+}
+
+
#define REF_COLOR 1
#define REF_ALPHA 2
@@ -1207,12 +1077,43 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
R200_VOLUME_FILTER_MASK)
+static void disable_tex_obj_state( r200ContextPtr rmesa,
+ int unit )
+{
+
+ R200_STATECHANGE( rmesa, vtx );
+ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
+
+ R200_STATECHANGE( rmesa, ctx );
+ rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit);
+ if (rmesa->radeon.TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
+ TCL_FALLBACK( rmesa->radeon.glCtx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+ }
+
+ /* Actually want to keep all units less than max active texture
+ * enabled, right? Fix this for >2 texunits.
+ */
+
+ {
+ GLuint tmp = rmesa->TexGenEnabled;
+
+ rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
+ rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
+ rmesa->TexGenNeedNormals[unit] = GL_FALSE;
+ rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
+
+ if (tmp != rmesa->TexGenEnabled) {
+ rmesa->recheck_texgen[unit] = GL_TRUE;
+ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+ }
+ }
+}
static void import_tex_obj_state( r200ContextPtr rmesa,
int unit,
- r200TexObjPtr texobj )
+ radeonTexObjPtr texobj )
{
/* do not use RADEON_DB_STATE to avoid stale texture caches */
- int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+ GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
R200_STATECHANGE( rmesa, tex[unit] );
@@ -1225,36 +1126,21 @@ static void import_tex_obj_state( r200ContextPtr rmesa,
cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
- if (rmesa->r200Screen->drmSupportsFragShader) {
- cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset;
- }
- else {
- cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset;
- }
- if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
- int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
- GLuint bytesPerFace = texobj->base.totalSize / 6;
- ASSERT(texobj->base.totalSize % 6 == 0);
+ if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
+ GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
R200_STATECHANGE( rmesa, cube[unit] );
cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
- if (rmesa->r200Screen->drmSupportsFragShader) {
+ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
/* that value is submitted twice. could change cube atom
to not include that command when new drm is used */
cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
}
- cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
- cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
- cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
- cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace;
- cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
}
- texobj->dirty_state &= ~(1<<unit);
}
-
static void set_texgen_matrix( r200ContextPtr rmesa,
GLuint unit,
const GLfloat *s_plane,
@@ -1377,7 +1263,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
} else {
tgcm |= R200_TEXGEN_COMP_T << (unit * 4);
}
-
if (texUnit->TexGenEnabled & R_BIT) {
if (texUnit->GenR.Mode != mode)
mixed_fallback = GL_TRUE;
@@ -1393,7 +1278,7 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
}
if (mixed_fallback) {
- if (R200_DEBUG & DEBUG_FALLBACKS)
+ if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback mixed texgen, 0x%x (0x%x 0x%x 0x%x 0x%x)\n",
texUnit->TexGenEnabled, texUnit->GenS.Mode, texUnit->GenT.Mode,
texUnit->GenR.Mode, texUnit->GenQ.Mode);
@@ -1419,7 +1304,7 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
texUnit->GenR.ObjectPlane,
texUnit->GenQ.ObjectPlane );
if (needtgenable & (S_BIT | T_BIT)) {
- if (R200_DEBUG & DEBUG_FALLBACKS)
+ if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback mixed texgen / obj plane, 0x%x\n",
texUnit->TexGenEnabled);
return GL_FALSE;
@@ -1447,7 +1332,7 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
texUnit->GenR.EyePlane,
texUnit->GenQ.EyePlane );
if (needtgenable & (S_BIT | T_BIT)) {
- if (R200_DEBUG & DEBUG_FALLBACKS)
+ if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback mixed texgen / eye plane, 0x%x\n",
texUnit->TexGenEnabled);
return GL_FALSE;
@@ -1497,7 +1382,7 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
default:
/* Unsupported mode, fallback:
*/
- if (R200_DEBUG & DEBUG_FALLBACKS)
+ if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback unsupported texgen, %d\n",
texUnit->GenS.Mode);
return GL_FALSE;
@@ -1517,52 +1402,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
return GL_TRUE;
}
-
-static void disable_tex( GLcontext *ctx, int unit )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
- if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit)) {
- /* Texture unit disabled */
- if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
- /* The old texture is no longer bound to this texture unit.
- * Mark it as such.
- */
-
- rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
- rmesa->state.texture.unit[unit].texobj = NULL;
- }
-
- R200_STATECHANGE( rmesa, ctx );
- rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit);
-
- R200_STATECHANGE( rmesa, vtx );
- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
-
- if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
- TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
- }
-
- /* Actually want to keep all units less than max active texture
- * enabled, right? Fix this for >2 texunits.
- */
-
- {
- GLuint tmp = rmesa->TexGenEnabled;
-
- rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
- rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
- rmesa->TexGenNeedNormals[unit] = GL_FALSE;
- rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
-
- if (tmp != rmesa->TexGenEnabled) {
- rmesa->recheck_texgen[unit] = GL_TRUE;
- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
- }
- }
- }
-}
-
void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
@@ -1579,237 +1418,169 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
}
}
-static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
+/**
+ * Compute the cached hardware register values for the given texture object.
+ *
+ * \param rmesa Context pointer
+ * \param t the r300 texture object
+ */
+static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
-
- /* Need to load the 2d images associated with this unit.
- */
- if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
- t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
- t->base.dirty_images[0] = ~0;
+ int firstlevel = t->mt ? t->mt->firstLevel : 0;
+ const struct gl_texture_image *firstImage = t->base.Image[0][firstlevel];
+ GLint log2Width, log2Height, log2Depth, texelBytes;
+
+ if ( t->bo ) {
+ return;
}
- ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+ log2Width = firstImage->WidthLog2;
+ log2Height = firstImage->HeightLog2;
+ log2Depth = firstImage->DepthLog2;
+ texelBytes = firstImage->TexFormat->TexelBytes;
- if ( t->base.dirty_images[0] ) {
- R200_FIREVERTICES( rmesa );
- r200SetTexImages( rmesa, tObj );
- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
- if ( !t->base.memBlock && !t->image_override )
- return GL_FALSE;
- }
- set_re_cntl_d3d( ctx, unit, GL_FALSE );
-
- return GL_TRUE;
-}
-
-#if ENABLE_HW_3D_TEXTURE
-static GLboolean enable_tex_3d( GLcontext *ctx, int unit )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
-
- /* Need to load the 3d images associated with this unit.
- */
- if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
- t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
- t->base.dirty_images[0] = ~0;
+ if (!t->image_override) {
+ if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
+ const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
+ tx_table_be;
+
+ t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
+ R200_TXFORMAT_ALPHA_IN_MAP);
+ t->pp_txfilter &= ~R200_YUV_TO_RGB;
+
+ t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
+ t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
+ } else {
+ _mesa_problem(NULL, "unexpected texture format in %s",
+ __FUNCTION__);
+ return;
+ }
}
+
+ t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
+ t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << R200_MAX_MIP_LEVEL_SHIFT;
+
+ t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
+ R200_TXFORMAT_HEIGHT_MASK |
+ R200_TXFORMAT_CUBIC_MAP_ENABLE |
+ R200_TXFORMAT_F5_WIDTH_MASK |
+ R200_TXFORMAT_F5_HEIGHT_MASK);
+ t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
+ (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
+
+ t->tile_bits = 0;
+
+ t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
+ if (t->base.Target == GL_TEXTURE_3D) {
+ t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
+ t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
- ASSERT(tObj->Target == GL_TEXTURE_3D);
-
- /* R100 & R200 do not support mipmaps for 3D textures.
- */
- if ( (tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR) ) {
- return GL_FALSE;
}
-
- if ( t->base.dirty_images[0] ) {
- R200_FIREVERTICES( rmesa );
- r200SetTexImages( rmesa, tObj );
- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
- if ( !t->base.memBlock )
- return GL_FALSE;
+ else if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
+ ASSERT(log2Width == log2Height);
+ t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
+ (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
+ /* don't think we need this bit, if it exists at all - fglrx does not set it */
+ (R200_TXFORMAT_CUBIC_MAP_ENABLE));
+ t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+ t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
+ (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
+ (log2Width << R200_FACE_WIDTH_2_SHIFT) |
+ (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
+ (log2Width << R200_FACE_WIDTH_3_SHIFT) |
+ (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
+ (log2Width << R200_FACE_WIDTH_4_SHIFT) |
+ (log2Height << R200_FACE_HEIGHT_4_SHIFT));
}
-
- set_re_cntl_d3d( ctx, unit, GL_TRUE );
-
- return GL_TRUE;
-}
-#endif
-
-static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
- GLuint face;
-
- /* Need to load the 2d images associated with this unit.
- */
- if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
- t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
- for (face = 0; face < 6; face++)
- t->base.dirty_images[face] = ~0;
+ else {
+ /* If we don't in fact send enough texture coordinates, q will be 1,
+ * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
+ */
+ t->pp_txformat_x |= R200_TEXCOORD_PROJ;
}
- ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+ t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT)
+ | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT));
- if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
- t->base.dirty_images[2] || t->base.dirty_images[3] ||
- t->base.dirty_images[4] || t->base.dirty_images[5] ) {
- /* flush */
- R200_FIREVERTICES( rmesa );
- /* layout memory space, once for all faces */
- r200SetTexImages( rmesa, tObj );
- }
-
- /* upload (per face) */
- for (face = 0; face < 6; face++) {
- if (t->base.dirty_images[face]) {
- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face );
- }
- }
-
- if ( !t->base.memBlock ) {
- /* texmem alloc failed, use s/w fallback */
- return GL_FALSE;
+ if ( !t->image_override ) {
+ if (firstImage->IsCompressed)
+ t->pp_txpitch = (firstImage->Width + 63) & ~(63);
+ else
+ t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
+ t->pp_txpitch -= 32;
}
- set_re_cntl_d3d( ctx, unit, GL_TRUE );
-
- return GL_TRUE;
-}
-
-static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
-{
- r200ContextPtr rmesa = R200_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
-
- if (!(t->pp_txformat & R200_TXFORMAT_NON_POWER2)) {
+ if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
- t->base.dirty_images[0] = ~0;
- }
-
- ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
-
- if ( t->base.dirty_images[0] ) {
- R200_FIREVERTICES( rmesa );
- r200SetTexImages( rmesa, tObj );
- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
- if ( !t->base.memBlock &&
- !t->image_override &&
- !rmesa->prefer_gart_client_texturing )
- return GL_FALSE;
}
- set_re_cntl_d3d( ctx, unit, GL_FALSE );
-
- return GL_TRUE;
}
-
-static GLboolean update_tex_common( GLcontext *ctx, int unit )
+static GLboolean r200_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
-
- /* Fallback if there's a texture border */
- if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 )
- return GL_FALSE;
-
- /* Update state if this is a different texture object to last
- * time.
- */
- if ( rmesa->state.texture.unit[unit].texobj != t ) {
- if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
- /* The old texture is no longer bound to this texture unit.
- * Mark it as such.
- */
+ radeonTexObj *t = radeon_tex_obj(texObj);
- rmesa->state.texture.unit[unit].texobj->base.bound &=
- ~(1UL << unit);
- }
-
- rmesa->state.texture.unit[unit].texobj = t;
- t->base.bound |= (1UL << unit);
- t->dirty_state |= 1<<unit;
- driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
- }
-
-
- /* Newly enabled?
- */
- if ( 1|| !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit))) {
- R200_STATECHANGE( rmesa, ctx );
- rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
-
- R200_STATECHANGE( rmesa, vtx );
- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
+ if (!radeon_validate_texture_miptree(ctx, texObj))
+ return GL_FALSE;
- rmesa->recheck_texgen[unit] = GL_TRUE;
- }
+ r200_validate_texgen(ctx, unit);
+ /* Configure the hardware registers (more precisely, the cached version
+ * of the hardware registers). */
+ setup_hardware_state(rmesa, t);
+
+ if (texObj->Target == GL_TEXTURE_RECTANGLE_NV ||
+ texObj->Target == GL_TEXTURE_2D ||
+ texObj->Target == GL_TEXTURE_1D)
+ set_re_cntl_d3d( ctx, unit, GL_FALSE );
+ else
+ set_re_cntl_d3d( ctx, unit, GL_TRUE );
+ R200_STATECHANGE( rmesa, ctx );
+ rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
+
+ R200_STATECHANGE( rmesa, vtx );
+ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
+ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
- if (t->dirty_state & (1<<unit)) {
- import_tex_obj_state( rmesa, unit, t );
- }
+ rmesa->recheck_texgen[unit] = GL_TRUE;
+ import_tex_obj_state( rmesa, unit, t );
if (rmesa->recheck_texgen[unit]) {
GLboolean fallback = !r200_validate_texgen( ctx, unit );
TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
rmesa->recheck_texgen[unit] = 0;
- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
- FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback );
- return !t->border_fallback;
-}
+ t->validated = GL_TRUE;
+ FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
+ return !t->border_fallback;
+}
-static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
+static GLboolean r200UpdateTextureUnit(GLcontext *ctx, int unit)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;
- if ( unitneeded & (TEXTURE_RECT_BIT) ) {
- return (enable_tex_rect( ctx, unit ) &&
- update_tex_common( ctx, unit ));
- }
- else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
- return (enable_tex_2d( ctx, unit ) &&
- update_tex_common( ctx, unit ));
- }
-#if ENABLE_HW_3D_TEXTURE
- else if ( unitneeded & (TEXTURE_3D_BIT) ) {
- return (enable_tex_3d( ctx, unit ) &&
- update_tex_common( ctx, unit ));
- }
-#endif
- else if ( unitneeded & (TEXTURE_CUBE_BIT) ) {
- return (enable_tex_cube( ctx, unit ) &&
- update_tex_common( ctx, unit ));
- }
- else if ( unitneeded ) {
- return GL_FALSE;
- }
- else {
- disable_tex( ctx, unit );
- return GL_TRUE;
+ if (!unitneeded) {
+ /* disable the unit */
+ disable_tex_obj_state(rmesa, unit);
+ return GL_TRUE;
}
+
+ if (!r200_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
+ _mesa_warning(ctx,
+ "failed to validate texture for unit %d.\n",
+ unit);
+ rmesa->state.texture.unit[unit].texobj = NULL;
+ return GL_FALSE;
+ }
+
+ rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
+ return GL_TRUE;
}
@@ -1850,11 +1621,11 @@ void r200UpdateTextureState( GLcontext *ctx )
FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
- if (rmesa->TclFallback)
+ if (rmesa->radeon.TclFallback)
r200ChooseVertexState( ctx );
- if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
+ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
/*
* T0 hang workaround -------------
@@ -1867,7 +1638,7 @@ void r200UpdateTextureState( GLcontext *ctx )
R200_STATECHANGE(rmesa, tex[1]);
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
- rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+ rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
}
else if (!ctx->ATIFragmentShader._Enabled) {
diff --git a/r200/r200_vertprog.c b/r200/r200_vertprog.c
index 4ce93b5..11405d7 100644
--- a/r200/r200_vertprog.c
+++ b/r200/r200_vertprog.c
@@ -423,7 +423,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte
~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
- if (R200_DEBUG & DEBUG_FALLBACKS) {
+ if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
mesa_vp->Base.InputsRead);
}
@@ -436,7 +436,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte
(1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) |
(1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) |
(1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) {
- if (R200_DEBUG & DEBUG_FALLBACKS) {
+ if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "can't handle vert prog outputs 0x%x\n",
mesa_vp->Base.OutputsWritten);
}
@@ -551,7 +551,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte
if (mesa_vp->Base.InputsRead & (1 << i)) {
array_count++;
if (array_count > 12) {
- if (R200_DEBUG & DEBUG_FALLBACKS) {
+ if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "more than 12 attribs used in vert prog\n");
}
return GL_FALSE;
@@ -571,13 +571,13 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte
}
if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
- if (R200_DEBUG & DEBUG_FALLBACKS) {
+ if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "can't handle vert prog without position output\n");
}
return GL_FALSE;
}
if (free_inputs & 1) {
- if (R200_DEBUG & DEBUG_FALLBACKS) {
+ if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "can't handle vert prog without position input\n");
}
return GL_FALSE;
@@ -1070,7 +1070,7 @@ else {
mesa_vp->Base.NumTemporaries + u_temp_used;
}
if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
- if (R200_DEBUG & DEBUG_FALLBACKS) {
+ if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
}
return GL_FALSE;
@@ -1078,7 +1078,7 @@ else {
u_temp_i = R200_VSF_MAX_TEMPS - 1;
if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
mesa_vp->Base.NumNativeInstructions = 129;
- if (R200_DEBUG & DEBUG_FALLBACKS) {
+ if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "more than 128 native instructions\n");
}
return GL_FALSE;
@@ -1110,9 +1110,9 @@ void r200SetupVertexProg( GLcontext *ctx ) {
}
/* could optimize setting up vertex progs away for non-tcl hw */
fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
- rmesa->r200Screen->drmSupportsVertexProgram);
+ rmesa->radeon.radeonScreen->drmSupportsVertexProgram);
TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
- if (rmesa->TclFallback) return;
+ if (rmesa->radeon.TclFallback) return;
R200_STATECHANGE( rmesa, vap );
/* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
diff --git a/r300/Makefile.am b/r300/Makefile.am
index a1fa2ef..a678ddb 100644
--- a/r300/Makefile.am
+++ b/r300/Makefile.am
@@ -1,3 +1,5 @@
+SUBDIRS = compiler
+
AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
R300_CFLAGS = -DCOMPILE_R300 -DR200_MERGED=0 -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
@@ -7,33 +9,40 @@ r300_dri_la_LTLIBRARIES = r300_dri.la
r300_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R300_CFLAGS)
r300_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
$(DRM_LIBS) $(DRI_LIBS)
+r300_dri_la_LIBADD = compiler/libr300compiler.la
r300_dri_ladir = @libdir@/dri
r300_dri_la_SOURCES = \
+ ../radeon/radeon_bo_legacy.c \
+ ../radeon/radeon_buffer_objects.c \
+ ../radeon/radeon_common_context.c \
+ ../radeon/radeon_common.c \
+ ../radeon/radeon_cs_legacy.c \
+ ../radeon/radeon_dma.c \
+ ../radeon/radeon_debug.c \
+ ../radeon/radeon_fbo.c \
+ ../radeon/radeon_lock.c \
+ ../radeon/radeon_mipmap_tree.c \
+ ../radeon/radeon_span.c \
+ ../radeon/radeon_queryobj.c \
+ ../radeon/radeon_texture.c \
../radeon/radeon_screen.c \
- radeon_context.c \
- radeon_ioctl.c \
- radeon_lock.c \
- radeon_span.c \
- radeon_state.c \
- r300_mem.c \
r300_context.c \
+ r300_draw.c \
r300_ioctl.c \
r300_cmdbuf.c \
r300_state.c \
r300_render.c \
- r300_texmem.c \
r300_tex.c \
r300_texstate.c \
- radeon_program.c \
- radeon_program_alu.c \
- radeon_program_pair.c \
- radeon_nqssadce.c \
r300_vertprog.c \
- r300_fragprog.c \
- r300_fragprog_swizzle.c \
- r300_fragprog_emit.c \
- r500_fragprog.c \
- r500_fragprog_emit.c \
+ r300_fragprog_common.c \
r300_shader.c \
r300_emit.c \
r300_swtcl.c
+
+if HAVE_LIBDRM_RADEON
+r300_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
+r300_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
+r300_dri_la_SOURCES += \
+ ../radeon/radeon_cs_space_drm.c
+endif
diff --git a/r300/compiler/Makefile.am b/r300/compiler/Makefile.am
new file mode 100644
index 0000000..e24a719
--- /dev/null
+++ b/r300/compiler/Makefile.am
@@ -0,0 +1,20 @@
+AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
+
+noinst_LTLIBRARIES = libr300compiler.la
+libr300compiler_la_CFLAGS = $(AM_CFLAGS) $(DRI_CFLAGS)
+libr300compiler_la_SOURCES = \
+ radeon_code.c \
+ radeon_compiler.c \
+ radeon_nqssadce.c \
+ radeon_program.c \
+ radeon_program_alu.c \
+ radeon_program_pair.c \
+ r3xx_fragprog.c \
+ r300_fragprog.c \
+ r300_fragprog_swizzle.c \
+ r300_fragprog_emit.c \
+ r500_fragprog.c \
+ r500_fragprog_emit.c \
+ r3xx_vertprog.c \
+ r3xx_vertprog_dump.c \
+ memory_pool.c
diff --git a/r300/compiler/memory_pool.c b/r300/compiler/memory_pool.c
new file mode 100644
index 0000000..37aa2b6
--- /dev/null
+++ b/r300/compiler/memory_pool.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "memory_pool.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define POOL_LARGE_ALLOC 4096
+#define POOL_ALIGN 4
+
+
+struct memory_block {
+ struct memory_block * next;
+};
+
+void memory_pool_init(struct memory_pool * pool)
+{
+ memset(pool, 0, sizeof(struct memory_pool));
+}
+
+
+void memory_pool_destroy(struct memory_pool * pool)
+{
+ while(pool->blocks) {
+ struct memory_block * block = pool->blocks;
+ pool->blocks = block->next;
+ free(block);
+ }
+}
+
+static void refill_pool(struct memory_pool * pool)
+{
+ unsigned int blocksize = pool->total_allocated;
+ struct memory_block * newblock;
+
+ if (!blocksize)
+ blocksize = 2*POOL_LARGE_ALLOC;
+
+ newblock = (struct memory_block*)malloc(blocksize);
+ newblock->next = pool->blocks;
+ pool->blocks = newblock;
+
+ pool->head = (unsigned char*)(newblock + 1);
+ pool->end = ((unsigned char*)newblock) + blocksize;
+ pool->total_allocated += blocksize;
+}
+
+
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
+{
+ if (bytes < POOL_LARGE_ALLOC) {
+ if (pool->head + bytes > pool->end)
+ refill_pool(pool);
+
+ assert(pool->head + bytes <= pool->end);
+
+ void * ptr = pool->head;
+
+ pool->head += bytes;
+ pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1));
+
+ return ptr;
+ } else {
+ struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block));
+
+ block->next = pool->blocks;
+ pool->blocks = block;
+
+ return (block + 1);
+ }
+}
+
+
diff --git a/r300/compiler/memory_pool.h b/r300/compiler/memory_pool.h
new file mode 100644
index 0000000..ce23c31
--- /dev/null
+++ b/r300/compiler/memory_pool.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef MEMORY_POOL_H
+#define MEMORY_POOL_H
+
+struct memory_block;
+
+/**
+ * Provides a pool of memory that can quickly be allocated from, at the
+ * cost of being unable to explicitly free one of the allocated blocks.
+ * Instead, the entire pool can be freed at once.
+ *
+ * The idea is to allow one to quickly allocate a flexible amount of
+ * memory during operations like shader compilation while avoiding
+ * reference counting headaches.
+ */
+struct memory_pool {
+ unsigned char * head;
+ unsigned char * end;
+ unsigned int total_allocated;
+ struct memory_block * blocks;
+};
+
+
+void memory_pool_init(struct memory_pool * pool);
+void memory_pool_destroy(struct memory_pool * pool);
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
+
+#endif /* MEMORY_POOL_H */
diff --git a/r300/compiler/r300_fragprog.c b/r300/compiler/r300_fragprog.c
new file mode 100644
index 0000000..6c9fba4
--- /dev/null
+++ b/r300/compiler/r300_fragprog.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r300_fragprog.h"
+
+#include "shader/prog_parameter.h"
+
+#include "../r300_reg.h"
+
+static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
+{
+ struct prog_src_register reg = { 0, };
+
+ reg.File = PROGRAM_STATE_VAR;
+ reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
+ reg.Swizzle = SWIZZLE_WWWW;
+ return reg;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following way:
+ * - premultiply texture coordinates for RECT
+ * - extract operand swizzles
+ * - introduce a temporary register when write masks are needed
+ */
+GLboolean r300_transform_TEX(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
+ void* data)
+{
+ struct r300_fragment_program_compiler *compiler =
+ (struct r300_fragment_program_compiler*)data;
+
+ if (inst->I.Opcode != OPCODE_TEX &&
+ inst->I.Opcode != OPCODE_TXB &&
+ inst->I.Opcode != OPCODE_TXP &&
+ inst->I.Opcode != OPCODE_KIL)
+ return GL_FALSE;
+
+ /* ARB_shadow & EXT_shadow_funcs */
+ if (inst->I.Opcode != OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+
+ if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
+ inst->I.Opcode = OPCODE_MOV;
+
+ if (comparefunc == GL_ALWAYS) {
+ inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
+ } else {
+ inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
+ }
+
+ return GL_TRUE;
+ } else {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+ GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
+ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
+ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
+ struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ int pass, fail;
+
+ inst_rcp->I.Opcode = OPCODE_RCP;
+ inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+ inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ inst_cmp->I.DstReg = inst->I.DstReg;
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = rc_find_free_temporary(c);
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+
+ inst_mad->I.Opcode = OPCODE_MAD;
+ inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
+ inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
+ inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
+ else
+ inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
+
+ inst_cmp->I.Opcode = OPCODE_CMP;
+ /* DstReg has been filled out above */
+ inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
+
+ if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
+ inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
+ inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
+ }
+ }
+
+ /* Hardware uses [0..1]x[0..1] range for rectangle textures
+ * instead of [0..Width]x[0..Height].
+ * Add a scaling instruction.
+ */
+ if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) {
+ struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mul->I.Opcode = OPCODE_MUL;
+ inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mul->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mul->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR;
+ inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit);
+
+ reset_srcreg(&inst->I.SrcReg[0]);
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index;
+ }
+
+ /* Cannot write texture to output registers or with masks */
+ if (inst->I.Opcode != OPCODE_KIL &&
+ (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg = inst->I.DstReg;
+ inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
+
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ }
+
+
+ /* Cannot read texture coordinate from constants file */
+ if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
+
+ reset_srcreg(&inst->I.SrcReg[0]);
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
+ }
+
+ return GL_TRUE;
+}
+
+/* just some random things... */
+void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+ struct r300_fragment_program_code *code = &c->code.r300;
+ int n, i, j;
+ static int pc = 0;
+
+ fprintf(stderr, "pc=%d*************************************\n", pc++);
+
+ fprintf(stderr, "Hardware program\n");
+ fprintf(stderr, "----------------\n");
+
+ for (n = 0; n <= (code->config & 3); n++) {
+ uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
+ int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
+ int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
+ int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
+ int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
+
+ fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
+ "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n,
+ alu_offset, tex_offset, alu_end, tex_end, code_addr);
+
+ if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
+ fprintf(stderr, " TEX:\n");
+ for (i = tex_offset;
+ i <= tex_offset + tex_end;
+ ++i) {
+ const char *instr;
+
+ switch ((code->tex.
+ inst[i] >> R300_TEX_INST_SHIFT) &
+ 15) {
+ case R300_TEX_OP_LD:
+ instr = "TEX";
+ break;
+ case R300_TEX_OP_KIL:
+ instr = "KIL";
+ break;
+ case R300_TEX_OP_TXP:
+ instr = "TXP";
+ break;
+ case R300_TEX_OP_TXB:
+ instr = "TXB";
+ break;
+ default:
+ instr = "UNKNOWN";
+ }
+
+ fprintf(stderr,
+ " %s t%i, %c%i, texture[%i] (%08x)\n",
+ instr,
+ (code->tex.
+ inst[i] >> R300_DST_ADDR_SHIFT) & 31,
+ 't',
+ (code->tex.
+ inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
+ (code->tex.
+ inst[i] & R300_TEX_ID_MASK) >>
+ R300_TEX_ID_SHIFT,
+ code->tex.inst[i]);
+ }
+ }
+
+ for (i = alu_offset;
+ i <= alu_offset + alu_end; ++i) {
+ char srcc[3][10], dstc[20];
+ char srca[3][10], dsta[20];
+ char argc[3][20];
+ char arga[3][20];
+ char flags[5], tmp[10];
+
+ for (j = 0; j < 3; ++j) {
+ int regc = code->alu.inst[i].rgb_addr >> (j * 6);
+ int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+
+ sprintf(srcc[j], "%c%i",
+ (regc & 32) ? 'c' : 't', regc & 31);
+ sprintf(srca[j], "%c%i",
+ (rega & 32) ? 'c' : 't', rega & 31);
+ }
+
+ dstc[0] = 0;
+ sprintf(flags, "%s%s%s",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
+ if (flags[0] != 0) {
+ sprintf(dstc, "t%i.%s ",
+ (code->alu.inst[i].
+ rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
+ flags);
+ }
+ sprintf(flags, "%s%s%s",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
+ if (flags[0] != 0) {
+ sprintf(tmp, "o%i.%s",
+ (code->alu.inst[i].
+ rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
+ flags);
+ strcat(dstc, tmp);
+ }
+
+ dsta[0] = 0;
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+ sprintf(dsta, "t%i.w ",
+ (code->alu.inst[i].
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
+ }
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
+ sprintf(tmp, "o%i.w ",
+ (code->alu.inst[i].
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
+ strcat(dsta, tmp);
+ }
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
+ strcat(dsta, "Z");
+ }
+
+ fprintf(stderr,
+ "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
+ " w: %3s %3s %3s -> %-20s (%08x)\n", i,
+ srcc[0], srcc[1], srcc[2], dstc,
+ code->alu.inst[i].rgb_addr, srca[0], srca[1],
+ srca[2], dsta, code->alu.inst[i].alpha_addr);
+
+ for (j = 0; j < 3; ++j) {
+ int regc = code->alu.inst[i].rgb_inst >> (j * 7);
+ int rega = code->alu.inst[i].alpha_inst >> (j * 7);
+ int d;
+ char buf[20];
+
+ d = regc & 31;
+ if (d < 12) {
+ switch (d % 4) {
+ case R300_ALU_ARGC_SRC0C_XYZ:
+ sprintf(buf, "%s.xyz",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_XXX:
+ sprintf(buf, "%s.xxx",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_YYY:
+ sprintf(buf, "%s.yyy",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_ZZZ:
+ sprintf(buf, "%s.zzz",
+ srcc[d / 4]);
+ break;
+ }
+ } else if (d < 15) {
+ sprintf(buf, "%s.www", srca[d - 12]);
+ } else if (d == 20) {
+ sprintf(buf, "0.0");
+ } else if (d == 21) {
+ sprintf(buf, "1.0");
+ } else if (d == 22) {
+ sprintf(buf, "0.5");
+ } else if (d >= 23 && d < 32) {
+ d -= 23;
+ switch (d / 3) {
+ case 0:
+ sprintf(buf, "%s.yzx",
+ srcc[d % 3]);
+ break;
+ case 1:
+ sprintf(buf, "%s.zxy",
+ srcc[d % 3]);
+ break;
+ case 2:
+ sprintf(buf, "%s.Wzy",
+ srcc[d % 3]);
+ break;
+ }
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(argc[j], "%s%s%s%s",
+ (regc & 32) ? "-" : "",
+ (regc & 64) ? "|" : "",
+ buf, (regc & 64) ? "|" : "");
+
+ d = rega & 31;
+ if (d < 9) {
+ sprintf(buf, "%s.%c", srcc[d / 3],
+ 'x' + (char)(d % 3));
+ } else if (d < 12) {
+ sprintf(buf, "%s.w", srca[d - 9]);
+ } else if (d == 16) {
+ sprintf(buf, "0.0");
+ } else if (d == 17) {
+ sprintf(buf, "1.0");
+ } else if (d == 18) {
+ sprintf(buf, "0.5");
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(arga[j], "%s%s%s%s",
+ (rega & 32) ? "-" : "",
+ (rega & 64) ? "|" : "",
+ buf, (rega & 64) ? "|" : "");
+ }
+
+ fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
+ " w: %8s %8s %8s op: %08x\n",
+ argc[0], argc[1], argc[2],
+ code->alu.inst[i].rgb_inst, arga[0], arga[1],
+ arga[2], code->alu.inst[i].alpha_inst);
+ }
+ }
+}
diff --git a/r300/compiler/r300_fragprog.h b/r300/compiler/r300_fragprog.h
new file mode 100644
index 0000000..0ac46db
--- /dev/null
+++ b/r300/compiler/r300_fragprog.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ * Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+
+extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+
+extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
+
+extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data);
+
+#endif
diff --git a/r300/r300_fragprog_emit.c b/r300/compiler/r300_fragprog_emit.c
index 9f0b7e3..c7227bb 100644
--- a/r300/r300_fragprog_emit.c
+++ b/r300/compiler/r300_fragprog_emit.c
@@ -40,57 +40,43 @@
#include "r300_fragprog.h"
+#include "../r300_reg.h"
+
#include "radeon_program_pair.h"
#include "r300_fragprog_swizzle.h"
-#include "r300_reg.h"
+struct r300_emit_state {
+ struct r300_fragment_program_compiler * compiler;
+
+ unsigned current_node : 2;
+ unsigned node_first_tex : 8;
+ unsigned node_first_alu : 8;
+ uint32_t node_flags;
+};
+
#define PROG_CODE \
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
- struct r300_fragment_program_code *code = c->code
+ struct r300_emit_state * emit = (struct r300_emit_state*)data; \
+ struct r300_fragment_program_compiler *c = emit->compiler; \
+ struct r300_fragment_program_code *code = &c->code->code.r300
#define error(fmt, args...) do { \
- fprintf(stderr, "%s::%s(): " fmt "\n", \
+ rc_error(&c->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
-static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex)
-{
- PROG_CODE;
-
- for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
- if (code->constant[*hwindex].File == file &&
- code->constant[*hwindex].Index == index)
- break;
- }
-
- if (*hwindex >= code->const_nr) {
- if (*hwindex >= PFS_NUM_CONST_REGS) {
- error("Out of hw constants!\n");
- return GL_FALSE;
- }
-
- code->const_nr++;
- code->constant[*hwindex].File = file;
- code->constant[*hwindex].Index = index;
- }
-
- return GL_TRUE;
-}
-
-
/**
* Mark a temporary register as used.
*/
static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
{
- if (index > code->max_temp_idx)
- code->max_temp_idx = index;
+ if (index > code->pixsize)
+ code->pixsize = index;
}
-static GLuint translate_rgb_opcode(GLuint opcode)
+static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R300_ALU_OUTC_CMP;
@@ -109,7 +95,7 @@ static GLuint translate_rgb_opcode(GLuint opcode)
}
}
-static GLuint translate_alpha_opcode(GLuint opcode)
+static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R300_ALU_OUTA_CMP;
@@ -138,70 +124,69 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
{
PROG_CODE;
- if (code->alu.length >= PFS_MAX_ALU_INST) {
+ if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
error("Too many ALU instructions");
return GL_FALSE;
}
int ip = code->alu.length++;
int j;
- code->node[code->cur_node].alu_end++;
- code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode);
- code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode);
+ code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
+ code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
if (!inst->RGB.Src[j].Constant)
use_temporary(code, inst->RGB.Src[j].Index);
- code->alu.inst[ip].inst1 |= src << (6*j);
+ code->alu.inst[ip].rgb_addr |= src << (6*j);
src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
if (!inst->Alpha.Src[j].Constant)
use_temporary(code, inst->Alpha.Src[j].Index);
- code->alu.inst[ip].inst3 |= src << (6*j);
+ code->alu.inst[ip].alpha_addr |= src << (6*j);
GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
arg |= inst->RGB.Arg[j].Abs << 6;
arg |= inst->RGB.Arg[j].Negate << 5;
- code->alu.inst[ip].inst0 |= arg << (7*j);
+ code->alu.inst[ip].rgb_inst |= arg << (7*j);
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
arg |= inst->Alpha.Arg[j].Abs << 6;
arg |= inst->Alpha.Arg[j].Negate << 5;
- code->alu.inst[ip].inst2 |= arg << (7*j);
+ code->alu.inst[ip].alpha_inst |= arg << (7*j);
}
if (inst->RGB.Saturate)
- code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP;
+ code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
if (inst->Alpha.Saturate)
- code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP;
+ code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
if (inst->RGB.WriteMask) {
use_temporary(code, inst->RGB.DestIndex);
- code->alu.inst[ip].inst1 |=
+ code->alu.inst[ip].rgb_addr |=
(inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
}
if (inst->RGB.OutputWriteMask) {
- code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
- code->node[code->cur_node].flags |= R300_RGBA_OUT;
+ code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
+ emit->node_flags |= R300_RGBA_OUT;
}
if (inst->Alpha.WriteMask) {
use_temporary(code, inst->Alpha.DestIndex);
- code->alu.inst[ip].inst3 |=
+ code->alu.inst[ip].alpha_addr |=
(inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
R300_ALU_DSTA_REG;
}
if (inst->Alpha.OutputWriteMask) {
- code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT;
- code->node[code->cur_node].flags |= R300_RGBA_OUT;
+ code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT;
+ emit->node_flags |= R300_RGBA_OUT;
}
if (inst->Alpha.DepthWriteMask) {
- code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
- code->node[code->cur_node].flags |= R300_W_OUT;
- c->fp->WritesDepth = GL_TRUE;
+ code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
+ emit->node_flags |= R300_W_OUT;
+ c->code->writes_depth = GL_TRUE;
}
return GL_TRUE;
@@ -211,31 +196,50 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
/**
* Finish the current node without advancing to the next one.
*/
-static GLboolean finish_node(struct r300_fragment_program_compiler *c)
+static GLboolean finish_node(struct r300_emit_state * emit)
{
- struct r300_fragment_program_code *code = c->code;
- struct r300_fragment_program_node *node = &code->node[code->cur_node];
+ struct r300_fragment_program_compiler * c = emit->compiler;
+ struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
- if (node->alu_end < 0) {
+ if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
struct radeon_pair_instruction inst;
_mesa_bzero(&inst, sizeof(inst));
- if (!emit_alu(c, &inst))
+ if (!emit_alu(emit, &inst))
return GL_FALSE;
}
- if (node->tex_end < 0) {
- if (code->cur_node == 0) {
- node->tex_end = 0;
- } else {
- error("Node %i has no TEX instructions", code->cur_node);
+ unsigned alu_offset = emit->node_first_alu;
+ unsigned alu_end = code->alu.length - alu_offset - 1;
+ unsigned tex_offset = emit->node_first_tex;
+ unsigned tex_end = code->tex.length - tex_offset - 1;
+
+ if (code->tex.length == emit->node_first_tex) {
+ if (emit->current_node > 0) {
+ error("Node %i has no TEX instructions", emit->current_node);
return GL_FALSE;
}
+
+ tex_end = 0;
} else {
- if (code->cur_node == 0)
- code->first_node_has_tex = 1;
+ if (emit->current_node == 0)
+ code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
}
+ /* Write the config register.
+ * Note: The order in which the words for each node are written
+ * is not correct here and needs to be fixed up once we're entirely
+ * done
+ *
+ * Also note that the register specification from AMD is slightly
+ * incorrect in its description of this register. */
+ code->code_addr[emit->current_node] =
+ (alu_offset << R300_ALU_START_SHIFT) |
+ (alu_end << R300_ALU_SIZE_SHIFT) |
+ (tex_offset << R300_TEX_START_SHIFT) |
+ (tex_end << R300_TEX_SIZE_SHIFT) |
+ emit->node_flags;
+
return GL_TRUE;
}
@@ -248,64 +252,61 @@ static GLboolean begin_tex(void* data)
{
PROG_CODE;
- if (code->cur_node == 0) {
- if (code->node[0].alu_end < 0 &&
- code->node[0].tex_end < 0)
- return GL_TRUE;
+ if (code->alu.length == emit->node_first_alu &&
+ code->tex.length == emit->node_first_tex) {
+ return GL_TRUE;
}
- if (code->cur_node == 3) {
+ if (emit->current_node == 3) {
error("Too many texture indirections");
return GL_FALSE;
}
- if (!finish_node(c))
+ if (!finish_node(emit))
return GL_FALSE;
- struct r300_fragment_program_node *node = &code->node[++code->cur_node];
- node->alu_offset = code->alu.length;
- node->alu_end = -1;
- node->tex_offset = code->tex.length;
- node->tex_end = -1;
+ emit->current_node++;
+ emit->node_first_tex = code->tex.length;
+ emit->node_first_alu = code->alu.length;
+ emit->node_flags = 0;
return GL_TRUE;
}
-static GLboolean emit_tex(void* data, struct prog_instruction* inst)
+static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst)
{
PROG_CODE;
- if (code->tex.length >= PFS_MAX_TEX_INST) {
+ if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
error("Too many TEX instructions");
return GL_FALSE;
}
GLuint unit = inst->TexSrcUnit;
- GLuint dest = inst->DstReg.Index;
+ GLuint dest = inst->DestIndex;
GLuint opcode;
switch(inst->Opcode) {
- case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
- case OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
- case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
- case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+ case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+ case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+ case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+ case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
default:
error("Unknown texture opcode %i", inst->Opcode);
return GL_FALSE;
}
- if (inst->Opcode == OPCODE_KIL) {
+ if (inst->Opcode == RADEON_OPCODE_KIL) {
unit = 0;
dest = 0;
} else {
use_temporary(code, dest);
}
- use_temporary(code, inst->SrcReg[0].Index);
+ use_temporary(code, inst->SrcIndex);
- code->node[code->cur_node].tex_end++;
code->tex.inst[code->tex.length++] =
- (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
+ (inst->SrcIndex << R300_SRC_ADDR_SHIFT) |
(dest << R300_DST_ADDR_SHIFT) |
(unit << R300_TEX_ID_SHIFT) |
(opcode << R300_TEX_INST_SHIFT);
@@ -314,31 +315,46 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst)
static const struct radeon_pair_handler pair_handler = {
- .EmitConst = &emit_const,
.EmitPaired = &emit_alu,
.EmitTex = &emit_tex,
.BeginTexBlock = &begin_tex,
- .MaxHwTemps = PFS_NUM_TEMP_REGS
+ .MaxHwTemps = R300_PFS_NUM_TEMP_REGS
};
/**
* Final compilation step: Turn the intermediate radeon_program into
* machine-readable instructions.
*/
-GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler)
+void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
{
- struct r300_fragment_program_code *code = compiler->code;
-
- _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
- code->node[0].alu_end = -1;
- code->node[0].tex_end = -1;
+ struct r300_emit_state emit;
+ struct r300_fragment_program_code *code = &compiler->code->code.r300;
- if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
- return GL_FALSE;
+ memset(&emit, 0, sizeof(emit));
+ emit.compiler = compiler;
- if (!finish_node(compiler))
- return GL_FALSE;
+ _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
- return GL_TRUE;
+ radeonPairProgram(compiler, &pair_handler, &emit);
+ if (compiler->Base.Error)
+ return;
+
+ /* Finish the program */
+ finish_node(&emit);
+
+ code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+ code->code_offset =
+ (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
+ ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
+ (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
+ ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
+
+ if (emit.current_node < 3) {
+ int shift = 3 - emit.current_node;
+ int i;
+ for(i = emit.current_node; i >= 0; --i)
+ code->code_addr[shift + i] = code->code_addr[i];
+ for(i = 0; i < shift; ++i)
+ code->code_addr[i] = 0;
+ }
}
-
diff --git a/r300/r300_fragprog_swizzle.c b/r300/compiler/r300_fragprog_swizzle.c
index fc9d855..1b14cc3 100644
--- a/r300/r300_fragprog_swizzle.c
+++ b/r300/compiler/r300_fragprog_swizzle.c
@@ -33,8 +33,9 @@
#include "r300_fragprog_swizzle.h"
-#include "r300_reg.h"
+#include "../r300_reg.h"
#include "radeon_nqssadce.h"
+#include "radeon_compiler.h"
#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
@@ -174,18 +175,15 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst,
}
}
- struct prog_instruction *inst;
-
- _mesa_insert_instructions(s->Program, s->IP, 1);
- inst = s->Program->Instructions + s->IP++;
- inst->Opcode = OPCODE_MOV;
- inst->DstReg = dst;
- inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
- inst->SrcReg[0] = src;
- inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
+ struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg = dst;
+ inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
+ inst->I.SrcReg[0] = src;
+ inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
/* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
- dst.WriteMask &= ~inst->DstReg.WriteMask;
+ dst.WriteMask &= ~inst->I.DstReg.WriteMask;
}
}
diff --git a/r300/r300_fragprog_swizzle.h b/r300/compiler/r300_fragprog_swizzle.h
index 231bf4e..231bf4e 100644
--- a/r300/r300_fragprog_swizzle.h
+++ b/r300/compiler/r300_fragprog_swizzle.h
diff --git a/r300/compiler/r3xx_fragprog.c b/r300/compiler/r3xx_fragprog.c
new file mode 100644
index 0000000..76c3a7e
--- /dev/null
+++ b/r300/compiler/r3xx_fragprog.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+
+#include "radeon_nqssadce.h"
+#include "radeon_program_alu.h"
+#include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
+#include "r500_fragprog.h"
+
+
+static void nqssadce_init(struct nqssadce_state* s)
+{
+ struct r300_fragment_program_compiler * c = s->UserData;
+ s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW;
+ s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W;
+}
+
+static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
+{
+ struct rc_instruction *rci;
+
+ for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
+ struct prog_instruction * inst = &rci->I;
+
+ if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth)
+ continue;
+
+ if (inst->DstReg.WriteMask & WRITEMASK_Z) {
+ inst->DstReg.WriteMask = WRITEMASK_W;
+ } else {
+ inst->DstReg.WriteMask = 0;
+ continue;
+ }
+
+ switch (inst->Opcode) {
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ break;
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ break;
+ case OPCODE_CMP:
+ case OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ break;
+ default:
+ // Scalar instructions needn't be reswizzled
+ break;
+ }
+ }
+}
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
+{
+ rewrite_depth_out(c);
+
+ if (c->is_r500) {
+ struct radeon_program_transformation transformations[] = {
+ { &r500_transform_TEX, c },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformDeriv, 0 },
+ { &radeonTransformTrigScale, 0 }
+ };
+ radeonLocalTransform(&c->Base, 4, transformations);
+ } else {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_TEX, c },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(&c->Base, 3, transformations);
+ }
+
+ if (c->Base.Debug) {
+ _mesa_printf("Fragment Program: After native rewrite:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stderr);
+ }
+
+ if (c->is_r500) {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r500FPIsNativeSwizzle,
+ .BuildSwizzle = &r500FPBuildSwizzle
+ };
+ radeonNqssaDce(&c->Base, &nqssadce, c);
+ } else {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r300FPIsNativeSwizzle,
+ .BuildSwizzle = &r300FPBuildSwizzle
+ };
+ radeonNqssaDce(&c->Base, &nqssadce, c);
+ }
+
+ if (c->Base.Debug) {
+ _mesa_printf("Compiler: after NqSSA-DCE:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stderr);
+ }
+
+ if (c->is_r500) {
+ r500BuildFragmentProgramHwCode(c);
+ } else {
+ r300BuildFragmentProgramHwCode(c);
+ }
+
+ rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+
+ if (c->Base.Debug) {
+ if (c->is_r500) {
+ r500FragmentProgramDump(c->code);
+ } else {
+ r300FragmentProgramDump(c->code);
+ }
+ }
+}
diff --git a/r300/compiler/r3xx_vertprog.c b/r300/compiler/r3xx_vertprog.c
new file mode 100644
index 0000000..dad27fc
--- /dev/null
+++ b/r300/compiler/r3xx_vertprog.c
@@ -0,0 +1,656 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_nqssadce.h"
+#include "radeon_program.h"
+#include "radeon_program_alu.h"
+
+#include "shader/prog_print.h"
+
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ */
+#define __CONST(x, y) \
+ (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_src_class(vpi->SrcReg[x].File), \
+ NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+
+
+static unsigned long t_dst_mask(GLuint mask)
+{
+ /* WRITEMASK_* is equivalent to VSF_FLAG_* */
+ return mask & WRITEMASK_XYZW;
+}
+
+static unsigned long t_dst_class(gl_register_file file)
+{
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ return PVS_DST_REG_TEMPORARY;
+ case PROGRAM_OUTPUT:
+ return PVS_DST_REG_OUT;
+ case PROGRAM_ADDRESS:
+ return PVS_DST_REG_A0;
+ /*
+ case PROGRAM_INPUT:
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_WRITE_ONLY:
+ case PROGRAM_ADDRESS:
+ */
+ default:
+ fprintf(stderr, "problem in %s", __FUNCTION__);
+ _mesa_exit(-1);
+ return -1;
+ }
+}
+
+static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
+ struct prog_dst_register *dst)
+{
+ if (dst->File == PROGRAM_OUTPUT)
+ return vp->outputs[dst->Index];
+
+ return dst->Index;
+}
+
+static unsigned long t_src_class(gl_register_file file)
+{
+ switch (file) {
+ case PROGRAM_BUILTIN:
+ case PROGRAM_TEMPORARY:
+ return PVS_SRC_REG_TEMPORARY;
+ case PROGRAM_INPUT:
+ return PVS_SRC_REG_INPUT;
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_STATE_VAR:
+ return PVS_SRC_REG_CONSTANT;
+ /*
+ case PROGRAM_OUTPUT:
+ case PROGRAM_WRITE_ONLY:
+ case PROGRAM_ADDRESS:
+ */
+ default:
+ fprintf(stderr, "problem in %s", __FUNCTION__);
+ _mesa_exit(-1);
+ return -1;
+ }
+}
+
+static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b)
+{
+ unsigned long aclass = t_src_class(a.File);
+ unsigned long bclass = t_src_class(b.File);
+
+ if (aclass != bclass)
+ return GL_FALSE;
+ if (aclass == PVS_SRC_REG_TEMPORARY)
+ return GL_FALSE;
+
+ if (a.RelAddr || b.RelAddr)
+ return GL_TRUE;
+ if (a.Index != b.Index)
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+static INLINE unsigned long t_swizzle(GLubyte swizzle)
+{
+ /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+ return swizzle;
+}
+
+static unsigned long t_src_index(struct r300_vertex_program_code *vp,
+ struct prog_src_register *src)
+{
+ if (src->File == PROGRAM_INPUT) {
+ assert(vp->inputs[src->Index] != -1);
+ return vp->inputs[src->Index];
+ } else {
+ if (src->Index < 0) {
+ fprintf(stderr,
+ "negative offsets for indirect addressing do not work.\n");
+ return 0;
+ }
+ return src->Index;
+ }
+}
+
+/* these two functions should probably be merged... */
+
+static unsigned long t_src(struct r300_vertex_program_code *vp,
+ struct prog_src_register *src)
+{
+ /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 1)),
+ t_swizzle(GET_SWZ(src->Swizzle, 2)),
+ t_swizzle(GET_SWZ(src->Swizzle, 3)),
+ t_src_class(src->File),
+ src->Negate) | (src->RelAddr << 4);
+}
+
+static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
+ struct prog_src_register *src)
+{
+ /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_src_class(src->File),
+ src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src->RelAddr << 4);
+}
+
+static GLboolean valid_dst(struct r300_vertex_program_code *vp,
+ struct prog_dst_register *dst)
+{
+ if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return GL_FALSE;
+ } else if (dst->File == PROGRAM_ADDRESS) {
+ assert(dst->Index == 0);
+ }
+
+ return GL_TRUE;
+}
+
+static void ei_vector1(struct r300_vertex_program_code *vp,
+ GLuint hw_opcode,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+}
+
+static void ei_vector2(struct r300_vertex_program_code *vp,
+ GLuint hw_opcode,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = t_src(vp, &vpi->SrcReg[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+}
+
+static void ei_math1(struct r300_vertex_program_code *vp,
+ GLuint hw_opcode,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+}
+
+static void ei_lit(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+ inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ /* NOTE: Users swizzling might not work. */
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+ inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+}
+
+static void ei_mad(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ /* Remarks about hardware limitations of MAD
+ * (please preserve this comment, as this information is _NOT_
+ * in the documentation provided by AMD).
+ *
+ * As described in the documentation, MAD with three unique temporary
+ * source registers requires the use of the macro version.
+ *
+ * However (and this is not mentioned in the documentation), apparently
+ * the macro version is _NOT_ a full superset of the normal version.
+ * In particular, the macro version does not always work when relative
+ * addressing is used in the source operands.
+ *
+ * This limitation caused incorrect rendering in Sauerbraten's OpenGL
+ * assembly shader path when using medium quality animations
+ * (i.e. animations with matrix blending instead of quaternion blending).
+ *
+ * Unfortunately, I (nha) have been unable to extract a Piglit regression
+ * test for this issue - for some reason, it is possible to have vertex
+ * programs whose prefix is *exactly* the same as the prefix of the
+ * offending program in Sauerbraten up to the offending instruction
+ * without causing any trouble.
+ *
+ * Bottom line: Only use the macro version only when really necessary;
+ * according to AMD docs, this should improve performance by one clock
+ * as a nice side bonus.
+ */
+ if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY &&
+ vpi->SrcReg[1].File == PROGRAM_TEMPORARY &&
+ vpi->SrcReg[2].File == PROGRAM_TEMPORARY &&
+ vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
+ vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
+ vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
+ inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+ GL_FALSE,
+ GL_TRUE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ } else {
+ inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ }
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = t_src(vp, &vpi->SrcReg[1]);
+ inst[3] = t_src(vp, &vpi->SrcReg[2]);
+}
+
+static void ei_pow(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
+}
+
+
+static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
+{
+ struct rc_instruction *rci;
+
+ compiler->code->pos_end = 0; /* Not supported yet */
+ compiler->code->length = 0;
+
+ compiler->SetHwInputOutput(compiler);
+
+ for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
+ struct prog_instruction *vpi = &rci->I;
+ GLuint *inst = compiler->code->body.d + compiler->code->length;
+
+ /* Skip instructions writing to non-existing destination */
+ if (!valid_dst(compiler->code, &vpi->DstReg))
+ continue;
+
+ if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+ rc_error(&compiler->Base, "Vertex program has too many instructions\n");
+ return;
+ }
+
+ switch (vpi->Opcode) {
+ case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+ case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+ case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+ case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+ case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+ case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+ case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+ case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+ case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+ case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+ case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+ case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+ case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+ case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+ case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+ case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+ case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+ case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+ default:
+ rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
+ return;
+ }
+
+ compiler->code->length += 4;
+
+ if (compiler->Base.Error)
+ return;
+ }
+}
+
+struct temporary_allocation {
+ GLuint Allocated:1;
+ GLuint HwTemp:15;
+ struct rc_instruction * LastRead;
+};
+
+static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
+{
+ struct rc_instruction *inst;
+ GLuint num_orig_temps = 0;
+ GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ struct temporary_allocation * ta;
+ GLuint i, j;
+
+ compiler->code->num_temporaries = 0;
+ memset(hwtemps, 0, sizeof(hwtemps));
+
+ /* Pass 1: Count original temporaries and allocate structures */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+ for (i = 0; i < numsrcs; ++i) {
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
+ if (inst->I.SrcReg[i].Index >= num_orig_temps)
+ num_orig_temps = inst->I.SrcReg[i].Index + 1;
+ }
+ }
+
+ if (numdsts) {
+ if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
+ if (inst->I.DstReg.Index >= num_orig_temps)
+ num_orig_temps = inst->I.DstReg.Index + 1;
+ }
+ }
+ }
+
+ ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
+ sizeof(struct temporary_allocation) * num_orig_temps);
+ memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
+
+ /* Pass 2: Determine original temporary lifetimes */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+
+ for (i = 0; i < numsrcs; ++i) {
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
+ ta[inst->I.SrcReg[i].Index].LastRead = inst;
+ }
+ }
+
+ /* Pass 3: Register allocation */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+ for (i = 0; i < numsrcs; ++i) {
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
+ GLuint orig = inst->I.SrcReg[i].Index;
+ inst->I.SrcReg[i].Index = ta[orig].HwTemp;
+
+ if (ta[orig].Allocated && inst == ta[orig].LastRead)
+ hwtemps[ta[orig].HwTemp] = GL_FALSE;
+ }
+ }
+
+ if (numdsts) {
+ if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
+ GLuint orig = inst->I.DstReg.Index;
+
+ if (!ta[orig].Allocated) {
+ for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+ if (!hwtemps[j])
+ break;
+ }
+ if (j >= VSF_MAX_FRAGMENT_TEMPS) {
+ fprintf(stderr, "Out of hw temporaries\n");
+ } else {
+ ta[orig].Allocated = GL_TRUE;
+ ta[orig].HwTemp = j;
+ hwtemps[j] = GL_TRUE;
+
+ if (j >= compiler->code->num_temporaries)
+ compiler->code->num_temporaries = j + 1;
+ }
+ }
+
+ inst->I.DstReg.Index = ta[orig].HwTemp;
+ }
+ }
+ }
+}
+
+
+/**
+ * Vertex engine cannot read two inputs or two constants at the same time.
+ * Introduce intermediate MOVs to temporary registers to account for this.
+ */
+static GLboolean transform_source_conflicts(
+ struct radeon_compiler *c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
+
+ if (num_operands == 3) {
+ if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
+ || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
+ int tmpreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = tmpreg;
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
+
+ reset_srcreg(&inst->I.SrcReg[2]);
+ inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[2].Index = tmpreg;
+ }
+ }
+
+ if (num_operands >= 2) {
+ if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
+ int tmpreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = tmpreg;
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
+
+ reset_srcreg(&inst->I.SrcReg[1]);
+ inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[1].Index = tmpreg;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
+{
+ int i;
+
+ for(i = 0; i < 32; ++i) {
+ if ((compiler->RequiredOutputs & (1 << i)) &&
+ !(compiler->Base.Program.OutputsWritten & (1 << i))) {
+ struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+ inst->I.Opcode = OPCODE_MOV;
+
+ inst->I.DstReg.File = PROGRAM_OUTPUT;
+ inst->I.DstReg.Index = i;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+
+ inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
+ inst->I.SrcReg[0].Index = 0;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ compiler->Base.Program.OutputsWritten |= 1 << i;
+ }
+ }
+}
+
+static void nqssadceInit(struct nqssadce_state* s)
+{
+ struct r300_vertex_program_compiler * compiler = s->UserData;
+ int i;
+
+ for(i = 0; i < VERT_RESULT_MAX; ++i) {
+ if (compiler->RequiredOutputs & (1 << i))
+ s->Outputs[i].Sourced = WRITEMASK_XYZW;
+ }
+}
+
+static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
+{
+ (void) opcode;
+ (void) reg;
+
+ return GL_TRUE;
+}
+
+
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
+{
+ addArtificialOutputs(compiler);
+
+ {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_vertex_alu, 0 },
+ };
+ radeonLocalTransform(&compiler->Base, 1, transformations);
+ }
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after native rewrite:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stderr);
+ }
+
+ {
+ /* Note: This pass has to be done seperately from ALU rewrite,
+ * otherwise non-native ALU instructions with source conflits
+ * will not be treated properly.
+ */
+ struct radeon_program_transformation transformations[] = {
+ { &transform_source_conflicts, 0 },
+ };
+ radeonLocalTransform(&compiler->Base, 1, transformations);
+ }
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after source conflict resolve:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stderr);
+ }
+
+ {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadceInit,
+ .IsNativeSwizzle = &swizzleIsNative,
+ .BuildSwizzle = NULL
+ };
+ radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
+
+ /* We need this step for reusing temporary registers */
+ allocate_temporary_registers(compiler);
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after NQSSADCE:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stderr);
+ }
+ }
+
+ translate_vertex_program(compiler);
+
+ rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
+
+ compiler->code->InputsRead = compiler->Base.Program.InputsRead;
+ compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Final vertex program code:\n");
+ r300_vertex_program_dump(compiler->code);
+ }
+}
diff --git a/r300/compiler/r3xx_vertprog_dump.c b/r300/compiler/r3xx_vertprog_dump.c
new file mode 100644
index 0000000..980ef3e
--- /dev/null
+++ b/r300/compiler/r3xx_vertprog_dump.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_code.h"
+
+#include <stdio.h>
+
+static char* r300_vs_ve_ops[] = {
+ /* R300 vector ops */
+ " VE_NO_OP",
+ " VE_DOT_PRODUCT",
+ " VE_MULTIPLY",
+ " VE_ADD",
+ " VE_MULTIPLY_ADD",
+ " VE_DISTANCE_FACTOR",
+ " VE_FRACTION",
+ " VE_MAXIMUM",
+ " VE_MINIMUM",
+ "VE_SET_GREATER_THAN_EQUAL",
+ " VE_SET_LESS_THAN",
+ " VE_MULTIPLYX2_ADD",
+ " VE_MULTIPLY_CLAMP",
+ " VE_FLT2FIX_DX",
+ " VE_FLT2FIX_DX_RND",
+ /* R500 vector ops */
+ " VE_PRED_SET_EQ_PUSH",
+ " VE_PRED_SET_GT_PUSH",
+ " VE_PRED_SET_GTE_PUSH",
+ " VE_PRED_SET_NEQ_PUSH",
+ " VE_COND_WRITE_EQ",
+ " VE_COND_WRITE_GT",
+ " VE_COND_WRITE_GTE",
+ " VE_COND_WRITE_NEQ",
+ " VE_SET_GREATER_THAN",
+ " VE_SET_EQUAL",
+ " VE_SET_NOT_EQUAL",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+static char* r300_vs_me_ops[] = {
+ /* R300 math ops */
+ " ME_NO_OP",
+ " ME_EXP_BASE2_DX",
+ " ME_LOG_BASE2_DX",
+ " ME_EXP_BASEE_FF",
+ " ME_LIGHT_COEFF_DX",
+ " ME_POWER_FUNC_FF",
+ " ME_RECIP_DX",
+ " ME_RECIP_FF",
+ " ME_RECIP_SQRT_DX",
+ " ME_RECIP_SQRT_FF",
+ " ME_MULTIPLY",
+ " ME_EXP_BASE2_FULL_DX",
+ " ME_LOG_BASE2_FULL_DX",
+ " ME_POWER_FUNC_FF_CLAMP_B",
+ "ME_POWER_FUNC_FF_CLAMP_B1",
+ "ME_POWER_FUNC_FF_CLAMP_01",
+ " ME_SIN",
+ " ME_COS",
+ /* R500 math ops */
+ " ME_LOG_BASE2_IEEE",
+ " ME_RECIP_IEEE",
+ " ME_RECIP_SQRT_IEEE",
+ " ME_PRED_SET_EQ",
+ " ME_PRED_SET_GT",
+ " ME_PRED_SET_GTE",
+ " ME_PRED_SET_NEQ",
+ " ME_PRED_SET_CLR",
+ " ME_PRED_SET_INV",
+ " ME_PRED_SET_POP",
+ " ME_PRED_SET_RESTORE",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+/* XXX refactor to avoid clashing symbols */
+static char* r300_vs_src_debug[] = {
+ "t",
+ "i",
+ "c",
+ "a",
+};
+
+static char* r300_vs_dst_debug[] = {
+ "t",
+ "a0",
+ "o",
+ "ox",
+ "a",
+ "i",
+ "u",
+ "u",
+};
+
+static char* r300_vs_swiz_debug[] = {
+ "X",
+ "Y",
+ "Z",
+ "W",
+ "0",
+ "1",
+ "U",
+ "U",
+};
+
+
+static void r300_vs_op_dump(uint32_t op)
+{
+ fprintf(stderr, " dst: %d%s op: ",
+ (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+ if (op & 0x80) {
+ if (op & 0x1) {
+ fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
+ } else {
+ fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n");
+ }
+ } else if (op & 0x40) {
+ fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]);
+ } else {
+ fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]);
+ }
+}
+
+static void r300_vs_src_dump(uint32_t src)
+{
+ fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
+ (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3],
+ src & (1 << 25) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 13) & 0x7],
+ src & (1 << 26) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 16) & 0x7],
+ src & (1 << 27) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 19) & 0x7],
+ src & (1 << 28) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 22) & 0x7]);
+}
+
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+{
+ unsigned instrcount = vs->length / 4;
+ unsigned i;
+
+ for(i = 0; i < instrcount; i++) {
+ unsigned offset = i*4;
+ unsigned src;
+
+ fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]);
+ r300_vs_op_dump(vs->body.d[offset]);
+
+ for(src = 0; src < 3; ++src) {
+ fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]);
+ r300_vs_src_dump(vs->body.d[offset+1+src]);
+ }
+ }
+}
diff --git a/r300/compiler/r500_fragprog.c b/r300/compiler/r500_fragprog.c
new file mode 100644
index 0000000..7e2faed
--- /dev/null
+++ b/r300/compiler/r500_fragprog.c
@@ -0,0 +1,449 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include "../r300_reg.h"
+
+static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
+{
+ struct prog_src_register reg = { 0, };
+
+ reg.File = PROGRAM_STATE_VAR;
+ reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
+ reg.Swizzle = SWIZZLE_WWWW;
+ return reg;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following way:
+ * - implement texture compare (shadow extensions)
+ * - extract non-native source / destination operands
+ */
+GLboolean r500_transform_TEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data)
+{
+ struct r300_fragment_program_compiler *compiler =
+ (struct r300_fragment_program_compiler*)data;
+
+ if (inst->I.Opcode != OPCODE_TEX &&
+ inst->I.Opcode != OPCODE_TXB &&
+ inst->I.Opcode != OPCODE_TXP &&
+ inst->I.Opcode != OPCODE_KIL)
+ return GL_FALSE;
+
+ /* ARB_shadow & EXT_shadow_funcs */
+ if (inst->I.Opcode != OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+
+ if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
+ inst->I.Opcode = OPCODE_MOV;
+
+ if (comparefunc == GL_ALWAYS) {
+ inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
+ } else {
+ inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
+ }
+
+ return GL_TRUE;
+ } else {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+ GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
+ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
+ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
+ struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ int pass, fail;
+
+ inst_rcp->I.Opcode = OPCODE_RCP;
+ inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+ inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ inst_cmp->I.DstReg = inst->I.DstReg;
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = rc_find_free_temporary(c);
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+
+ inst_mad->I.Opcode = OPCODE_MAD;
+ inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
+ inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
+ inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
+ else
+ inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
+
+ inst_cmp->I.Opcode = OPCODE_CMP;
+ /* DstReg has been filled out above */
+ inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
+
+ if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
+ inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
+ inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
+ }
+ }
+
+ /* Cannot write texture to output registers */
+ if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg = inst->I.DstReg;
+ inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
+
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ }
+
+ /* Cannot read texture coordinate from constants file */
+ if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
+
+ reset_srcreg(&inst->I.SrcReg[0]);
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
+{
+ GLuint relevant;
+ int i;
+
+ if (opcode == OPCODE_TEX ||
+ opcode == OPCODE_TXB ||
+ opcode == OPCODE_TXP ||
+ opcode == OPCODE_KIL) {
+ if (reg.Abs)
+ return GL_FALSE;
+
+ if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE))
+ return GL_FALSE;
+
+ if (reg.Negate)
+ reg.Negate ^= NEGATE_XYZW;
+
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(reg.Swizzle, i);
+ if (swz == SWIZZLE_NIL) {
+ reg.Negate &= ~(1 << i);
+ continue;
+ }
+ if (swz >= 4)
+ return GL_FALSE;
+ }
+
+ if (reg.Negate)
+ return GL_FALSE;
+
+ return GL_TRUE;
+ } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
+ /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
+ * if it doesn't fit perfectly into a .xyzw case... */
+ if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate)
+ return GL_TRUE;
+
+ return GL_FALSE;
+ } else {
+ /* ALU instructions support almost everything */
+ if (reg.Abs)
+ return GL_TRUE;
+
+ relevant = 0;
+ for(i = 0; i < 3; ++i) {
+ GLuint swz = GET_SWZ(reg.Swizzle, i);
+ if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+ relevant |= 1 << i;
+ }
+ if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+ return GL_FALSE;
+
+ return GL_TRUE;
+ }
+}
+
+/**
+ * Implement a MOV with a potentially non-native swizzle.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation. Therefore, we split the MOV into two instructions when necessary.
+ */
+void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
+{
+ GLuint negatebase[2] = { 0, 0 };
+ int i;
+
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(src.Swizzle, i);
+ if (swz == SWIZZLE_NIL)
+ continue;
+ negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
+ }
+
+ for(i = 0; i <= 1; ++i) {
+ if (!negatebase[i])
+ continue;
+
+ struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg = dst;
+ inst->I.DstReg.WriteMask = negatebase[i];
+ inst->I.SrcReg[0] = src;
+ inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
+ }
+}
+
+
+static char *toswiz(int swiz_val) {
+ switch(swiz_val) {
+ case 0: return "R";
+ case 1: return "G";
+ case 2: return "B";
+ case 3: return "A";
+ case 4: return "0";
+ case 5: return "1/2";
+ case 6: return "1";
+ case 7: return "U";
+ }
+ return NULL;
+}
+
+static char *toop(int op_val)
+{
+ char *str = NULL;
+ switch (op_val) {
+ case 0: str = "MAD"; break;
+ case 1: str = "DP3"; break;
+ case 2: str = "DP4"; break;
+ case 3: str = "D2A"; break;
+ case 4: str = "MIN"; break;
+ case 5: str = "MAX"; break;
+ case 6: str = "Reserved"; break;
+ case 7: str = "CND"; break;
+ case 8: str = "CMP"; break;
+ case 9: str = "FRC"; break;
+ case 10: str = "SOP"; break;
+ case 11: str = "MDH"; break;
+ case 12: str = "MDV"; break;
+ }
+ return str;
+}
+
+static char *to_alpha_op(int op_val)
+{
+ char *str = NULL;
+ switch (op_val) {
+ case 0: str = "MAD"; break;
+ case 1: str = "DP"; break;
+ case 2: str = "MIN"; break;
+ case 3: str = "MAX"; break;
+ case 4: str = "Reserved"; break;
+ case 5: str = "CND"; break;
+ case 6: str = "CMP"; break;
+ case 7: str = "FRC"; break;
+ case 8: str = "EX2"; break;
+ case 9: str = "LN2"; break;
+ case 10: str = "RCP"; break;
+ case 11: str = "RSQ"; break;
+ case 12: str = "SIN"; break;
+ case 13: str = "COS"; break;
+ case 14: str = "MDH"; break;
+ case 15: str = "MDV"; break;
+ }
+ return str;
+}
+
+static char *to_mask(int val)
+{
+ char *str = NULL;
+ switch(val) {
+ case 0: str = "NONE"; break;
+ case 1: str = "R"; break;
+ case 2: str = "G"; break;
+ case 3: str = "RG"; break;
+ case 4: str = "B"; break;
+ case 5: str = "RB"; break;
+ case 6: str = "GB"; break;
+ case 7: str = "RGB"; break;
+ case 8: str = "A"; break;
+ case 9: str = "AR"; break;
+ case 10: str = "AG"; break;
+ case 11: str = "ARG"; break;
+ case 12: str = "AB"; break;
+ case 13: str = "ARB"; break;
+ case 14: str = "AGB"; break;
+ case 15: str = "ARGB"; break;
+ }
+ return str;
+}
+
+static char *to_texop(int val)
+{
+ switch(val) {
+ case 0: return "NOP";
+ case 1: return "LD";
+ case 2: return "TEXKILL";
+ case 3: return "PROJ";
+ case 4: return "LODBIAS";
+ case 5: return "LOD";
+ case 6: return "DXDY";
+ }
+ return NULL;
+}
+
+void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+ struct r500_fragment_program_code *code = &c->code.r500;
+ fprintf(stderr, "R500 Fragment Program:\n--------\n");
+
+ int n;
+ uint32_t inst;
+ uint32_t inst0;
+ char *str = NULL;
+
+ for (n = 0; n < code->inst_end+1; n++) {
+ inst0 = inst = code->inst[n].inst0;
+ fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
+ switch(inst & 0x3) {
+ case R500_INST_TYPE_ALU: str = "ALU"; break;
+ case R500_INST_TYPE_OUT: str = "OUT"; break;
+ case R500_INST_TYPE_FC: str = "FC"; break;
+ case R500_INST_TYPE_TEX: str = "TEX"; break;
+ };
+ fprintf(stderr,"%s %s %s %s %s ", str,
+ inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
+ inst & R500_INST_LAST ? "LAST" : "",
+ inst & R500_INST_NOP ? "NOP" : "",
+ inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
+ fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
+ to_mask((inst >> 15) & 0xf));
+
+ switch(inst0 & 0x3) {
+ case 0:
+ case 1:
+ fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
+ inst = code->inst[n].inst1;
+
+ fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+ inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+ (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+ (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+ (inst >> 30));
+
+ fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
+ inst = code->inst[n].inst2;
+ fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+ inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+ (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+ (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+ (inst >> 30));
+ fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
+ inst = code->inst[n].inst3;
+ fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
+ (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
+ (inst >> 11) & 0x3,
+ (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
+ (inst >> 24) & 0x3);
+
+
+ fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
+ inst = code->inst[n].inst4;
+ fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
+ (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+ (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
+ (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+ (inst >> 31) & 0x1);
+
+ fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
+ inst = code->inst[n].inst5;
+ fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
+ (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+ (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
+ (inst >> 23) & 0x3,
+ (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
+ break;
+ case 2:
+ break;
+ case 3:
+ inst = code->inst[n].inst1;
+ fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
+ to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
+ (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
+ inst = code->inst[n].inst2;
+ fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
+ inst & 127, inst & (1<<7) ? "(rel)" : "",
+ toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
+ toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
+ (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
+ toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
+ toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
+
+ fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
+ break;
+ }
+ fprintf(stderr,"\n");
+ }
+
+}
diff --git a/r300/r500_fragprog.h b/r300/compiler/r500_fragprog.h
index 1e45538..9091f65 100644
--- a/r300/r500_fragprog.h
+++ b/r300/compiler/r500_fragprog.h
@@ -33,30 +33,23 @@
#ifndef __R500_FRAGPROG_H_
#define __R500_FRAGPROG_H_
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/program.h"
#include "shader/prog_instruction.h"
-#include "r300_context.h"
-#include "r300_state.h"
-#include "radeon_program.h"
+#include "radeon_compiler.h"
+#include "radeon_nqssadce.h"
-struct r500_fragment_program;
+extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
-extern void r500TranslateFragmentShader(r300ContextPtr r300,
- struct r500_fragment_program *fp);
+extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
-struct r500_fragment_program_compiler {
- r300ContextPtr r300;
- struct r500_fragment_program *fp;
- struct r500_fragment_program_code *code;
- struct gl_program *program;
-};
+extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
-extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler);
+extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src);
+
+extern GLboolean r500_transform_TEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data);
#endif
diff --git a/r300/r500_fragprog_emit.c b/r300/compiler/r500_fragprog_emit.c
index 4631235..d694725 100644
--- a/r300/r500_fragprog_emit.c
+++ b/r300/compiler/r500_fragprog_emit.c
@@ -45,47 +45,22 @@
#include "r500_fragprog.h"
+#include "../r300_reg.h"
+
#include "radeon_program_pair.h"
#define PROG_CODE \
- struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \
- struct r500_fragment_program_code *code = c->code
+ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
+ struct r500_fragment_program_code *code = &c->code->code.r500
#define error(fmt, args...) do { \
- fprintf(stderr, "%s::%s(): " fmt "\n", \
+ rc_error(&c->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
-/**
- * Callback to register hardware constants.
- */
-static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex)
-{
- PROG_CODE;
-
- for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
- if (code->constant[*hwindex].File == file &&
- code->constant[*hwindex].Index == idx)
- break;
- }
-
- if (*hwindex >= code->const_nr) {
- if (*hwindex >= PFS_NUM_CONST_REGS) {
- error("Out of hw constants!\n");
- return GL_FALSE;
- }
-
- code->const_nr++;
- code->constant[*hwindex].File = file;
- code->constant[*hwindex].Index = idx;
- }
-
- return GL_TRUE;
-}
-
-static GLuint translate_rgb_op(GLuint opcode)
+static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
@@ -106,7 +81,7 @@ static GLuint translate_rgb_op(GLuint opcode)
}
}
-static GLuint translate_alpha_op(GLuint opcode)
+static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R500_ALPHA_OP_CMP;
@@ -189,8 +164,8 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
int ip = ++code->inst_end;
- code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode);
- code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode);
+ code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
+ code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
@@ -202,7 +177,7 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
if (inst->Alpha.DepthWriteMask) {
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
- c->fp->writes_depth = GL_TRUE;
+ c->code->writes_depth = GL_TRUE;
}
code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
@@ -234,19 +209,19 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
return GL_TRUE;
}
-static GLuint translate_strq_swizzle(struct prog_src_register src)
+static GLuint translate_strq_swizzle(GLuint swizzle)
{
GLuint swiz = 0;
int i;
for (i = 0; i < 4; i++)
- swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2;
+ swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
return swiz;
}
/**
* Emit a single TEX instruction
*/
-static GLboolean emit_tex(void *data, struct prog_instruction *inst)
+static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst)
{
PROG_CODE;
@@ -258,7 +233,7 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst)
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_TEX
- | (inst->DstReg.WriteMask << 11)
+ | (inst->WriteMask << 11)
| R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
| R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
@@ -267,25 +242,25 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst)
code->inst[ip].inst1 |= R500_TEX_UNSCALED;
switch (inst->Opcode) {
- case OPCODE_KIL:
+ case RADEON_OPCODE_KIL:
code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
break;
- case OPCODE_TEX:
+ case RADEON_OPCODE_TEX:
code->inst[ip].inst1 |= R500_TEX_INST_LD;
break;
- case OPCODE_TXB:
+ case RADEON_OPCODE_TXB:
code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
break;
- case OPCODE_TXP:
+ case RADEON_OPCODE_TXP:
code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
break;
default:
error("emit_tex can't handle opcode %x\n", inst->Opcode);
}
- code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
- | (translate_strq_swizzle(inst->SrcReg[0]) << 8)
- | R500_TEX_DST_ADDR(inst->DstReg.Index)
+ code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex)
+ | (translate_strq_swizzle(inst->SrcSwizzle) << 8)
+ | R500_TEX_DST_ADDR(inst->DestIndex)
| R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
| R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
@@ -293,35 +268,32 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst)
}
static const struct radeon_pair_handler pair_handler = {
- .EmitConst = emit_const,
.EmitPaired = emit_paired,
.EmitTex = emit_tex,
.MaxHwTemps = 128
};
-GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler)
+void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
{
- struct r500_fragment_program_code *code = compiler->code;
+ struct r500_fragment_program_code *code = &compiler->code->code.r500;
_mesa_bzero(code, sizeof(*code));
code->max_temp_idx = 1;
- code->inst_offset = 0;
code->inst_end = -1;
- if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
- return GL_FALSE;
+ radeonPairProgram(compiler, &pair_handler, compiler);
+ if (compiler->Base.Error)
+ return;
if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
/* This may happen when dead-code elimination is disabled or
* when most of the fragment program logic is leading to a KIL */
if (code->inst_end >= 511) {
- error("Introducing fake OUT: Too many instructions");
- return GL_FALSE;
+ rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
+ return;
}
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
-
- return GL_TRUE;
}
diff --git a/r300/compiler/radeon_code.c b/r300/compiler/radeon_code.c
new file mode 100644
index 0000000..a9dedf7
--- /dev/null
+++ b/r300/compiler/radeon_code.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+
+#include "radeon_code.h"
+
+void rc_constants_init(struct rc_constant_list * c)
+{
+ memset(c, 0, sizeof(*c));
+}
+
+/**
+ * Copy a constants structure, assuming that the destination structure
+ * is not initialized.
+ */
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
+{
+ dst->Constants = malloc(sizeof(struct rc_constant) * src->Count);
+ memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count);
+ dst->Count = src->Count;
+ dst->_Reserved = src->Count;
+}
+
+void rc_constants_destroy(struct rc_constant_list * c)
+{
+ free(c->Constants);
+ memset(c, 0, sizeof(*c));
+}
+
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
+{
+ unsigned index = c->Count;
+
+ if (c->Count >= c->_Reserved) {
+ struct rc_constant * newlist;
+
+ c->_Reserved = c->_Reserved * 2;
+ if (!c->_Reserved)
+ c->_Reserved = 16;
+
+ newlist = malloc(sizeof(struct rc_constant) * c->_Reserved);
+ memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count);
+
+ free(c->Constants);
+ c->Constants = newlist;
+ }
+
+ c->Constants[index] = *constant;
+ c->Count++;
+
+ return index;
+}
+
+
+/**
+ * Add a state vector to the constant list, while trying to avoid duplicates.
+ */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
+{
+ unsigned index;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_STATE) {
+ if (c->Constants[index].u.State[0] == state0 &&
+ c->Constants[index].u.State[1] == state1)
+ return index;
+ }
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_STATE;
+ constant.Size = 4;
+ constant.u.State[0] = state0;
+ constant.u.State[1] = state1;
+
+ return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate vector to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
+{
+ unsigned index;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
+ return index;
+ }
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 4;
+ memcpy(constant.u.Immediate, data, sizeof(float) * 4);
+
+ return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate scalar to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
+{
+ unsigned index;
+ int free_index = -1;
+ struct rc_constant constant;
+ unsigned comp;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ for(comp = 0; comp < c->Constants[index].Size; ++comp) {
+ if (c->Constants[index].u.Immediate[comp] == data) {
+ *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ return index;
+ }
+ }
+
+ if (c->Constants[index].Size < 4)
+ free_index = index;
+ }
+ }
+
+ if (free_index >= 0) {
+ comp = c->Constants[free_index].Size++;
+ c->Constants[free_index].u.Immediate[comp] = data;
+ *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ return free_index;
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 1;
+ constant.u.Immediate[0] = data;
+ *swizzle = SWIZZLE_XXXX;
+
+ return rc_constants_add(c, &constant);
+}
diff --git a/r300/compiler/radeon_code.h b/r300/compiler/radeon_code.h
new file mode 100644
index 0000000..3e88554
--- /dev/null
+++ b/r300/compiler/radeon_code.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_CODE_H
+#define RADEON_CODE_H
+
+#include <stdint.h>
+
+#define R300_PFS_MAX_ALU_INST 64
+#define R300_PFS_MAX_TEX_INST 32
+#define R300_PFS_MAX_TEX_INDIRECT 4
+#define R300_PFS_NUM_TEMP_REGS 32
+#define R300_PFS_NUM_CONST_REGS 32
+
+#define R500_PFS_MAX_INST 512
+#define R500_PFS_NUM_TEMP_REGS 128
+#define R500_PFS_NUM_CONST_REGS 256
+
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+
+enum {
+ /**
+ * External constants are constants whose meaning is unknown to this
+ * compiler. For example, a Mesa gl_program's constants are turned
+ * into external constants.
+ */
+ RC_CONSTANT_EXTERNAL = 0,
+
+ RC_CONSTANT_IMMEDIATE,
+
+ /**
+ * Constant referring to state that is known by this compiler,
+ * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
+ */
+ RC_CONSTANT_STATE
+};
+
+enum {
+ RC_STATE_SHADOW_AMBIENT = 0,
+
+ RC_STATE_R300_WINDOW_DIMENSION,
+ RC_STATE_R300_TEXRECT_FACTOR
+};
+
+struct rc_constant {
+ unsigned Type:2; /**< RC_CONSTANT_xxx */
+ unsigned Size:3;
+
+ union {
+ unsigned External;
+ float Immediate[4];
+ unsigned State[2];
+ } u;
+};
+
+struct rc_constant_list {
+ struct rc_constant * Constants;
+ unsigned Count;
+
+ unsigned _Reserved;
+};
+
+void rc_constants_init(struct rc_constant_list * c);
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
+void rc_constants_destroy(struct rc_constant_list * c);
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
+
+/**
+ * Stores state that influences the compilation of a fragment program.
+ */
+struct r300_fragment_program_external_state {
+ struct {
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is:
+ * 0 - GL_LUMINANCE
+ * 1 - GL_INTENSITY
+ * 2 - GL_ALPHA
+ * depending on the depth texture mode.
+ */
+ unsigned depth_texture_mode : 2;
+
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is (texture_compare_func - GL_NEVER).
+ * [e.g. if compare function is GL_LEQUAL, this field is 3]
+ *
+ * Otherwise, this field is 0.
+ */
+ unsigned texture_compare_func : 3;
+ } unit[16];
+};
+
+
+
+struct r300_fragment_program_node {
+ int tex_offset; /**< first tex instruction */
+ int tex_end; /**< last tex instruction, relative to tex_offset */
+ int alu_offset; /**< first ALU instruction */
+ int alu_end; /**< last ALU instruction, relative to alu_offset */
+ int flags;
+};
+
+/**
+ * Stores an R300 fragment program in its compiled-to-hardware form.
+ */
+struct r300_fragment_program_code {
+ struct {
+ int length; /**< total # of texture instructions used */
+ uint32_t inst[R300_PFS_MAX_TEX_INST];
+ } tex;
+
+ struct {
+ int length; /**< total # of ALU instructions used */
+ struct {
+ uint32_t rgb_inst;
+ uint32_t rgb_addr;
+ uint32_t alpha_inst;
+ uint32_t alpha_addr;
+ } inst[R300_PFS_MAX_ALU_INST];
+ } alu;
+
+ uint32_t config; /* US_CONFIG */
+ uint32_t pixsize; /* US_PIXSIZE */
+ uint32_t code_offset; /* US_CODE_OFFSET */
+ uint32_t code_addr[4]; /* US_CODE_ADDR */
+};
+
+
+struct r500_fragment_program_code {
+ struct {
+ uint32_t inst0;
+ uint32_t inst1;
+ uint32_t inst2;
+ uint32_t inst3;
+ uint32_t inst4;
+ uint32_t inst5;
+ } inst[R500_PFS_MAX_INST];
+
+ int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
+
+ int max_temp_idx;
+};
+
+struct rX00_fragment_program_code {
+ union {
+ struct r300_fragment_program_code r300;
+ struct r500_fragment_program_code r500;
+ } code;
+
+ unsigned writes_depth:1;
+
+ struct rc_constant_list constants;
+};
+
+
+#define VSF_MAX_FRAGMENT_LENGTH (255*4)
+#define VSF_MAX_FRAGMENT_TEMPS (14)
+
+#define VSF_MAX_INPUTS 32
+#define VSF_MAX_OUTPUTS 32
+
+struct r300_vertex_program_code {
+ int length;
+ union {
+ uint32_t d[VSF_MAX_FRAGMENT_LENGTH];
+ float f[VSF_MAX_FRAGMENT_LENGTH];
+ } body;
+
+ int pos_end;
+ int num_temporaries; /* Number of temp vars used by program */
+ int inputs[VSF_MAX_INPUTS];
+ int outputs[VSF_MAX_OUTPUTS];
+
+ struct rc_constant_list constants;
+
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+};
+
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+
+#endif /* RADEON_CODE_H */
+
diff --git a/r300/compiler/radeon_compiler.c b/r300/compiler/radeon_compiler.c
new file mode 100644
index 0000000..da950d5
--- /dev/null
+++ b/r300/compiler/radeon_compiler.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdarg.h>
+
+#include "radeon_program.h"
+
+
+void rc_init(struct radeon_compiler * c)
+{
+ memset(c, 0, sizeof(*c));
+
+ memory_pool_init(&c->Pool);
+ c->Program.Instructions.Prev = &c->Program.Instructions;
+ c->Program.Instructions.Next = &c->Program.Instructions;
+ c->Program.Instructions.I.Opcode = OPCODE_END;
+}
+
+void rc_destroy(struct radeon_compiler * c)
+{
+ rc_constants_destroy(&c->Program.Constants);
+ memory_pool_destroy(&c->Pool);
+ free(c->ErrorMsg);
+}
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
+{
+ va_list ap;
+
+ if (!c->Debug)
+ return;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+}
+
+void rc_error(struct radeon_compiler * c, const char * fmt, ...)
+{
+ va_list ap;
+
+ c->Error = GL_TRUE;
+
+ if (!c->ErrorMsg) {
+ /* Only remember the first error */
+ char buf[1024];
+ int written;
+
+ va_start(ap, fmt);
+ written = vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ if (written < sizeof(buf)) {
+ c->ErrorMsg = strdup(buf);
+ } else {
+ c->ErrorMsg = malloc(written + 1);
+
+ va_start(ap, fmt);
+ vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
+ va_end(ap);
+ }
+ }
+
+ if (c->Debug) {
+ fprintf(stderr, "r300compiler error: ");
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+}
+
+/**
+ * Rewrite the program such that everything that source the given input
+ * register will source new_input instead.
+ */
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input)
+{
+ struct rc_instruction * inst;
+
+ c->Program.InputsRead &= ~(1 << input);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < numsrcs; ++i) {
+ if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) {
+ inst->I.SrcReg[i].File = new_input.File;
+ inst->I.SrcReg[i].Index = new_input.Index;
+ inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle);
+ if (!inst->I.SrcReg[i].Abs) {
+ inst->I.SrcReg[i].Negate ^= new_input.Negate;
+ inst->I.SrcReg[i].Abs = new_input.Abs;
+ }
+
+ c->Program.InputsRead |= 1 << new_input.Index;
+ }
+ }
+ }
+}
+
+
+/**
+ * Rewrite the program such that everything that writes into the given
+ * output register will instead write to new_output. The new_output
+ * writemask is honoured.
+ */
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
+{
+ struct rc_instruction * inst;
+
+ c->Program.OutputsWritten &= ~(1 << output);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+ if (numdsts) {
+ if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) {
+ inst->I.DstReg.Index = new_output;
+ inst->I.DstReg.WriteMask &= writemask;
+
+ c->Program.OutputsWritten |= 1 << new_output;
+ }
+ }
+ }
+}
+
+
+/**
+ * Rewrite the program such that a given output is duplicated.
+ */
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
+{
+ unsigned tempreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+ if (numdsts) {
+ if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) {
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = tempreg;
+ }
+ }
+ }
+
+ inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg.File = PROGRAM_OUTPUT;
+ inst->I.DstReg.Index = output;
+
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = tempreg;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg.File = PROGRAM_OUTPUT;
+ inst->I.DstReg.Index = dup_output;
+
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = tempreg;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ c->Program.OutputsWritten |= 1 << dup_output;
+}
+
+
+/**
+ * Introduce standard code fragment to deal with fragment.position.
+ */
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input)
+{
+ unsigned tempregi = rc_find_free_temporary(c);
+
+ c->Program.InputsRead &= ~(1 << wpos);
+ c->Program.InputsRead |= 1 << new_input;
+
+ /* perspective divide */
+ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
+ inst_rcp->I.Opcode = OPCODE_RCP;
+
+ inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_rcp->I.DstReg.Index = tempregi;
+ inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+
+ inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT;
+ inst_rcp->I.SrcReg[0].Index = new_input;
+ inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp);
+ inst_mul->I.Opcode = OPCODE_MUL;
+
+ inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mul->I.DstReg.Index = tempregi;
+ inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ;
+
+ inst_mul->I.SrcReg[0].File = PROGRAM_INPUT;
+ inst_mul->I.SrcReg[0].Index = new_input;
+
+ inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst_mul->I.SrcReg[1].Index = tempregi;
+ inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+
+ /* viewport transformation */
+ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul);
+ inst_mad->I.Opcode = OPCODE_MAD;
+
+ inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mad->I.DstReg.Index = tempregi;
+ inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ;
+
+ inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[0].Index = tempregi;
+ inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR;
+ inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+ inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR;
+ inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index;
+ inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ struct rc_instruction * inst;
+ for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < numsrcs; i++) {
+ if (inst->I.SrcReg[i].File == PROGRAM_INPUT &&
+ inst->I.SrcReg[i].Index == wpos) {
+ inst->I.SrcReg[i].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[i].Index = tempregi;
+ }
+ }
+ }
+}
+
diff --git a/r300/compiler/radeon_compiler.h b/r300/compiler/radeon_compiler.h
new file mode 100644
index 0000000..e63ab88
--- /dev/null
+++ b/r300/compiler/radeon_compiler.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_COMPILER_H
+#define RADEON_COMPILER_H
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+
+#include "memory_pool.h"
+#include "radeon_code.h"
+
+
+struct rc_instruction {
+ struct rc_instruction * Prev;
+ struct rc_instruction * Next;
+ struct prog_instruction I;
+};
+
+struct rc_program {
+ /**
+ * Instructions.Next points to the first instruction,
+ * Instructions.Prev points to the last instruction.
+ */
+ struct rc_instruction Instructions;
+
+ /* Long term, we should probably remove InputsRead & OutputsWritten,
+ * since updating dependent state can be fragile, and they aren't
+ * actually used very often. */
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+ uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+ struct rc_constant_list Constants;
+};
+
+struct radeon_compiler {
+ struct memory_pool Pool;
+ struct rc_program Program;
+ unsigned Debug:1;
+ unsigned Error:1;
+ char * ErrorMsg;
+};
+
+void rc_init(struct radeon_compiler * c);
+void rc_destroy(struct radeon_compiler * c);
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
+void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+
+void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+
+void rc_calculate_inputs_outputs(struct radeon_compiler * c);
+
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input);
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input);
+
+struct r300_fragment_program_compiler {
+ struct radeon_compiler Base;
+ struct rX00_fragment_program_code *code;
+ struct r300_fragment_program_external_state state;
+ unsigned is_r500;
+ unsigned OutputDepth;
+ unsigned OutputColor;
+
+ void * UserData;
+ void (*AllocateHwInputs)(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata);
+};
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+
+
+struct r300_vertex_program_compiler {
+ struct radeon_compiler Base;
+ struct r300_vertex_program_code *code;
+ GLbitfield RequiredOutputs;
+
+ void * UserData;
+ void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+};
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+
+#endif /* RADEON_COMPILER_H */
diff --git a/r300/radeon_nqssadce.c b/r300/compiler/radeon_nqssadce.c
index 4a2e1cb..aaaa50a 100644
--- a/r300/radeon_nqssadce.c
+++ b/r300/compiler/radeon_nqssadce.c
@@ -36,6 +36,8 @@
#include "radeon_nqssadce.h"
+#include "radeon_compiler.h"
+
/**
* Return the @ref register_state for the given register (or 0 for untracked
@@ -46,6 +48,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil
switch(file) {
case PROGRAM_TEMPORARY: return &s->Temps[index];
case PROGRAM_OUTPUT: return &s->Outputs[index];
+ case PROGRAM_ADDRESS: return &s->Address;
default: return 0;
}
}
@@ -56,7 +59,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil
*
* @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
*/
-static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
+struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
{
struct prog_src_register tmp = srcreg;
int i;
@@ -75,9 +78,10 @@ static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_reg
}
-static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
- struct prog_instruction *inst, GLint src, GLuint sourced)
+static void track_used_srcreg(struct nqssadce_state* s,
+ GLint src, GLuint sourced)
{
+ struct prog_instruction * inst = &s->IP->I;
int i;
GLuint deswz_source = 0;
@@ -94,12 +98,11 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
struct prog_dst_register dstreg = inst->DstReg;
dstreg.File = PROGRAM_TEMPORARY;
- dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+ dstreg.Index = rc_find_free_temporary(s->Compiler);
dstreg.WriteMask = sourced;
s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
- inst = s->Program->Instructions + s->IP;
inst->SrcReg[src].File = PROGRAM_TEMPORARY;
inst->SrcReg[src].Index = dstreg.Index;
inst->SrcReg[src].Swizzle = 0;
@@ -114,67 +117,38 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
deswz_source = sourced;
}
- struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
- if (regstate)
- regstate->Sourced |= deswz_source & 0xf;
-
- return inst;
-}
+ struct register_state *regstate;
-
-static void rewrite_depth_out(struct prog_instruction *inst)
-{
- if (inst->DstReg.WriteMask & WRITEMASK_Z) {
- inst->DstReg.WriteMask = WRITEMASK_W;
+ if (inst->SrcReg[src].RelAddr) {
+ regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
+ if (regstate)
+ regstate->Sourced |= WRITEMASK_X;
} else {
- inst->DstReg.WriteMask = 0;
- return;
- }
-
- switch (inst->Opcode) {
- case OPCODE_FRC:
- case OPCODE_MOV:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- break;
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- break;
- case OPCODE_CMP:
- case OPCODE_MAD:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
- break;
- default:
- // Scalar instructions needn't be reswizzled
- break;
+ regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
+ if (regstate)
+ regstate->Sourced |= deswz_source & 0xf;
}
}
-static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
+static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex)
{
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode);
int i;
for(i = 0; i < nsrc; ++i)
- if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
- inst->SrcReg[i].Index = newindex;
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex)
+ inst->I.SrcReg[i].Index = newindex;
}
static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
{
- GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
- int ip;
- for(ip = 0; ip < s->IP; ++ip) {
- struct prog_instruction* inst = s->Program->Instructions + ip;
- if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
- inst->DstReg.Index = newindex;
+ GLuint newindex = rc_find_free_temporary(s->Compiler);
+ struct rc_instruction * inst;
+ for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) {
+ if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex)
+ inst->I.DstReg.Index = newindex;
unalias_srcregs(inst, oldindex, newindex);
}
- unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
+ unalias_srcregs(s->IP, oldindex, newindex);
}
@@ -183,20 +157,16 @@ static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
*/
static void process_instruction(struct nqssadce_state* s)
{
- struct prog_instruction *inst = s->Program->Instructions + s->IP;
+ struct prog_instruction *inst = &s->IP->I;
+ GLuint WriteMask;
if (inst->Opcode == OPCODE_END)
return;
if (inst->Opcode != OPCODE_KIL) {
- if (s->Descr->RewriteDepthOut) {
- if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH)
- rewrite_depth_out(inst);
- }
-
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
if (!regstate) {
- _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
+ rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n",
inst->DstReg.File, inst->DstReg.Index);
return;
}
@@ -205,7 +175,9 @@ static void process_instruction(struct nqssadce_state* s)
regstate->Sourced &= ~inst->DstReg.WriteMask;
if (inst->DstReg.WriteMask == 0) {
- _mesa_delete_instructions(s->Program, s->IP, 1);
+ struct rc_instruction * inst_remove = s->IP;
+ s->IP = s->IP->Prev;
+ rc_remove_instruction(inst_remove);
return;
}
@@ -213,28 +185,30 @@ static void process_instruction(struct nqssadce_state* s)
unalias_temporary(s, inst->DstReg.Index);
}
- /* Attention: Due to swizzle emulation code, the following
- * might change the instruction stream under us, so we have
- * to be careful with the inst pointer. */
+ WriteMask = inst->DstReg.WriteMask;
+
switch (inst->Opcode) {
+ case OPCODE_ARL:
case OPCODE_DDX:
case OPCODE_DDY:
case OPCODE_FRC:
case OPCODE_MOV:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ track_used_srcreg(s, 0, WriteMask);
break;
case OPCODE_ADD:
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_MUL:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+ case OPCODE_SGE:
+ case OPCODE_SLT:
+ track_used_srcreg(s, 0, WriteMask);
+ track_used_srcreg(s, 1, WriteMask);
break;
case OPCODE_CMP:
case OPCODE_MAD:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
+ track_used_srcreg(s, 0, WriteMask);
+ track_used_srcreg(s, 1, WriteMask);
+ track_used_srcreg(s, 2, WriteMask);
break;
case OPCODE_COS:
case OPCODE_EX2:
@@ -242,42 +216,79 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
- inst = track_used_srcreg(s, inst, 0, 0x1);
+ track_used_srcreg(s, 0, 0x1);
break;
case OPCODE_DP3:
- inst = track_used_srcreg(s, inst, 0, 0x7);
- inst = track_used_srcreg(s, inst, 1, 0x7);
+ track_used_srcreg(s, 0, 0x7);
+ track_used_srcreg(s, 1, 0x7);
break;
case OPCODE_DP4:
- inst = track_used_srcreg(s, inst, 0, 0xf);
- inst = track_used_srcreg(s, inst, 1, 0xf);
+ track_used_srcreg(s, 0, 0xf);
+ track_used_srcreg(s, 1, 0xf);
break;
case OPCODE_KIL:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
- inst = track_used_srcreg(s, inst, 0, 0xf);
+ track_used_srcreg(s, 0, 0xf);
+ break;
+ case OPCODE_DST:
+ track_used_srcreg(s, 0, 0x6);
+ track_used_srcreg(s, 1, 0xa);
+ break;
+ case OPCODE_EXP:
+ case OPCODE_LOG:
+ case OPCODE_POW:
+ track_used_srcreg(s, 0, 0x3);
+ break;
+ case OPCODE_LIT:
+ track_used_srcreg(s, 0, 0xb);
break;
default:
- _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
+ rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
return;
}
+
+ s->IP = s->IP->Prev;
}
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+ struct rc_instruction *inst;
+
+ c->Program.InputsRead = 0;
+ c->Program.OutputsWritten = 0;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+ {
+ int i;
+ int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode);
+
+ for (i = 0; i < num_src_regs; ++i) {
+ if (inst->I.SrcReg[i].File == PROGRAM_INPUT)
+ c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
+ }
+
+ if (_mesa_num_inst_dst_regs(inst->I.Opcode)) {
+ if (inst->I.DstReg.File == PROGRAM_OUTPUT)
+ c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
+ }
+ }
+}
-void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
+void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data)
{
struct nqssadce_state s;
_mesa_bzero(&s, sizeof(s));
- s.Ctx = ctx;
- s.Program = p;
+ s.Compiler = c;
s.Descr = descr;
+ s.UserData = data;
s.Descr->Init(&s);
- s.IP = p->NumInstructions;
+ s.IP = c->Program.Instructions.Prev;
- while(s.IP > 0) {
- s.IP--;
+ while(s.IP != &c->Program.Instructions && !c->Error)
process_instruction(&s);
- }
+
+ rc_calculate_inputs_outputs(c);
}
diff --git a/r300/radeon_nqssadce.h b/r300/compiler/radeon_nqssadce.h
index a4f94ab..b3fc77a 100644
--- a/r300/radeon_nqssadce.h
+++ b/r300/compiler/radeon_nqssadce.h
@@ -30,7 +30,6 @@
#include "radeon_program.h"
-
struct register_state {
/**
* Bitmask indicating which components of the register are sourced
@@ -44,20 +43,22 @@ struct register_state {
* read from, etc.
*/
struct nqssadce_state {
- GLcontext *Ctx;
- struct gl_program *Program;
+ struct radeon_compiler *Compiler;
struct radeon_nqssadce_descr *Descr;
/**
* All instructions after this instruction pointer have been dealt with.
*/
- int IP;
+ struct rc_instruction * IP;
/**
* Which registers are read by subsequent instructions?
*/
struct register_state Temps[MAX_PROGRAM_TEMPS];
struct register_state Outputs[VERT_RESULT_MAX];
+ struct register_state Address;
+
+ void * UserData;
};
@@ -82,15 +83,9 @@ struct radeon_nqssadce_descr {
* The transformation will work recursively on the emitted instruction(s).
*/
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
-
- /**
- * Rewrite instructions that write to DEPR.z to write to DEPR.w
- * instead (rewriting is done *before* the WriteMask test).
- */
- GLboolean RewriteDepthOut;
- void *Data;
};
-void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
+void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data);
+struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/r300/compiler/radeon_program.c b/r300/compiler/radeon_program.c
new file mode 100644
index 0000000..605edb6
--- /dev/null
+++ b/r300/compiler/radeon_program.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include "radeon_compiler.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+
+
+/**
+ * Transform the given clause in the following way:
+ * 1. Replace it with an empty clause
+ * 2. For every instruction in the original clause, try the given
+ * transformations in order.
+ * 3. If one of the transformations returns GL_TRUE, assume that it
+ * has emitted the appropriate instruction(s) into the new clause;
+ * otherwise, copy the instruction verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void radeonLocalTransform(
+ struct radeon_compiler * c,
+ int num_transformations,
+ struct radeon_program_transformation* transformations)
+{
+ struct rc_instruction * inst = c->Program.Instructions.Next;
+
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * current = inst;
+ int i;
+
+ inst = inst->Next;
+
+ for(i = 0; i < num_transformations; ++i) {
+ struct radeon_program_transformation* t = transformations + i;
+
+ if (t->function(c, current, t->userData))
+ break;
+ }
+ }
+}
+
+
+GLint rc_find_free_temporary(struct radeon_compiler * c)
+{
+ struct rc_instruction * rcinst;
+ GLboolean used[MAX_PROGRAM_TEMPS];
+ GLuint i;
+
+ memset(used, 0, sizeof(used));
+
+ for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
+ const struct prog_instruction *inst = &rcinst->I;
+ const GLuint nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ const GLuint ndst = _mesa_num_inst_dst_regs(inst->Opcode);
+ GLuint k;
+
+ for (k = 0; k < nsrc; k++) {
+ if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
+ used[inst->SrcReg[k].Index] = GL_TRUE;
+ }
+
+ if (ndst) {
+ if (inst->DstReg.File == PROGRAM_TEMPORARY)
+ used[inst->DstReg.Index] = GL_TRUE;
+ }
+ }
+
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ if (!used[i])
+ return i;
+ }
+
+ return -1;
+}
+
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
+
+ inst->Prev = 0;
+ inst->Next = 0;
+
+ _mesa_init_instructions(&inst->I, 1);
+
+ return inst;
+}
+
+
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+ struct rc_instruction * inst = rc_alloc_instruction(c);
+
+ inst->Prev = after;
+ inst->Next = after->Next;
+
+ inst->Prev->Next = inst;
+ inst->Next->Prev = inst;
+
+ return inst;
+}
+
+void rc_remove_instruction(struct rc_instruction * inst)
+{
+ inst->Prev->Next = inst->Next;
+ inst->Next->Prev = inst->Prev;
+}
+
+
+void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
+{
+ struct prog_instruction *source;
+ unsigned int i;
+
+ for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) {
+ struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ dest->I = *source;
+ }
+
+ c->Program.ShadowSamplers = program->ShadowSamplers;
+ c->Program.InputsRead = program->InputsRead;
+ c->Program.OutputsWritten = program->OutputsWritten;
+
+ int isNVProgram = 0;
+
+ if (program->Target == GL_VERTEX_PROGRAM_ARB) {
+ struct gl_vertex_program * vp = (struct gl_vertex_program *) program;
+ isNVProgram = vp->IsNVProgram;
+ }
+
+ if (isNVProgram) {
+ /* NV_vertex_program has a fixed-sized constant environment.
+ * This could be handled more efficiently for programs that
+ * do not use relative addressing.
+ */
+ for(i = 0; i < 96; ++i) {
+ struct rc_constant constant;
+
+ constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.Size = 4;
+ constant.u.External = i;
+
+ rc_constants_add(&c->Program.Constants, &constant);
+ }
+ } else {
+ for(i = 0; i < program->Parameters->NumParameters; ++i) {
+ struct rc_constant constant;
+
+ constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.Size = 4;
+ constant.u.External = i;
+
+ rc_constants_add(&c->Program.Constants, &constant);
+ }
+ }
+}
+
+
+/**
+ * Print program to stderr, default options.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+ GLuint indent = 0;
+ GLuint linenum = 1;
+ struct rc_instruction *inst;
+
+ fprintf(stderr, "# Radeon Compiler Program\n");
+
+ for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+ fprintf(stderr, "%3d: ", linenum);
+
+ /* Massive hack: We rely on the fact that the printers do not actually
+ * use the gl_program argument (last argument) in debug mode */
+ indent = _mesa_fprint_instruction_opt(
+ stderr, &inst->I,
+ indent, PROG_PRINT_DEBUG, 0);
+
+ linenum++;
+ }
+}
diff --git a/r300/radeon_program.h b/r300/compiler/radeon_program.h
index b411f69..5619586 100644
--- a/r300/radeon_program.h
+++ b/r300/compiler/radeon_program.h
@@ -34,12 +34,9 @@
#include "shader/program.h"
#include "shader/prog_instruction.h"
-
-enum {
- CLAUSE_MIXED = 0,
- CLAUSE_ALU,
- CLAUSE_TEX
-};
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_program;
enum {
PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
@@ -52,18 +49,43 @@ enum {
#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
-/**
- * Transformation context that is passed to local transformations.
- *
- * Care must be taken with some operations during transformation,
- * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
- */
-struct radeon_transform_context {
- GLcontext *Ctx;
- struct gl_program *Program;
- struct prog_instruction *OldInstructions;
- GLuint OldNumInstructions;
-};
+static inline GLuint get_swz(GLuint swz, GLuint idx)
+{
+ if (idx & 0x4)
+ return idx;
+ return GET_SWZ(swz, idx);
+}
+
+static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w)
+{
+ GLuint ret = 0;
+
+ ret |= get_swz(src, swz_x);
+ ret |= get_swz(src, swz_y) << 3;
+ ret |= get_swz(src, swz_z) << 6;
+ ret |= get_swz(src, swz_w) << 9;
+
+ return ret;
+}
+
+static inline GLuint combine_swizzles(GLuint src, GLuint swz)
+{
+ GLuint ret = 0;
+
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9;
+
+ return ret;
+}
+
+static INLINE void reset_srcreg(struct prog_src_register* reg)
+{
+ _mesa_bzero(reg, sizeof(*reg));
+ reg->Swizzle = SWIZZLE_NOOP;
+}
+
/**
* A transformation that can be passed to \ref radeonLocalTransform.
@@ -77,23 +99,23 @@ struct radeon_transform_context {
*/
struct radeon_program_transformation {
GLboolean (*function)(
- struct radeon_transform_context*,
- struct prog_instruction*,
+ struct radeon_compiler*,
+ struct rc_instruction*,
void*);
void *userData;
};
void radeonLocalTransform(
- GLcontext* ctx,
- struct gl_program *program,
+ struct radeon_compiler *c,
int num_transformations,
struct radeon_program_transformation* transformations);
-/**
- * Find a usable free temporary register during program transformation
- */
-GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
+GLint rc_find_free_temporary(struct radeon_compiler * c);
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_remove_instruction(struct rc_instruction * inst);
-struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
+void rc_print_program(const struct rc_program *prog);
#endif
diff --git a/r300/radeon_program_alu.c b/r300/compiler/radeon_program_alu.c
index 8283723..f23ce30 100644
--- a/r300/radeon_program_alu.c
+++ b/r300/compiler/radeon_program_alu.c
@@ -35,49 +35,52 @@
#include "radeon_program_alu.h"
-#include "shader/prog_parameter.h"
+#include "radeon_compiler.h"
-static struct prog_instruction *emit1(struct gl_program* p,
+static struct rc_instruction *emit1(
+ struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg)
{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg;
+ fpi->I.Opcode = Opcode;
+ fpi->I.SaturateMode = Saturate;
+ fpi->I.DstReg = DstReg;
+ fpi->I.SrcReg[0] = SrcReg;
return fpi;
}
-static struct prog_instruction *emit2(struct gl_program* p,
+static struct rc_instruction *emit2(
+ struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg0;
- fpi->SrcReg[1] = SrcReg1;
+ fpi->I.Opcode = Opcode;
+ fpi->I.SaturateMode = Saturate;
+ fpi->I.DstReg = DstReg;
+ fpi->I.SrcReg[0] = SrcReg0;
+ fpi->I.SrcReg[1] = SrcReg1;
return fpi;
}
-static struct prog_instruction *emit3(struct gl_program* p,
+static struct rc_instruction *emit3(
+ struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
struct prog_src_register SrcReg2)
{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
-
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg0;
- fpi->SrcReg[1] = SrcReg1;
- fpi->SrcReg[2] = SrcReg2;
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+ fpi->I.Opcode = Opcode;
+ fpi->I.SaturateMode = Saturate;
+ fpi->I.DstReg = DstReg;
+ fpi->I.SrcReg[0] = SrcReg0;
+ fpi->I.SrcReg[1] = SrcReg1;
+ fpi->I.SrcReg[2] = SrcReg2;
return fpi;
}
@@ -88,6 +91,7 @@ static struct prog_dst_register dstreg(int file, int index)
dst.Index = index;
dst.WriteMask = WRITEMASK_XYZW;
dst.CondMask = COND_TR;
+ dst.RelAddr = 0;
dst.CondSwizzle = SWIZZLE_NOOP;
dst.CondSrc = 0;
dst.pad = 0;
@@ -96,10 +100,11 @@ static struct prog_dst_register dstreg(int file, int index)
static struct prog_dst_register dstregtmpmask(int index, int mask)
{
- struct prog_dst_register dst;
+ struct prog_dst_register dst = {0};
dst.File = PROGRAM_TEMPORARY;
dst.Index = index;
dst.WriteMask = mask;
+ dst.RelAddr = 0;
dst.CondMask = COND_TR;
dst.CondSwizzle = SWIZZLE_NOOP;
dst.CondSrc = 0;
@@ -171,44 +176,63 @@ static struct prog_src_register scalar(struct prog_src_register reg)
return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
}
-static void transform_ABS(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_ABS(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- struct prog_src_register src = inst->SrcReg[0];
+ struct prog_src_register src = inst->I.SrcReg[0];
src.Abs = 1;
src.Negate = NEGATE_NONE;
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
+ emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src);
+ rc_remove_instruction(inst);
}
-static void transform_DPH(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_DP3(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- struct prog_src_register src0 = inst->SrcReg[0];
+ struct prog_src_register src0 = inst->I.SrcReg[0];
+ struct prog_src_register src1 = inst->I.SrcReg[1];
+ src0.Negate &= ~NEGATE_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= SWIZZLE_ZERO << (3 * 3);
+ src1.Negate &= ~NEGATE_W;
+ src1.Swizzle &= ~(7 << (3 * 3));
+ src1.Swizzle |= SWIZZLE_ZERO << (3 * 3);
+ emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1);
+ rc_remove_instruction(inst);
+}
+
+static void transform_DPH(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct prog_src_register src0 = inst->I.SrcReg[0];
src0.Negate &= ~NEGATE_W;
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= SWIZZLE_ONE << (3 * 3);
- emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]);
+ emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]);
+ rc_remove_instruction(inst);
}
/**
* [1, src0.y*src1.y, src0.z, src1.w]
* So basically MUL with lotsa swizzling.
*/
-static void transform_DST(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_DST(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg,
- swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
- swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
+ emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg,
+ swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
+ swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
+ rc_remove_instruction(inst);
}
-static void transform_FLR(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_FLR(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
- emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
- emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg,
- inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ int tempreg = rc_find_free_temporary(c);
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]);
+ emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg,
+ inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ rc_remove_instruction(inst);
}
/**
@@ -229,152 +253,159 @@ static void transform_FLR(struct radeon_transform_context* t,
* 5 slots, if the subsequent optimization passes are clever enough
* to pair instructions correctly.
*/
-static void transform_LIT(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_LIT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- static const GLfloat LitConst[4] = { -127.999999 };
-
GLuint constant;
GLuint constant_swizzle;
GLuint temp;
- int needTemporary = 0;
struct prog_src_register srctemp;
- constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
+ constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
- if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
- needTemporary = 1;
- } else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
- // LIT is typically followed by DP3/DP4, so there's no point
- // in creating special code for this case
- needTemporary = 1;
- }
+ if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) {
+ struct rc_instruction * inst_mov;
- if (needTemporary) {
- temp = radeonFindFreeTemporary(t);
- } else {
- temp = inst->DstReg.Index;
+ inst_mov = emit1(c, inst,
+ OPCODE_MOV, 0, inst->I.DstReg,
+ srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c)));
+
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
}
+
+ temp = inst->I.DstReg.Index;
srctemp = srcreg(PROGRAM_TEMPORARY, temp);
// tmp.x = max(0.0, Src.x);
// tmp.y = max(0.0, Src.y);
// tmp.w = clamp(Src.z, -128+eps, 128-eps);
- emit2(t->Program, OPCODE_MAX, 0,
+ emit2(c, inst->Prev, OPCODE_MAX, 0,
dstregtmpmask(temp, WRITEMASK_XYW),
- inst->SrcReg[0],
+ inst->I.SrcReg[0],
swizzle(srcreg(PROGRAM_CONSTANT, constant),
SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
- emit2(t->Program, OPCODE_MIN, 0,
+ emit2(c, inst->Prev, OPCODE_MIN, 0,
dstregtmpmask(temp, WRITEMASK_Z),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
// tmp.w = Pow(tmp.y, tmp.w)
- emit1(t->Program, OPCODE_LG2, 0,
+ emit1(c, inst->Prev, OPCODE_LG2, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit2(t->Program, OPCODE_MUL, 0,
+ emit2(c, inst->Prev, OPCODE_MUL, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
- emit1(t->Program, OPCODE_EX2, 0,
+ emit1(c, inst->Prev, OPCODE_EX2, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
// tmp.z = (tmp.x > 0) ? tmp.w : 0.0
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode,
+ emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode,
dstregtmpmask(temp, WRITEMASK_Z),
negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
builtin_zero);
// tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode,
+ emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode,
dstregtmpmask(temp, WRITEMASK_XYW),
swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
- if (needTemporary)
- emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp);
+ rc_remove_instruction(inst);
}
-static void transform_LRP(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_LRP(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_ADD, 0,
+ emit2(c, inst->Prev, OPCODE_ADD, 0,
dstreg(PROGRAM_TEMPORARY, tempreg),
- inst->SrcReg[1], negate(inst->SrcReg[2]));
- emit3(t->Program, OPCODE_MAD, inst->SaturateMode,
- inst->DstReg,
- inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
+ inst->I.SrcReg[1], negate(inst->I.SrcReg[2]));
+ emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode,
+ inst->I.DstReg,
+ inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]);
+
+ rc_remove_instruction(inst);
}
-static void transform_POW(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_POW(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
tempdst.WriteMask = WRITEMASK_W;
tempsrc.Swizzle = SWIZZLE_WWWW;
- emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));
- emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));
- emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);
+ emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0]));
+ emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1]));
+ emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc);
+
+ rc_remove_instruction(inst);
}
-static void transform_RSQ(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_RSQ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));
+ inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]);
}
-static void transform_SGE(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SGE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
+ emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
+ emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
}
-static void transform_SLT(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SLT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
+ emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
+ emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
}
-static void transform_SUB(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SUB(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
+ inst->I.Opcode = OPCODE_ADD;
+ inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]);
}
-static void transform_SWZ(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SWZ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);
+ inst->I.Opcode = OPCODE_MOV;
}
-static void transform_XPD(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_XPD(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
-
- emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
- swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
- swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,
- swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
- swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
+ swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
+ emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg,
+ swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
+ swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+
+ rc_remove_instruction(inst);
}
@@ -392,31 +423,64 @@ static void transform_XPD(struct radeon_transform_context* t,
*
* @note should be applicable to R300 and R500 fragment programs.
*/
-GLboolean radeonTransformALU(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformALU(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
void* unused)
{
- switch(inst->Opcode) {
- case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
- case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
- case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;
- case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
- case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
- case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
- case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
- case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;
- case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
- case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
- case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
- case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
- case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
+ switch(inst->I.Opcode) {
+ case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE;
+ case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
+ case OPCODE_DST: transform_DST(c, inst); return GL_TRUE;
+ case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
+ case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE;
+ case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
+ case OPCODE_POW: transform_POW(c, inst); return GL_TRUE;
+ case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE;
+ case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE;
+ case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE;
+ case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
+ case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
+ case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
default:
return GL_FALSE;
}
}
-static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
+static void transform_r300_vertex_ABS(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* Note: r500 can take absolute values, but r300 cannot. */
+ inst->I.Opcode = OPCODE_MAX;
+ inst->I.SrcReg[1] = inst->I.SrcReg[0];
+ inst->I.SrcReg[1].Negate ^= NEGATE_XYZW;
+}
+
+/**
+ * For use with radeonLocalTransform, this transforms non-native ALU
+ * instructions of the r300 up to r500 vertex engine.
+ */
+GLboolean r300_transform_vertex_alu(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ switch(inst->I.Opcode) {
+ case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE;
+ case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE;
+ case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
+ case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
+ case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
+ case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
+ case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
+ case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
+ default:
+ return GL_FALSE;
+ }
+}
+
+static void sincos_constants(struct radeon_compiler* c, GLuint *constants)
{
static const GLfloat SinCosConsts[2][4] = {
{
@@ -434,11 +498,8 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan
};
int i;
- for(i = 0; i < 2; ++i) {
- GLuint swz;
- constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
- ASSERT(swz == SWIZZLE_NOOP);
- }
+ for(i = 0; i < 2; ++i)
+ constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
}
/**
@@ -449,23 +510,24 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan
* MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
* MAD dest, tmp.y, weight, tmp.x
*/
-static void sin_approx(struct radeon_transform_context* t,
+static void sin_approx(
+ struct radeon_compiler* c, struct rc_instruction * before,
struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
{
- GLuint tempreg = radeonFindFreeTemporary(t);
+ GLuint tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ emit2(c, before->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcreg(PROGRAM_CONSTANT, constants[0]));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
+ emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
+ emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
- emit3(t->Program, OPCODE_MAD, 0, dst,
+ emit3(c, before->Prev, OPCODE_MAD, 0, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
@@ -476,78 +538,80 @@ static void sin_approx(struct radeon_transform_context* t,
* using only the basic instructions
* MOV, ADD, MUL, MAD, FRC
*/
-GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformTrigSimple(struct radeon_compiler* c,
+ struct rc_instruction* inst,
void* unused)
{
- if (inst->Opcode != OPCODE_COS &&
- inst->Opcode != OPCODE_SIN &&
- inst->Opcode != OPCODE_SCS)
+ if (inst->I.Opcode != OPCODE_COS &&
+ inst->I.Opcode != OPCODE_SIN &&
+ inst->I.Opcode != OPCODE_SCS)
return GL_FALSE;
GLuint constants[2];
- GLuint tempreg = radeonFindFreeTemporary(t);
+ GLuint tempreg = rc_find_free_temporary(c);
- sincos_constants(t, constants);
+ sincos_constants(c, constants);
- if (inst->Opcode == OPCODE_COS) {
+ if (inst->I.Opcode == OPCODE_COS) {
// MAD tmp.x, src, 1/(2*PI), 0.75
// FRC tmp.x, tmp.x
// MAD tmp.z, tmp.x, 2*PI, -PI
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
- sin_approx(t, inst->DstReg,
+ sin_approx(c, inst, inst->I.DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
- } else if (inst->Opcode == OPCODE_SIN) {
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ } else if (inst->I.Opcode == OPCODE_SIN) {
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
- sin_approx(t, inst->DstReg,
+ sin_approx(c, inst, inst->I.DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
} else {
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
- struct prog_dst_register dst = inst->DstReg;
+ struct prog_dst_register dst = inst->I.DstReg;
- dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
- sin_approx(t, dst,
+ dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X;
+ sin_approx(c, inst, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
constants);
- dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
- sin_approx(t, dst,
+ dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y;
+ sin_approx(c, inst, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
constants);
}
+ rc_remove_instruction(inst);
+
return GL_TRUE;
}
@@ -560,50 +624,52 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
*
* @warning This transformation implicitly changes the semantics of SIN and COS!
*/
-GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformTrigScale(struct radeon_compiler* c,
+ struct rc_instruction* inst,
void* unused)
{
- if (inst->Opcode != OPCODE_COS &&
- inst->Opcode != OPCODE_SIN &&
- inst->Opcode != OPCODE_SCS)
+ if (inst->I.Opcode != OPCODE_COS &&
+ inst->I.Opcode != OPCODE_SIN &&
+ inst->I.Opcode != OPCODE_SCS)
return GL_FALSE;
- static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
+ static const GLfloat RCP_2PI = 0.15915494309189535;
GLuint temp;
GLuint constant;
GLuint constant_swizzle;
- temp = radeonFindFreeTemporary(t);
- constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
+ temp = rc_find_free_temporary(c);
+ constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
- emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
srcreg(PROGRAM_TEMPORARY, temp));
- if (inst->Opcode == OPCODE_COS) {
- emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg,
+ if (inst->I.Opcode == OPCODE_COS) {
+ emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->Opcode == OPCODE_SIN) {
- emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
- inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->Opcode == OPCODE_SCS) {
- struct prog_dst_register moddst = inst->DstReg;
+ } else if (inst->I.Opcode == OPCODE_SIN) {
+ emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode,
+ inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ } else if (inst->I.Opcode == OPCODE_SCS) {
+ struct prog_dst_register moddst = inst->I.DstReg;
- if (inst->DstReg.WriteMask & WRITEMASK_X) {
+ if (inst->I.DstReg.WriteMask & WRITEMASK_X) {
moddst.WriteMask = WRITEMASK_X;
- emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst,
+ emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
}
- if (inst->DstReg.WriteMask & WRITEMASK_Y) {
+ if (inst->I.DstReg.WriteMask & WRITEMASK_Y) {
moddst.WriteMask = WRITEMASK_Y;
- emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst,
+ emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
}
}
+ rc_remove_instruction(inst);
+
return GL_TRUE;
}
@@ -615,21 +681,15 @@ GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
* @warning This explicitly changes the form of DDX and DDY!
*/
-GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformDeriv(struct radeon_compiler* c,
+ struct rc_instruction* inst,
void* unused)
{
- if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
+ if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY)
return GL_FALSE;
- struct prog_src_register B = inst->SrcReg[1];
-
- B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
- SWIZZLE_ONE, SWIZZLE_ONE);
- B.Negate = NEGATE_XYZW;
-
- emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
- inst->SrcReg[0], B);
+ inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
+ inst->I.SrcReg[1].Negate = NEGATE_XYZW;
return GL_TRUE;
}
diff --git a/r300/radeon_program_alu.h b/r300/compiler/radeon_program_alu.h
index b459581..147efec 100644
--- a/r300/radeon_program_alu.h
+++ b/r300/compiler/radeon_program_alu.h
@@ -31,23 +31,28 @@
#include "radeon_program.h"
GLboolean radeonTransformALU(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+GLboolean r300_transform_vertex_alu(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
GLboolean radeonTransformTrigSimple(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
GLboolean radeonTransformTrigScale(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
GLboolean radeonTransformDeriv(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/r300/radeon_program_pair.c b/r300/compiler/radeon_program_pair.c
index 2e21f7b..4c26db5 100644
--- a/r300/radeon_program_pair.c
+++ b/r300/compiler/radeon_program_pair.c
@@ -35,19 +35,20 @@
#include "radeon_program_pair.h"
-#include "radeon_context.h"
-
+#include "memory_pool.h"
+#include "radeon_compiler.h"
#include "shader/prog_print.h"
#define error(fmt, args...) do { \
- _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \
+ rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
- s->Error = GL_TRUE; \
} while(0)
struct pair_state_instruction {
+ struct prog_instruction Instruction;
+ GLuint IP; /**< Position of this instruction in original program */
+
GLuint IsTex:1; /**< Is a texture instruction */
- GLuint IsOutput:1; /**< Is output instruction */
GLuint NeedRGB:1; /**< Needs the RGB ALU */
GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
GLuint IsTranscendent:1; /**< Is a special transcendent instruction */
@@ -74,7 +75,7 @@ struct pair_state_instruction {
* Used to keep track of which instructions read a value.
*/
struct reg_value_reader {
- GLuint IP; /**< IP of the instruction that performs this access */
+ struct pair_state_instruction *Reader;
struct reg_value_reader *Next;
};
@@ -83,7 +84,7 @@ struct reg_value_reader {
* PROGRAM_TEMPORARY.
*/
struct reg_value {
- GLuint IP; /**< IP of the instruction that writes this value */
+ struct pair_state_instruction *Writer;
struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */
/**
@@ -117,14 +118,10 @@ struct pair_register_translation {
};
struct pair_state {
- GLcontext *Ctx;
- struct gl_program *Program;
+ struct r300_fragment_program_compiler * Compiler;
const struct radeon_pair_handler *Handler;
- GLboolean Error;
- GLboolean Debug;
GLboolean Verbose;
void *UserData;
- GLubyte NumKillInsts;
/**
* Translate Mesa registers to hardware registers
@@ -132,11 +129,6 @@ struct pair_state {
struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];
- /**
- * Derived information about program instructions.
- */
- struct pair_state_instruction *Instructions;
-
struct {
GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
} HwTemps[128];
@@ -149,19 +141,6 @@ struct pair_state {
struct pair_state_instruction *ReadyRGB;
struct pair_state_instruction *ReadyAlpha;
struct pair_state_instruction *ReadyTEX;
-
- /**
- * Linked list of deferred instructions
- */
- struct pair_state_instruction *DeferredInsts;
-
- /**
- * Pool of @ref reg_value structures for fast allocation.
- */
- struct reg_value *ValuePool;
- GLuint ValuePoolUsed;
- struct reg_value_reader *ReaderPool;
- GLuint ReaderPoolUsed;
};
@@ -190,7 +169,7 @@ static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
struct pair_register_translation *t = get_register(s, file, index);
if (!t) {
- _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index);
+ error("get_hw_reg: %i[%i]\n", file, index);
return 0;
}
@@ -228,19 +207,15 @@ static void add_pairinst_to_list(struct pair_state_instruction **list, struct pa
}
/**
- * The instruction at the given IP has become ready. Link it into the ready
+ * The given instruction has become ready. Link it into the ready
* instructions.
*/
-static void instruction_ready(struct pair_state *s, int ip)
+static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
-
if (s->Verbose)
- _mesa_printf("instruction_ready(%i)\n", ip);
+ _mesa_printf("instruction_ready(%i)\n", pairinst->IP);
- if (s->NumKillInsts > 0 && pairinst->IsOutput)
- add_pairinst_to_list(&s->DeferredInsts, pairinst);
- else if (pairinst->IsTex)
+ if (pairinst->IsTex)
add_pairinst_to_list(&s->ReadyTEX, pairinst);
else if (!pairinst->NeedAlpha)
add_pairinst_to_list(&s->ReadyRGB, pairinst);
@@ -305,12 +280,12 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
* Classify an instruction according to which ALUs etc. it needs
*/
static void classify_instruction(struct pair_state *s,
- struct prog_instruction *inst, struct pair_state_instruction *pairinst)
+ struct pair_state_instruction *psi)
{
- pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
- pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
+ psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
+ psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
- switch(inst->Opcode) {
+ switch(psi->Instruction.Opcode) {
case OPCODE_ADD:
case OPCODE_CMP:
case OPCODE_DDX:
@@ -328,28 +303,26 @@ static void classify_instruction(struct pair_state *s,
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
- pairinst->IsTranscendent = 1;
- pairinst->NeedAlpha = 1;
+ psi->IsTranscendent = 1;
+ psi->NeedAlpha = 1;
break;
case OPCODE_DP4:
- pairinst->NeedAlpha = 1;
+ psi->NeedAlpha = 1;
/* fall through */
case OPCODE_DP3:
- pairinst->NeedRGB = 1;
+ psi->NeedRGB = 1;
break;
case OPCODE_KIL:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
case OPCODE_END:
- pairinst->IsTex = 1;
+ psi->IsTex = 1;
break;
default:
- error("Unknown opcode %d\n", inst->Opcode);
+ error("Unknown opcode %d\n", psi->Instruction.Opcode);
break;
}
-
- pairinst->IsOutput = (inst->DstReg.File == PROGRAM_OUTPUT);
}
@@ -359,30 +332,34 @@ static void classify_instruction(struct pair_state *s,
*/
static void scan_instructions(struct pair_state *s)
{
- struct prog_instruction *inst;
- struct pair_state_instruction *pairinst;
+ struct rc_instruction *source;
GLuint ip;
- for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
- inst->Opcode != OPCODE_END;
- ++inst, ++pairinst, ++ip) {
- final_rewrite(s, inst);
- classify_instruction(s, inst, pairinst);
+ for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0;
+ source != &s->Compiler->Base.Program.Instructions;
+ source = source->Next, ++ip) {
+ struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst));
+ memset(pairinst, 0, sizeof(struct pair_state_instruction));
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ pairinst->Instruction = source->I;
+ pairinst->IP = ip;
+ final_rewrite(s, &pairinst->Instruction);
+ classify_instruction(s, pairinst);
+
+ int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode);
int j;
for(j = 0; j < nsrc; j++) {
struct pair_register_translation *t =
- get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
+ get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index);
if (!t)
continue;
t->RefCount++;
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+ if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) {
int i;
for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
+ GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i);
if (swz >= 4)
continue; /* constant or NIL swizzle */
if (!t->Value[swz])
@@ -392,36 +369,37 @@ static void scan_instructions(struct pair_state *s)
* also rewrites the value. The code below adds
* a dependency for the DstReg, which is a superset
* of the SrcReg dependency. */
- if (inst->DstReg.File == PROGRAM_TEMPORARY &&
- inst->DstReg.Index == inst->SrcReg[j].Index &&
- GET_BIT(inst->DstReg.WriteMask, swz))
+ if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY &&
+ pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index &&
+ GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz))
continue;
- struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
+ struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r));
pairinst->NumDependencies++;
t->Value[swz]->NumReaders++;
- r->IP = ip;
+ r->Reader = pairinst;
r->Next = t->Value[swz]->Readers;
t->Value[swz]->Readers = r;
}
}
}
- int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
+ int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode);
if (ndst) {
struct pair_register_translation *t =
- get_register(s, inst->DstReg.File, inst->DstReg.Index);
+ get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index);
if (t) {
t->RefCount++;
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) {
int j;
for(j = 0; j < 4; ++j) {
- if (!GET_BIT(inst->DstReg.WriteMask, j))
+ if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j))
continue;
- struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
- v->IP = ip;
+ struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v));
+ memset(v, 0, sizeof(struct reg_value));
+ v->Writer = pairinst;
if (t->Value[j]) {
pairinst->NumDependencies++;
t->Value[j]->Next = v;
@@ -437,7 +415,7 @@ static void scan_instructions(struct pair_state *s)
_mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);
if (!pairinst->NumDependencies)
- instruction_ready(s, ip);
+ instruction_ready(s, pairinst);
}
/* Clear the PROGRAM_TEMPORARY state */
@@ -449,70 +427,23 @@ static void scan_instructions(struct pair_state *s)
}
-/**
- * Reserve hardware temporary registers for the program inputs.
- *
- * @note This allocation is performed explicitly, because the order of inputs
- * is determined by the RS hardware.
- */
-static void allocate_input_registers(struct pair_state *s)
-{
- GLuint InputsRead = s->Program->InputsRead;
- int i;
- GLuint hwindex = 0;
-
- /* Primary colour */
- if (InputsRead & FRAG_BIT_COL0)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
- InputsRead &= ~FRAG_BIT_COL0;
-
- /* Secondary color */
- if (InputsRead & FRAG_BIT_COL1)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
- InputsRead &= ~FRAG_BIT_COL1;
-
- /* Texcoords */
- for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
- if (InputsRead & (FRAG_BIT_TEX0 << i))
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
- }
- InputsRead &= ~FRAG_BITS_TEX_ANY;
-
- /* Fogcoords treated as a texcoord */
- if (InputsRead & FRAG_BIT_FOGC)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++);
- InputsRead &= ~FRAG_BIT_FOGC;
-
- /* fragment position treated as a texcoord */
- if (InputsRead & FRAG_BIT_WPOS)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
- InputsRead &= ~FRAG_BIT_WPOS;
-
- /* Anything else */
- if (InputsRead)
- error("Don't know how to handle inputs 0x%x\n", InputsRead);
-}
-
-
-static void decrement_dependencies(struct pair_state *s, int ip)
+static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
ASSERT(pairinst->NumDependencies > 0);
if (!--pairinst->NumDependencies)
- instruction_ready(s, ip);
+ instruction_ready(s, pairinst);
}
/**
* Update the dependency tracking state based on what the instruction
* at the given IP does.
*/
-static void commit_instruction(struct pair_state *s, int ip)
+static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst)
{
- struct prog_instruction *inst = s->Program->Instructions + ip;
- struct pair_state_instruction *pairinst = s->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
if (s->Verbose)
- _mesa_printf("commit_instruction(%i)\n", ip);
+ _mesa_printf("commit_instruction(%i)\n", pairinst->IP);
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
@@ -527,11 +458,11 @@ static void commit_instruction(struct pair_state *s, int ip)
if (t->Value[i]->NumReaders) {
struct reg_value_reader *r;
for(r = pairinst->Values[i]->Readers; r; r = r->Next)
- decrement_dependencies(s, r->IP);
+ decrement_dependencies(s, r->Reader);
} else if (t->Value[i]->Next) {
/* This happens when the only reader writes
* the register at the same time */
- decrement_dependencies(s, t->Value[i]->Next->IP);
+ decrement_dependencies(s, t->Value[i]->Next->Writer);
}
}
}
@@ -565,7 +496,7 @@ static void commit_instruction(struct pair_state *s, int ip)
if (!--t->Value[swz]->NumReaders) {
if (t->Value[swz]->Next)
- decrement_dependencies(s, t->Value[swz]->Next->IP);
+ decrement_dependencies(s, t->Value[swz]->Next->Writer);
}
}
}
@@ -596,38 +527,52 @@ static void emit_all_tex(struct pair_state *s)
// Allocate destination hardware registers in one block to avoid conflicts.
for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- int ip = pairinst - s->Instructions;
- struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
if (inst->Opcode != OPCODE_KIL)
get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
}
- if (s->Debug)
+ if (s->Compiler->Base.Debug)
_mesa_printf(" BEGIN_TEX\n");
if (s->Handler->BeginTexBlock)
- s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData);
+ s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData);
for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- int ip = pairinst - s->Instructions;
- struct prog_instruction *inst = s->Program->Instructions + ip;
- commit_instruction(s, ip);
+ struct prog_instruction *inst = &pairinst->Instruction;
+ commit_instruction(s, pairinst);
- if (inst->Opcode == OPCODE_KIL)
- --s->NumKillInsts;
- else
+ if (inst->Opcode != OPCODE_KIL)
inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
-
inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
- if (s->Debug) {
+ if (s->Compiler->Base.Debug) {
_mesa_printf(" ");
_mesa_print_instruction(inst);
+ fflush(stderr);
+ }
+
+ struct radeon_pair_texture_instruction rpti;
+
+ switch(inst->Opcode) {
+ case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break;
+ case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break;
+ case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break;
+ default:
+ case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break;
}
- s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst);
+
+ rpti.DestIndex = inst->DstReg.Index;
+ rpti.WriteMask = inst->DstReg.WriteMask;
+ rpti.TexSrcUnit = inst->TexSrcUnit;
+ rpti.TexSrcTarget = inst->TexSrcTarget;
+ rpti.SrcIndex = inst->SrcReg[0].Index;
+ rpti.SrcSwizzle = inst->SrcReg[0].Swizzle;
+
+ s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti);
}
- if (s->Debug)
+ if (s->Compiler->Base.Debug)
_mesa_printf(" END_TEX\n");
}
@@ -650,7 +595,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
index = get_hw_reg(s, src.File, src.Index);
} else {
constant = 1;
- s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index);
+ index = src.Index;
}
for(i = 0; i < 3; ++i) {
@@ -697,10 +642,12 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
* Fill the given ALU instruction's opcodes and source operands into the given pair,
* if possible.
*/
-static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
+static GLboolean fill_instruction_into_pair(
+ struct pair_state *s,
+ struct radeon_pair_instruction *pair,
+ struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
- struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);
@@ -781,16 +728,18 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_
* we are absolutely certain that we're going to emit a certain
* instruction pairing.
*/
-static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
+static void fill_dest_into_pair(
+ struct pair_state *s,
+ struct radeon_pair_instruction *pair,
+ struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
- struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
if (inst->DstReg.File == PROGRAM_OUTPUT) {
- if (inst->DstReg.Index == FRAG_RESULT_COLOR) {
+ if (inst->DstReg.Index == s->Compiler->OutputColor) {
pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) {
+ } else if (inst->DstReg.Index == s->Compiler->OutputDepth) {
pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
}
} else {
@@ -817,24 +766,24 @@ static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruc
static void emit_alu(struct pair_state *s)
{
struct radeon_pair_instruction pair;
+ struct pair_state_instruction *psi;
if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
- int ip;
if (s->ReadyFullALU) {
- ip = s->ReadyFullALU - s->Instructions;
+ psi = s->ReadyFullALU;
s->ReadyFullALU = s->ReadyFullALU->NextReady;
} else if (s->ReadyRGB) {
- ip = s->ReadyRGB - s->Instructions;
+ psi = s->ReadyRGB;
s->ReadyRGB = s->ReadyRGB->NextReady;
} else {
- ip = s->ReadyAlpha - s->Instructions;
+ psi = s->ReadyAlpha;
s->ReadyAlpha = s->ReadyAlpha->NextReady;
}
_mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, ip);
- fill_dest_into_pair(s, &pair, ip);
- commit_instruction(s, ip);
+ fill_instruction_into_pair(s, &pair, psi);
+ fill_dest_into_pair(s, &pair, psi);
+ commit_instruction(s, psi);
} else {
struct pair_state_instruction **prgb;
struct pair_state_instruction **palpha;
@@ -843,108 +792,75 @@ static void emit_alu(struct pair_state *s)
* many source slots; try all possible pairings if necessary */
for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
- int rgbip = *prgb - s->Instructions;
- int alphaip = *palpha - s->Instructions;
+ struct pair_state_instruction * psirgb = *prgb;
+ struct pair_state_instruction * psialpha = *palpha;
_mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, rgbip);
- if (!fill_instruction_into_pair(s, &pair, alphaip))
+ fill_instruction_into_pair(s, &pair, psirgb);
+ if (!fill_instruction_into_pair(s, &pair, psialpha))
continue;
*prgb = (*prgb)->NextReady;
*palpha = (*palpha)->NextReady;
- fill_dest_into_pair(s, &pair, rgbip);
- fill_dest_into_pair(s, &pair, alphaip);
- commit_instruction(s, rgbip);
- commit_instruction(s, alphaip);
+ fill_dest_into_pair(s, &pair, psirgb);
+ fill_dest_into_pair(s, &pair, psialpha);
+ commit_instruction(s, psirgb);
+ commit_instruction(s, psialpha);
goto success;
}
}
/* No success in pairing; just take the first RGB instruction */
- int ip = s->ReadyRGB - s->Instructions;
+ psi = s->ReadyRGB;
s->ReadyRGB = s->ReadyRGB->NextReady;
+
_mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, ip);
- fill_dest_into_pair(s, &pair, ip);
- commit_instruction(s, ip);
+ fill_instruction_into_pair(s, &pair, psi);
+ fill_dest_into_pair(s, &pair, psi);
+ commit_instruction(s, psi);
success: ;
}
- if (s->Debug)
+ if (s->Compiler->Base.Debug)
radeonPrintPairInstruction(&pair);
- s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);
+ s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair);
}
-static GLubyte countKillInsts(struct gl_program *prog)
+/* Callback function for assigning input registers to hardware registers */
+static void alloc_helper(void * data, unsigned input, unsigned hwreg)
{
- GLubyte i, count = 0;
-
- for (i = 0; i < prog->NumInstructions; ++i) {
- if (prog->Instructions[i].Opcode == OPCODE_KIL)
- ++count;
- }
-
- return count;
+ struct pair_state * s = data;
+ alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg);
}
-GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
+void radeonPairProgram(
+ struct r300_fragment_program_compiler * compiler,
const struct radeon_pair_handler* handler, void *userdata)
{
struct pair_state s;
_mesa_bzero(&s, sizeof(s));
- s.Ctx = ctx;
- s.Program = program;
+ s.Compiler = compiler;
s.Handler = handler;
s.UserData = userdata;
- s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
- s.Verbose = GL_FALSE && s.Debug;
- s.NumKillInsts = countKillInsts(program);
-
- s.Instructions = (struct pair_state_instruction*)_mesa_calloc(
- sizeof(struct pair_state_instruction)*s.Program->NumInstructions);
- s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4);
- s.ReaderPool = (struct reg_value_reader*)_mesa_calloc(
- sizeof(struct reg_value_reader)*s.Program->NumInstructions*12);
+ s.Verbose = GL_FALSE && s.Compiler->Base.Debug;
- if (s.Debug)
+ if (s.Compiler->Base.Debug)
_mesa_printf("Emit paired program\n");
scan_instructions(&s);
- allocate_input_registers(&s);
+ s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s);
- while(!s.Error &&
+ while(!s.Compiler->Base.Error &&
(s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
if (s.ReadyTEX)
emit_all_tex(&s);
- if (!s.NumKillInsts) {
- struct pair_state_instruction *pairinst = s.DeferredInsts;
- while (pairinst) {
- if (!pairinst->NeedAlpha)
- add_pairinst_to_list(&s.ReadyRGB, pairinst);
- else if (!pairinst->NeedRGB)
- add_pairinst_to_list(&s.ReadyAlpha, pairinst);
- else
- add_pairinst_to_list(&s.ReadyFullALU, pairinst);
-
- pairinst = pairinst->NextReady;
- }
- s.DeferredInsts = NULL;
- }
-
while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)
emit_alu(&s);
}
- if (s.Debug)
+ if (s.Compiler->Base.Debug)
_mesa_printf(" END\n");
-
- _mesa_free(s.Instructions);
- _mesa_free(s.ValuePool);
- _mesa_free(s.ReaderPool);
-
- return !s.Error;
}
diff --git a/r300/radeon_program_pair.h b/r300/compiler/radeon_program_pair.h
index 4624a24..ff76178 100644
--- a/r300/radeon_program_pair.h
+++ b/r300/compiler/radeon_program_pair.h
@@ -30,6 +30,8 @@
#include "radeon_program.h"
+struct r300_fragment_program_compiler;
+
/**
* Represents a paired instruction, as found in R300 and R500
@@ -82,18 +84,32 @@ struct radeon_pair_instruction {
};
+enum {
+ RADEON_OPCODE_TEX = 0,
+ RADEON_OPCODE_TXB,
+ RADEON_OPCODE_TXP,
+ RADEON_OPCODE_KIL
+};
+
+struct radeon_pair_texture_instruction {
+ GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */
+
+ GLuint DestIndex:8;
+ GLuint WriteMask:4;
+
+ GLuint TexSrcUnit:5;
+ GLuint TexSrcTarget:3;
+
+ GLuint SrcIndex:8;
+ GLuint SrcSwizzle:12;
+};
+
+
/**
*
*/
struct radeon_pair_handler {
/**
- * Fill in the proper hardware index for the given constant register.
- *
- * @return GL_FALSE on error.
- */
- GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex);
-
- /**
* Write a paired instruction to the hardware.
*
* @return GL_FALSE on error.
@@ -107,7 +123,7 @@ struct radeon_pair_handler {
*
* @return GL_FALSE on error.
*/
- GLboolean (*EmitTex)(void*, struct prog_instruction*);
+ GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*);
/**
* Called before a block of contiguous, independent texture
@@ -115,10 +131,11 @@ struct radeon_pair_handler {
*/
GLboolean (*BeginTexBlock)(void*);
- GLuint MaxHwTemps;
+ unsigned MaxHwTemps;
};
-GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
+void radeonPairProgram(
+ struct r300_fragment_program_compiler * compiler,
const struct radeon_pair_handler*, void *userdata);
void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c
index f447275..0fe32a5 100644
--- a/r300/r300_cmdbuf.c
+++ b/r300/r300_cmdbuf.c
@@ -44,247 +44,425 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "drm.h"
#include "radeon_drm.h"
-#include "radeon_ioctl.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "radeon_reg.h"
#include "r300_reg.h"
#include "r300_cmdbuf.h"
#include "r300_emit.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_mipmap_tree.h"
#include "r300_state.h"
+#include "radeon_reg.h"
+#include "radeon_queryobj.h"
-// Set this to 1 for extremely verbose debugging of command buffers
-#define DEBUG_CMDBUF 0
-
-/**
- * Send the current command buffer via ioctl to the hardware.
+/** # of dwords reserved for additional instructions that may need to be written
+ * during flushing.
*/
-int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
-{
- int ret;
- int i;
- drm_radeon_cmd_buffer_t cmd;
- int start;
-
- if (r300->radeon.lost_context) {
- start = 0;
- r300->radeon.lost_context = GL_FALSE;
- } else
- start = r300->cmdbuf.count_reemit;
-
- if (RADEON_DEBUG & DEBUG_IOCTL) {
- fprintf(stderr, "%s from %s - %i cliprects\n",
- __FUNCTION__, caller, r300->radeon.numClipRects);
-
- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
- for (i = start; i < r300->cmdbuf.count_used; ++i)
- fprintf(stderr, "%d: %08x\n", i,
- r300->cmdbuf.cmd_buf[i]);
- }
+#define SPACE_FOR_FLUSHING 4
- cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start);
- cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
+static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt)
+{
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ return ((((*pkt) >> 16) & 0x3FFF) + 1);
+ } else {
+ drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt;
+ return t->packet0.count;
+ }
+}
- if (r300->radeon.state.scissor.enabled) {
- cmd.nbox = r300->radeon.state.scissor.numClipRects;
- cmd.boxes =
- (drm_clip_rect_t *) r300->radeon.state.scissor.pClipRects;
- } else {
- cmd.nbox = r300->radeon.numClipRects;
- cmd.boxes = (drm_clip_rect_t *) r300->radeon.pClipRects;
- }
+#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
+#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
- ret = drmCommandWrite(r300->radeon.dri.fd,
- DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
+int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int cnt;
+ int extra = 1;
+ cnt = vpu_count(atom->cmd);
- if (RADEON_DEBUG & DEBUG_SYNC) {
- fprintf(stderr, "Syncing in %s (from %s)\n\n",
- __FUNCTION__, caller);
- radeonWaitForIdleLocked(&r300->radeon);
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ extra = 5;
}
- r300->dma.nr_released_bufs = 0;
- r300->cmdbuf.count_used = 0;
- r300->cmdbuf.count_reemit = 0;
-
- return ret;
+ return cnt ? (cnt * 4) + extra : 0;
}
-int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
-{
- int ret;
- LOCK_HARDWARE(&r300->radeon);
+void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
+ drm_r300_cmd_header_t cmd;
+ uint32_t addr, ndw;
- ret = r300FlushCmdBufLocked(r300, caller);
+ cmd.u = atom->cmd[0];
+ addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
+ ndw = atom->check(ctx, atom);
- UNLOCK_HARDWARE(&r300->radeon);
+ BEGIN_BATCH_NO_AUTOSTATE(ndw);
- if (ret) {
- fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret);
- _mesa_exit(ret);
- }
-
- return ret;
+ ndw -= 5;
+ OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr);
+ OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR);
+ OUT_BATCH_TABLE(&atom->cmd[1], ndw);
+ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+ END_BATCH();
}
-static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
+void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom)
{
- int i, j, reg;
- int dwords = (*state->check) (r300, state);
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
drm_r300_cmd_header_t cmd;
+ uint32_t addr, ndw, sz;
+ int type, clamp;
+
+ ndw = atom->check(ctx, atom);
+
+ cmd.u = atom->cmd[0];
+ sz = cmd.r500fp.count;
+ addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo;
+ type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
+ clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
+
+ addr |= (type << 16);
+ addr |= (clamp << 17);
+
+ BEGIN_BATCH_NO_AUTOSTATE(ndw);
+ OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
+ OUT_BATCH(addr);
+ ndw-=3;
+ OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR);
+ OUT_BATCH_TABLE(&atom->cmd[1], ndw);
+ END_BATCH();
+}
- fprintf(stderr, " emit %s %d/%d\n", state->name, dwords,
- state->cmd_size);
-
- if (RADEON_DEBUG & DEBUG_VERBOSE) {
- for (i = 0; i < dwords;) {
- cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]);
- reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo;
- fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
- state->name, i, reg, cmd.packet0.count);
- ++i;
- for (j = 0; j < cmd.packet0.count; j++) {
- fprintf(stderr, " %s[%d]: 0x%04x = %08x\n",
- state->name, i, reg, state->cmd[i]);
- reg += 4;
- ++i;
- }
- }
+static int check_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
+ int dw = 0, i;
+ if (atom->cmd[0] == CP_PACKET2) {
+ return dw;
+ }
+ for(i = 0; i < numtmus; ++i) {
+ radeonTexObj *t = r300->hw.textures[i];
+ if (!t && !r300->radeon.radeonScreen->kernel_mm) {
+ dw += 0;
+ } else if (t && t->image_override && !t->bo) {
+ if (!r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ } else
+ dw += 4;
}
+ return dw;
}
-/**
- * Emit all atoms with a dirty field equal to dirty.
- *
- * The caller must have ensured that there is enough space in the command
- * buffer.
- */
-static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
+static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
{
- struct r300_state_atom *atom;
- uint32_t *dest;
- int dwords;
-
- dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
-
- /* Emit WAIT */
- *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
- dest++;
- r300->cmdbuf.count_used++;
-
- /* Emit cache flush */
- *dest = cmdpacket0(R300_TX_INVALTAGS, 1);
- dest++;
- r300->cmdbuf.count_used++;
-
- *dest = R300_TX_FLUSH;
- dest++;
- r300->cmdbuf.count_used++;
-
- /* Emit END3D */
- *dest = cmdpacify();
- dest++;
- r300->cmdbuf.count_used++;
-
- /* Emit actual atoms */
-
- foreach(atom, &r300->hw.atomlist) {
- if ((atom->dirty || r300->hw.all_dirty) == dirty) {
- dwords = (*atom->check) (r300, atom);
- if (dwords) {
- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
- r300PrintStateAtom(r300, atom);
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
+ int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
+ int i;
+
+ for(i = 0; i < numtmus; ++i) {
+ radeonTexObj *t = r300->hw.textures[i];
+ if (t && !t->image_override) {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ } else if (!t) {
+ /* Texture unit hasn't a texture bound.
+ * We assign the current color buffer as a fakery to make
+ * KIL work on KMS (without it, the CS checker will complain).
+ */
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r300->radeon);
+ if (rrb && rrb->bo) {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH_RELOC(0, rrb->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
}
- memcpy(dest, atom->cmd, dwords * 4);
- dest += dwords;
- r300->cmdbuf.count_used += dwords;
- atom->dirty = GL_FALSE;
+ }
+ } else { /* override cases */
+ if (t->bo) {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ } else if (!r300->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH(t->override_offset);
+ END_BATCH();
} else {
- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
- fprintf(stderr, " skip state %s\n",
- atom->name);
- }
+ /* Texture unit hasn't a texture bound nothings to do */
}
}
}
}
-/**
- * Copy dirty hardware state atoms into the command buffer.
- *
- * We also copy out clean state if we're at the start of a buffer. That makes
- * it easy to recover from lost contexts.
- */
-void r300EmitState(r300ContextPtr r300)
+void r300_emit_scissor(GLcontext *ctx)
{
- if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- if (r300->cmdbuf.count_used && !r300->hw.is_dirty
- && !r300->hw.all_dirty)
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
+ unsigned x1, y1, x2, y2;
+ struct radeon_renderbuffer *rrb;
+
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ return;
+ }
+ rrb = radeon_get_colorbuffer(&r300->radeon);
+ if (!rrb || !rrb->bo) {
+ fprintf(stderr, "no rrb\n");
return;
+ }
+ if (r300->radeon.state.scissor.enabled) {
+ x1 = r300->radeon.state.scissor.rect.x1;
+ y1 = r300->radeon.state.scissor.rect.y1;
+ x2 = r300->radeon.state.scissor.rect.x2;
+ y2 = r300->radeon.state.scissor.rect.y2;
+ } else {
+ x1 = 0;
+ y1 = 0;
+ x2 = rrb->base.Width - 1;
+ y2 = rrb->base.Height - 1;
+ }
+ if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+ x1 += R300_SCISSORS_OFFSET;
+ y1 += R300_SCISSORS_OFFSET;
+ x2 += R300_SCISSORS_OFFSET;
+ y2 += R300_SCISSORS_OFFSET;
+ }
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT));
+ OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT));
+ END_BATCH();
+}
+static int check_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ uint32_t dw = 6 + 3 + 16;
+ if (r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ dw -= 3 + 16;
+ }
+ return dw;
+}
- /* To avoid going across the entire set of states multiple times, just check
- * for enough space for the case of emitting all state, and inline the
- * r300AllocCmdBuf code here without all the checks.
- */
- r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);
+static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
+ struct radeon_renderbuffer *rrb;
+ uint32_t cbpitch;
+ uint32_t offset = r300->radeon.state.color.draw_offset;
+ uint32_t dw = 6;
+ int i;
- if (!r300->cmdbuf.count_used) {
- if (RADEON_DEBUG & DEBUG_STATE)
- fprintf(stderr, "Begin reemit state\n");
+ rrb = radeon_get_colorbuffer(&r300->radeon);
+ if (!rrb || !rrb->bo) {
+ fprintf(stderr, "no rrb\n");
+ return;
+ }
- r300EmitAtoms(r300, GL_FALSE);
- r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
+ if (RADEON_DEBUG & RADEON_STATE)
+ fprintf(stderr,"rrb is %p %d %dx%d\n", rrb, offset, rrb->base.Width, rrb->base.Height);
+ cbpitch = (rrb->pitch / rrb->cpp);
+ if (rrb->cpp == 4)
+ cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+ else switch (rrb->base._ActualFormat) {
+ case GL_RGB5:
+ cbpitch |= R300_COLOR_FORMAT_RGB565;
+ break;
+ case GL_RGBA4:
+ cbpitch |= R300_COLOR_FORMAT_ARGB4444;
+ break;
+ case GL_RGB5_A1:
+ cbpitch |= R300_COLOR_FORMAT_ARGB1555;
+ break;
}
- if (RADEON_DEBUG & DEBUG_STATE)
- fprintf(stderr, "Begin dirty state\n");
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ cbpitch |= R300_COLOR_TILE_ENABLE;
+
+ if (r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ BEGIN_BATCH_NO_AUTOSTATE(dw);
+ OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
+ OUT_BATCH_RELOC(offset, rrb->bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
+ if (!r300->radeon.radeonScreen->kernel_mm)
+ OUT_BATCH(cbpitch);
+ else
+ OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+ if (r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_BATCH(0);
+ OUT_BATCH(((rrb->base.Width - 1) << R300_SCISSORS_X_SHIFT) |
+ ((rrb->base.Height - 1) << R300_SCISSORS_Y_SHIFT));
+ END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(16);
+ for (i = 0; i < 4; i++) {
+ OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2);
+ OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT));
+ OUT_BATCH(((rrb->base.Width - 1) << R300_CLIPRECT_X_SHIFT) | ((rrb->base.Height - 1) << R300_CLIPRECT_Y_SHIFT));
+ }
+ OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1);
+ OUT_BATCH(0xAAAA);
+ OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1);
+ OUT_BATCH(0xffffff);
+ END_BATCH();
+ } else {
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) |
+ (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT));
+ OUT_BATCH(((rrb->base.Width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) |
+ ((rrb->base.Height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT));
+ END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(16);
+ for (i = 0; i < 4; i++) {
+ OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2);
+ OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT));
+ OUT_BATCH(((R300_SCISSORS_OFFSET + rrb->base.Width - 1) << R300_CLIPRECT_X_SHIFT) |
+ ((R300_SCISSORS_OFFSET + rrb->base.Height - 1) << R300_CLIPRECT_Y_SHIFT));
+ }
+ OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1);
+ OUT_BATCH(0xAAAA);
+ OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1);
+ OUT_BATCH(0xffffff);
+ END_BATCH();
+ }
+ }
+}
+
+static int check_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ uint32_t dw;
+ dw = 6;
+ if (r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ return dw;
+}
+
+static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
+ struct radeon_renderbuffer *rrb;
+ uint32_t zbpitch;
+ uint32_t dw = atom->check(ctx, atom);
+
+ rrb = radeon_get_depthbuffer(&r300->radeon);
+ if (!rrb)
+ return;
+
+ zbpitch = (rrb->pitch / rrb->cpp);
+ if (!r300->radeon.radeonScreen->kernel_mm) {
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+ zbpitch |= R300_DEPTHMACROTILE_ENABLE;
+ }
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
+ zbpitch |= R300_DEPTHMICROTILE_TILED;
+ }
+ }
- r300EmitAtoms(r300, GL_TRUE);
+ BEGIN_BATCH_NO_AUTOSTATE(dw);
+ OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
+ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1);
+ if (!r300->radeon.radeonScreen->kernel_mm)
+ OUT_BATCH(zbpitch);
+ else
+ OUT_BATCH_RELOC(cbpitch, rrb->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+}
- assert(r300->cmdbuf.count_used < r300->cmdbuf.size);
+static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
+ struct radeon_renderbuffer *rrb;
+ uint32_t format = 0;
+
+ rrb = radeon_get_depthbuffer(&r300->radeon);
+ if (!rrb)
+ format = 0;
+ else {
+ if (rrb->cpp == 2)
+ format = R300_DEPTHFORMAT_16BIT_INT_Z;
+ else if (rrb->cpp == 4)
+ format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ }
- r300->hw.is_dirty = GL_FALSE;
- r300->hw.all_dirty = GL_FALSE;
+ BEGIN_BATCH_NO_AUTOSTATE(atom->cmd_size);
+ OUT_BATCH(atom->cmd[0]);
+ atom->cmd[1] &= ~0xf;
+ atom->cmd[1] |= format;
+ OUT_BATCH(atom->cmd[1]);
+ OUT_BATCH(atom->cmd[2]);
+ OUT_BATCH(atom->cmd[3]);
+ OUT_BATCH(atom->cmd[4]);
+ END_BATCH();
}
-#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count)
-#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
-#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
+static int check_never(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ return 0;
+}
-static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
+static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
{
return atom->cmd_size;
}
-static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom)
+static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom)
{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
int cnt;
- cnt = packet0_count(atom->cmd);
+ if (atom->cmd[0] == CP_PACKET2) {
+ return 0;
+ }
+ cnt = packet0_count(r300, atom->cmd);
return cnt ? cnt + 1 : 0;
}
-static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom)
+int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom)
{
int cnt;
- cnt = vpu_count(atom->cmd);
- return cnt ? (cnt * 4) + 1 : 0;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int extra = 1;
+ cnt = r500fp_count(atom->cmd);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ extra = 3;
+
+ return cnt ? (cnt * 6) + extra : 0;
}
-static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom)
+int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom)
{
int cnt;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int extra = 1;
cnt = r500fp_count(atom->cmd);
- return cnt ? (cnt * 6) + 1 : 0;
-}
+ if (r300->radeon.radeonScreen->kernel_mm)
+ extra = 3;
-static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
-{
- int cnt;
cnt = r500fp_count(atom->cmd);
- return cnt ? (cnt * 4) + 1 : 0;
+ return cnt ? (cnt * 4) + extra : 0;
}
#define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \
@@ -295,8 +473,8 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
r300->hw.ATOM.idx = (IDX); \
r300->hw.ATOM.check = check_##CHK; \
r300->hw.ATOM.dirty = GL_FALSE; \
- r300->hw.max_state_size += (SZ); \
- insert_at_tail(&r300->hw.atomlist, &r300->hw.ATOM); \
+ r300->radeon.hw.max_state_size += (SZ); \
+ insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM); \
} while (0)
/**
* Allocate memory for the command buffer and initialize the state atom
@@ -304,251 +482,306 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
*/
void r300InitCmdBuf(r300ContextPtr r300)
{
- int size, mtu;
- int has_tcl = 1;
+ int mtu;
+ int has_tcl;
int is_r500 = 0;
- int i;
- if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- has_tcl = 0;
+ has_tcl = r300->options.hw_tcl_enabled;
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
is_r500 = 1;
- r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */
+ r300->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */
mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
- if (RADEON_DEBUG & DEBUG_TEXTURE) {
+ if (RADEON_DEBUG & RADEON_TEXTURE) {
fprintf(stderr, "Using %d maximum texture units..\n", mtu);
}
/* Setup the atom linked list */
- make_empty_list(&r300->hw.atomlist);
- r300->hw.atomlist.name = "atom-list";
+ make_empty_list(&r300->radeon.hw.atomlist);
+ r300->radeon.hw.atomlist.name = "atom-list";
/* Initialize state atoms */
ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
- r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6);
+ r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6);
ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0);
- r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1);
+ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
- r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1);
- if (is_r500) {
+ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1);
+ if (is_r500 && !r300->radeon.radeonScreen->kernel_mm) {
ALLOC_STATE(vap_index_offset, always, 2, 0);
- r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1);
+ r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1);
r300->hw.vap_index_offset.cmd[1] = 0;
}
ALLOC_STATE(vte, always, 3, 0);
- r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2);
+ r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2);
ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
- r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(R300_VAP_VF_MAX_VTX_INDX, 2);
+ r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2);
ALLOC_STATE(vap_cntl_status, always, 2, 0);
- r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1);
+ r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1);
ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
- cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1);
+ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1);
ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
- cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
+ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
- r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2);
+ r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2);
ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
- r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
+ r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
if (has_tcl) {
ALLOC_STATE(vap_clip_cntl, always, 2, 0);
- r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1);
+ r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1);
ALLOC_STATE(vap_clip, always, 5, 0);
- r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4);
+ r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4);
ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
- r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1);
+ r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1);
}
ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
r300->hw.vof.cmd[R300_VOF_CMD_0] =
- cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2);
if (has_tcl) {
ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
r300->hw.pvs.cmd[R300_PVS_CMD_0] =
- cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3);
+ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3);
}
ALLOC_STATE(gb_enable, always, 2, 0);
- r300->hw.gb_enable.cmd[0] = cmdpacket0(R300_GB_ENABLE, 1);
- ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
- r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5);
+ r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1);
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
+ } else {
+ ALLOC_STATE(gb_misc, never, R300_GB_MISC_CMDSIZE, 0);
+ }
+ r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 3);
+ ALLOC_STATE(gb_misc2, always, R300_GB_MISC2_CMDSIZE, 0);
+ r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2);
ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
- r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1);
+ r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1);
ALLOC_STATE(ga_point_s0, always, 5, 0);
- r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4);
+ r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4);
ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
- r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1);
+ r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1);
ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
- r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1);
+ r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1);
ALLOC_STATE(ga_point_minmax, always, 4, 0);
- r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3);
+ r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3);
ALLOC_STATE(lcntl, always, 2, 0);
- r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1);
+ r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1);
ALLOC_STATE(ga_line_stipple, always, 4, 0);
- r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3);
- ALLOC_STATE(shade, always, 5, 0);
- r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4);
+ r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3);
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ ALLOC_STATE(shade, always, 2, 0);
+ } else {
+ ALLOC_STATE(shade, never, 2, 0);
+ }
+ r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 1);
+ ALLOC_STATE(shade2, always, 4, 0);
+ r300->hw.shade2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4278, 3);
ALLOC_STATE(polygon_mode, always, 4, 0);
- r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3);
+ r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3);
ALLOC_STATE(fogp, always, 3, 0);
- r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2);
+ r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2);
ALLOC_STATE(zbias_cntl, always, 2, 0);
- r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1);
+ r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1);
ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
- cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
ALLOC_STATE(occlusion_cntl, always, 2, 0);
- r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1);
+ r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1);
ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
- r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1);
+ r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1);
ALLOC_STATE(su_depth_scale, always, 3, 0);
- r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2);
+ r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2);
ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
- r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2);
+ r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2);
if (is_r500) {
- ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0);
- r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16);
- for (i = 0; i < 8; i++) {
- r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] =
- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
- (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
- }
+ ALLOC_STATE(ri, variable, R500_RI_CMDSIZE, 0);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16);
ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1);
} else {
- ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0);
- r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8);
+ ALLOC_STATE(ri, variable, R300_RI_CMDSIZE, 0);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8);
ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1);
}
ALLOC_STATE(sc_hyperz, always, 3, 0);
- r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2);
+ r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2);
ALLOC_STATE(sc_screendoor, always, 2, 0);
- r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1);
+ r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
ALLOC_STATE(us_out_fmt, always, 6, 0);
- r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5);
+ r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5);
if (is_r500) {
ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0);
- r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2);
+ r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2);
r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO;
- r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3);
- r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1);
+ r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3);
+ r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1);
r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */
ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
- r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0);
+ r300->hw.r500fp.cmd[R300_FPI_CMD_0] =
+ cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.r500fp.emit = emit_r500fp;
+
ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
- r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0);
+ r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] =
+ cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.r500fp_const.emit = emit_r500fp;
} else {
ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
- r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3);
- r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4);
+ r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3);
+ r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4);
+
ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
- r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0);
+ r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0);
ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
- r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1);
+ r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1);
ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
- r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1);
+ r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1);
ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
- r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1);
+ r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1);
ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
- r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1);
+ r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1);
ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
- r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0);
+ r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0);
}
ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
- r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1);
+ r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1);
ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
- r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3);
+ r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3);
ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
- r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2);
+ r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2);
ALLOC_STATE(fg_depth_src, always, 2, 0);
- r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1);
+ r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1);
ALLOC_STATE(rb3d_cctl, always, 2, 0);
- r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1);
+ r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1);
ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
- r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2);
+ r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2);
ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
- r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(RB3D_COLOR_CHANNEL_MASK, 1);
+ r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1);
if (is_r500) {
ALLOC_STATE(blend_color, always, 3, 0);
- r300->hw.blend_color.cmd[0] = cmdpacket0(R500_RB3D_CONSTANT_COLOR_AR, 2);
+ r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2);
} else {
ALLOC_STATE(blend_color, always, 2, 0);
- r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 1);
+ r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1);
}
ALLOC_STATE(rop, always, 2, 0);
- r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1);
- ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
- r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
- r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
+ r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1);
+ ALLOC_STATE(cb, cb_offset, R300_CB_CMDSIZE, 0);
+ r300->hw.cb.emit = &emit_cb_offset;
ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
- r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9);
+ r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
- r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1);
- ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
- r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
+ r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
+ if ((r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ||
+ ( !r300->radeon.radeonScreen->kernel_mm && (
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS400) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) ) ) ) {
+ ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
+ } else {
+ ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0);
+ }
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
r300->hw.zs.cmd[R300_ZS_CMD_0] =
- cmdpacket0(R300_ZB_CNTL, 3);
+ cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
+
ALLOC_STATE(zstencil_format, always, 5, 0);
r300->hw.zstencil_format.cmd[0] =
- cmdpacket0(R300_ZB_FORMAT, 4);
- ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
- r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2);
+ cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4);
+ r300->hw.zstencil_format.emit = emit_zstencil_format;
+
+ ALLOC_STATE(zb, zb_offset, R300_ZB_CMDSIZE, 0);
+ r300->hw.zb.emit = emit_zb_offset;
ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
- r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1);
- ALLOC_STATE(unk4F30, always, 3, 0);
- r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2);
+ r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
+ ALLOC_STATE(zb_zmask, always, 3, 0);
+ r300->hw.zb_zmask.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZMASK_OFFSET, 2);
ALLOC_STATE(zb_hiz_offset, always, 2, 0);
- r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1);
+ r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1);
ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
- r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1);
+ r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1);
/* VPU only on TCL */
if (has_tcl) {
- int i;
+ int i;
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE(vap_flush, always, 10, 0);
+ /* flush processing vertices */
+ r300->hw.vap_flush.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
+ r300->hw.vap_flush.cmd[1] = 0;
+ r300->hw.vap_flush.cmd[2] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DSTCACHE_CTLSTAT, 1);
+ r300->hw.vap_flush.cmd[3] = R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D;
+ r300->hw.vap_flush.cmd[4] = cmdpacket0(r300->radeon.radeonScreen, RADEON_WAIT_UNTIL, 1);
+ r300->hw.vap_flush.cmd[5] = RADEON_WAIT_3D_IDLECLEAN;
+ r300->hw.vap_flush.cmd[6] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
+ r300->hw.vap_flush.cmd[7] = 0xffffff;
+ r300->hw.vap_flush.cmd[8] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
+ r300->hw.vap_flush.cmd[9] = 0;
+ } else {
+ ALLOC_STATE(vap_flush, never, 10, 0);
+ }
+
+
ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
- r300->hw.vpi.cmd[R300_VPI_CMD_0] =
- cmdvpu(R300_PVS_CODE_START, 0);
+ r300->hw.vpi.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpi.emit = emit_vpu;
if (is_r500) {
- ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
- r300->hw.vpp.cmd[R300_VPP_CMD_0] =
- cmdvpu(R500_PVS_CONST_START, 0);
-
- ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
- r300->hw.vps.cmd[R300_VPS_CMD_0] =
- cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1);
+ ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+ r300->hw.vpp.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpp.emit = emit_vpu;
+
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+ r300->hw.vps.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vps.emit = emit_vpu;
for (i = 0; i < 6; i++) {
ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
- r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
- cmdvpu(R500_PVS_UCP_START + i, 1);
+ r300->hw.vpucp[i].cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen,
+ R500_PVS_UCP_START + i, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpucp[i].emit = emit_vpu;
}
} else {
- ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
- r300->hw.vpp.cmd[R300_VPP_CMD_0] =
- cmdvpu(R300_PVS_CONST_START, 0);
-
- ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
- r300->hw.vps.cmd[R300_VPS_CMD_0] =
- cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1);
+ ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+ r300->hw.vpp.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpp.emit = emit_vpu;
+
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+ r300->hw.vps.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vps.emit = emit_vpu;
for (i = 0; i < 6; i++) {
ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
- r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
- cmdvpu(R300_PVS_UCP_START + i, 1);
+ r300->hw.vpucp[i].cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen,
+ R300_PVS_UCP_START + i, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpucp[i].emit = emit_vpu;
}
}
}
@@ -556,130 +789,48 @@ void r300InitCmdBuf(r300ContextPtr r300)
/* Textures */
ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_FILTER0_0, 0);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0);
ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_FILTER1_0, 0);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0);
ALLOC_STATE(tex.size, variable, mtu + 1, 0);
- r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0);
+ r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0);
ALLOC_STATE(tex.format, variable, mtu + 1, 0);
r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_FORMAT_0, 0);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0);
ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
- r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0);
+ r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0);
- ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
+ ALLOC_STATE(tex.offset, tex_offsets, 1, 0);
r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_OFFSET_0, 0);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0);
+ r300->hw.tex.offset.emit = &emit_tex_offsets;
ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_CHROMA_KEY_0, 0);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0);
ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_BORDER_COLOR_0, 0);
-
- r300->hw.is_dirty = GL_TRUE;
- r300->hw.all_dirty = GL_TRUE;
-
- /* Initialize command buffer */
- size =
- 256 * driQueryOptioni(&r300->radeon.optionCache,
- "command_buffer_size");
- if (size < 2 * r300->hw.max_state_size) {
- size = 2 * r300->hw.max_state_size + 65535;
- }
- if (size > 64 * 256)
- size = 64 * 256;
-
- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) {
- fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n",
- sizeof(drm_r300_cmd_header_t));
- fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n",
- sizeof(drm_radeon_cmd_buffer_t));
- fprintf(stderr,
- "Allocating %d bytes command buffer (max state is %d bytes)\n",
- size * 4, r300->hw.max_state_size * 4);
- }
-
- r300->cmdbuf.size = size;
- r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4);
- r300->cmdbuf.count_used = 0;
- r300->cmdbuf.count_reemit = 0;
-}
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0);
-/**
- * Destroy the command buffer and state atoms.
- */
-void r300DestroyCmdBuf(r300ContextPtr r300)
-{
- struct r300_state_atom *atom;
-
- FREE(r300->cmdbuf.cmd_buf);
-
- foreach(atom, &r300->hw.atomlist) {
- FREE(atom->cmd);
+ radeon_init_query_stateobj(&r300->radeon, R300_QUERYOBJ_CMDSIZE);
+ if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) {
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RV530_FG_ZBREG_DEST, 1);
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL;
+ } else {
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_REG_DEST, 1);
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = R300_RASTER_PIPE_SELECT_ALL;
}
-}
-
-void r300EmitBlit(r300ContextPtr rmesa,
- GLuint color_fmt,
- GLuint src_pitch,
- GLuint src_offset,
- GLuint dst_pitch,
- GLuint dst_offset,
- GLint srcx, GLint srcy,
- GLint dstx, GLint dsty, GLuint w, GLuint h)
-{
- drm_r300_cmd_header_t *cmd;
-
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr,
- "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
- __FUNCTION__, src_pitch, src_offset, srcx, srcy,
- dst_pitch, dst_offset, dstx, dsty, w, h);
-
- assert((src_pitch & 63) == 0);
- assert((dst_pitch & 63) == 0);
- assert((src_offset & 1023) == 0);
- assert((dst_offset & 1023) == 0);
- assert(w < (1 << 16));
- assert(h < (1 << 16));
-
- cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__);
-
- cmd[0].header.cmd_type = R300_CMD_PACKET3;
- cmd[0].header.pad0 = R300_CMD_PACKET3_RAW;
- cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16);
- cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
- RADEON_GMC_DST_PITCH_OFFSET_CNTL |
- RADEON_GMC_BRUSH_NONE |
- (color_fmt << 8) |
- RADEON_GMC_SRC_DATATYPE_COLOR |
- RADEON_ROP3_S |
- RADEON_DP_SRC_SOURCE_MEMORY |
- RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
-
- cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10);
- cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
- cmd[5].u = (srcx << 16) | srcy;
- cmd[6].u = (dstx << 16) | dsty; /* dst */
- cmd[7].u = (w << 16) | h;
-}
-
-void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
-{
- drm_r300_cmd_header_t *cmd;
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZPASS_DATA, 1);
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_1] = 0;
- assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D)));
+ r300->radeon.hw.is_dirty = GL_TRUE;
+ r300->radeon.hw.all_dirty = GL_TRUE;
- cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
- cmd[0].u = 0;
- cmd[0].wait.cmd_type = R300_CMD_WAIT;
- cmd[0].wait.flags = flags;
+ rcommonInitCmdBuf(&r300->radeon);
}
diff --git a/r300/r300_cmdbuf.h b/r300/r300_cmdbuf.h
index a8eaa58..1b703e5 100644
--- a/r300/r300_cmdbuf.h
+++ b/r300/r300_cmdbuf.h
@@ -38,79 +38,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_context.h"
-extern int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller);
-extern int r300FlushCmdBuf(r300ContextPtr r300, const char *caller);
-
-extern void r300EmitState(r300ContextPtr r300);
+#define CACHE_FLUSH_BUFSZ (4*2)
+#define PRE_EMIT_STATE_BUFSZ (2+2)
+#define AOS_BUFSZ(nr) (3+(nr >>1)*3 + (nr&1)*2 + (nr*2))
+#define FIREAOS_BUFSZ (3)
+#define SCISSORS_BUFSZ (3)
extern void r300InitCmdBuf(r300ContextPtr r300);
-extern void r300DestroyCmdBuf(r300ContextPtr r300);
-
-/**
- * Make sure that enough space is available in the command buffer
- * by flushing if necessary.
- *
- * \param dwords The number of dwords we need to be free on the command buffer
- */
-static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300,
- int dwords, const char *caller)
-{
- assert(dwords < r300->cmdbuf.size);
-
- if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size)
- r300FlushCmdBuf(r300, caller);
-}
-
-/**
- * Allocate the given number of dwords in the command buffer and return
- * a pointer to the allocated area.
- * When necessary, these functions cause a flush. r300AllocCmdBuf() also
- * causes state reemission after a flush. This is necessary to ensure
- * correct hardware state after an unlock.
- */
-static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
- int dwords, const char *caller)
-{
- uint32_t *ptr;
-
- r300EnsureCmdBufSpace(r300, dwords, caller);
-
- ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
- r300->cmdbuf.count_used += dwords;
- return ptr;
-}
-
-static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
- int dwords, const char *caller)
-{
- uint32_t *ptr;
-
- r300EnsureCmdBufSpace(r300, dwords, caller);
-
- if (!r300->cmdbuf.count_used) {
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr,
- "Reemit state after flush (from %s)\n", caller);
- r300EmitState(r300);
- }
-
- ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
- r300->cmdbuf.count_used += dwords;
- return ptr;
-}
+void r300_emit_scissor(GLcontext *ctx);
-extern void r300EmitBlit(r300ContextPtr rmesa,
- GLuint color_fmt,
- GLuint src_pitch,
- GLuint src_offset,
- GLuint dst_pitch,
- GLuint dst_offset,
- GLint srcx, GLint srcy,
- GLint dstx, GLint dsty, GLuint w, GLuint h);
+void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom);
+int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom);
-extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags);
-extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start);
-extern void r300EmitVertexShader(r300ContextPtr rmesa);
-extern void r300EmitPixelShader(r300ContextPtr rmesa);
+void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom);
+int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom);
+int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom);
#endif /* __R300_CMDBUF_H__ */
diff --git a/r300/r300_context.c b/r300/r300_context.c
index 7d67050..2ea1b82 100644
--- a/r300/r300_context.c
+++ b/r300/r300_context.c
@@ -43,8 +43,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/matrix.h"
#include "main/extensions.h"
#include "main/state.h"
-#include "main/texobj.h"
#include "main/bufferobj.h"
+#include "main/texobj.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
@@ -56,46 +56,47 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "drivers/common/driverfuncs.h"
-#include "radeon_ioctl.h"
-#include "radeon_span.h"
#include "r300_context.h"
+#include "radeon_context.h"
+#include "radeon_span.h"
#include "r300_cmdbuf.h"
#include "r300_state.h"
#include "r300_ioctl.h"
#include "r300_tex.h"
#include "r300_emit.h"
#include "r300_swtcl.h"
-
-#ifdef USER_BUFFERS
-#include "r300_mem.h"
-#endif
+#include "radeon_bocs_wrapper.h"
+#include "radeon_buffer_objects.h"
+#include "radeon_queryobj.h"
#include "vblank.h"
#include "utils.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
-/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */
-int future_hw_tcl_on = 1;
-int hw_tcl_on = 1;
-
#define need_GL_VERSION_2_0
+#define need_GL_ARB_occlusion_query
#define need_GL_ARB_point_parameters
#define need_GL_ARB_vertex_program
#define need_GL_EXT_blend_equation_separate
#define need_GL_EXT_blend_func_separate
#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_framebuffer_blit
+#define need_GL_EXT_framebuffer_object
#define need_GL_EXT_fog_coord
#define need_GL_EXT_gpu_program_parameters
#define need_GL_EXT_secondary_color
#define need_GL_EXT_stencil_two_side
#define need_GL_ATI_separate_stencil
#define need_GL_NV_vertex_program
+
#include "extension_helper.h"
+
const struct dri_extension card_extensions[] = {
/* *INDENT-OFF* */
{"GL_ARB_depth_texture", NULL},
{"GL_ARB_fragment_program", NULL},
+ {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
{"GL_ARB_multitexture", NULL},
{"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
{"GL_ARB_shadow", NULL},
@@ -112,6 +113,7 @@ const struct dri_extension card_extensions[] = {
{"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
{"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
{"GL_EXT_blend_subtract", NULL},
+ {"GL_EXT_packed_depth_stencil", NULL},
{"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
{"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
{"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
@@ -125,6 +127,8 @@ const struct dri_extension card_extensions[] = {
{"GL_EXT_texture_lod_bias", NULL},
{"GL_EXT_texture_mirror_clamp", NULL},
{"GL_EXT_texture_rectangle", NULL},
+ {"GL_EXT_texture_sRGB", NULL},
+ {"GL_EXT_vertex_array_bgra", NULL},
{"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
{"GL_ATI_texture_env_combine3", NULL},
{"GL_ATI_texture_mirror_once", NULL},
@@ -139,6 +143,12 @@ const struct dri_extension card_extensions[] = {
};
+const struct dri_extension mm_extensions[] = {
+ { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions },
+ { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+ { NULL, NULL }
+};
+
/**
* The GL 2.0 functions are needed to make display lists work with
* functions added by GL_ATI_separate_stencil.
@@ -147,16 +157,7 @@ const struct dri_extension gl_20_extension[] = {
{"GL_VERSION_2_0", GL_VERSION_2_0_functions },
};
-
-extern struct tnl_pipeline_stage _r300_render_stage;
-extern const struct tnl_pipeline_stage _r300_tcl_stage;
-
static const struct tnl_pipeline_stage *r300_pipeline[] = {
-
- /* Try and go straight to t&l
- */
- &_r300_tcl_stage,
-
/* Catch any t&l fallbacks
*/
&_tnl_vertex_transform_stage,
@@ -165,147 +166,188 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
&_tnl_fog_coordinate_stage,
&_tnl_texgen_stage,
&_tnl_texture_transform_stage,
+ &_tnl_point_attenuation_stage,
&_tnl_vertex_program_stage,
-
- /* Try again to go to tcl?
- * - no good for asymmetric-twoside (do with multipass)
- * - no good for asymmetric-unfilled (do with multipass)
- * - good for material
- * - good for texgen
- * - need to manipulate a bit of state
- *
- * - worth it/not worth it?
- */
-
- /* Else do them here.
- */
- &_r300_render_stage,
- &_tnl_render_stage, /* FALLBACK */
+ &_tnl_render_stage,
0,
};
-/* Create the device specific rendering context.
- */
-GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- __DRIcontextPrivate * driContextPriv,
- void *sharedContextPrivate)
+static void r300_get_lock(radeonContextPtr rmesa)
{
- __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
- radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
- struct dd_function_table functions;
- r300ContextPtr r300;
- GLcontext *ctx;
- int tcl_mode, i;
-
- assert(glVisual);
- assert(driContextPriv);
- assert(screen);
+ drm_radeon_sarea_t *sarea = rmesa->sarea;
- /* Allocate the R300 context */
- r300 = (r300ContextPtr) CALLOC(sizeof(*r300));
- if (!r300)
- return GL_FALSE;
+ if (sarea->ctx_owner != rmesa->dri.hwContext) {
+ sarea->ctx_owner = rmesa->dri.hwContext;
+ if (!rmesa->radeonScreen->kernel_mm)
+ radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
+ }
+}
- if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
- hw_tcl_on = future_hw_tcl_on = 0;
+static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
+{
+ /* please flush pipe do all pending work */
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_SC_SCREENDOOR, 1));
+ radeon_cs_write_dword(cs, 0x0);
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_SC_SCREENDOOR, 1));
+ radeon_cs_write_dword(cs, 0x00FFFFFF);
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_SC_HYPERZ, 1));
+ radeon_cs_write_dword(cs, 0x0);
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_US_CONFIG, 1));
+ radeon_cs_write_dword(cs, 0x0);
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_ZB_CNTL, 1));
+ radeon_cs_write_dword(cs, 0x0);
+ radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D));
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_RB3D_DSTCACHE_CTLSTAT, 1));
+ radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_ZB_ZCACHE_CTLSTAT, 1));
+ radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
+ radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen,
+ R300_WAIT_3D | R300_WAIT_3D_CLEAN));
+}
- /* Parse configuration files.
- * Do this here so that initialMaxAnisotropy is set before we create
- * the default textures.
- */
- driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache,
- screen->driScreen->myNum, "r300");
- r300->initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
- "def_max_anisotropy");
+static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon)
+{
+ BATCH_LOCALS(radeon);
- /* Init default driver functions then plug in our R300-specific functions
- * (the texture functions are especially important)
- */
- _mesa_init_driver_functions(&functions);
- r300InitIoctlFuncs(&functions);
- r300InitStateFuncs(&functions);
- r300InitTextureFuncs(&functions);
- r300InitShaderFuncs(&functions);
+ cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH);
+ END_BATCH();
+ end_3d(radeon);
+}
-#ifdef USER_BUFFERS
- r300_mem_init(r300);
-#endif
+static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ if (mode)
+ r300->radeon.Fallback |= bit;
+ else
+ r300->radeon.Fallback &= ~bit;
+}
- if (!radeonInitContext(&r300->radeon, &functions,
- glVisual, driContextPriv,
- sharedContextPrivate)) {
- FREE(r300);
- return GL_FALSE;
+static void r300_emit_query_finish(radeonContextPtr radeon)
+{
+ r300ContextPtr r300 = (r300ContextPtr)radeon;
+ struct radeon_query_object *query = radeon->query.current;
+ BATCH_LOCALS(radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->radeon.radeonScreen->num_gb_pipes + 2);
+ switch (r300->radeon.radeonScreen->num_gb_pipes) {
+ case 4:
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ case 3:
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ case 2:
+ if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) {
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3);
+ } else {
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1);
+ }
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ case 1:
+ default:
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ break;
}
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);
+ END_BATCH();
+ query->curr_offset += r300->radeon.radeonScreen->num_gb_pipes * sizeof(uint32_t);
+ assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
- /* Init r300 context data */
- r300->dma.buf0_address =
- r300->radeon.radeonScreen->buffers->list[0].address;
-
- (void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps));
- make_empty_list(&r300->swapped);
-
- r300->nr_heaps = 1 /* screen->numTexHeaps */ ;
- assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS);
- for (i = 0; i < r300->nr_heaps; i++) {
- /* *INDENT-OFF* */
- r300->texture_heaps[i] = driCreateTextureHeap(i, r300,
- screen->
- texSize[i], 12,
- RADEON_NR_TEX_REGIONS,
- (drmTextureRegionPtr)
- r300->radeon.sarea->
- tex_list[i],
- &r300->radeon.sarea->
- tex_age[i],
- &r300->swapped,
- sizeof
- (r300TexObj),
- (destroy_texture_object_t
- *)
- r300DestroyTexObj);
- /* *INDENT-ON* */
- }
- r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache,
- "texture_depth");
- if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
- r300->texture_depth = (screen->cpp == 4) ?
- DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
-
- /* Set the maximum texture size small enough that we can guarentee that
- * all texture units can bind a maximal texture and have them both in
- * texturable memory at once.
- */
+static void rv530_emit_query_finish_single_z(radeonContextPtr radeon)
+{
+ BATCH_LOCALS(radeon);
+ struct radeon_query_object *query = radeon->query.current;
+
+ BEGIN_BATCH_NO_AUTOSTATE(8);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ END_BATCH();
+
+ query->curr_offset += sizeof(uint32_t);
+ assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
- ctx = r300->radeon.glCtx;
+static void rv530_emit_query_finish_double_z(radeonContextPtr radeon)
+{
+ BATCH_LOCALS(radeon);
+ struct radeon_query_object *query = radeon->query.current;
+
+ BEGIN_BATCH_NO_AUTOSTATE(14);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ END_BATCH();
+
+ query->curr_offset += 2 * sizeof(uint32_t);
+ assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
+
+static void r300_init_vtbl(radeonContextPtr radeon)
+{
+ radeon->vtbl.get_lock = r300_get_lock;
+ radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset;
+ radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header;
+ radeon->vtbl.swtcl_flush = r300_swtcl_flush;
+ radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms;
+ radeon->vtbl.fallback = r300_fallback;
+ if (radeon->radeonScreen->chip_family == CHIP_FAMILY_RV530) {
+ if (radeon->radeonScreen->num_z_pipes == 2)
+ radeon->vtbl.emit_query_finish = rv530_emit_query_finish_double_z;
+ else
+ radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z;
+ } else
+ radeon->vtbl.emit_query_finish = r300_emit_query_finish;
+}
+
+static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
ctx->Const.MaxTextureImageUnits =
driQueryOptioni(&r300->radeon.optionCache, "texture_image_units");
ctx->Const.MaxTextureCoordUnits =
driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units");
- ctx->Const.MaxTextureUnits =
- MIN2(ctx->Const.MaxTextureImageUnits,
+ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureImageUnits,
ctx->Const.MaxTextureCoordUnits);
+
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
ctx->Const.MaxTextureLodBias = 16.0;
- if (screen->chip_family >= CHIP_FAMILY_RV515)
- ctx->Const.MaxTextureLevels = 13;
- else
- ctx->Const.MaxTextureLevels = 12;
-
- driCalculateMaxTextureLevels( r300->texture_heaps,
- r300->nr_heaps,
- & ctx->Const,
- 4,
- ctx->Const.MaxTextureLevels - 1,
- MIN2(ctx->Const.MaxTextureLevels,
- MAX_3D_TEXTURE_LEVELS) - 1,
- ctx->Const.MaxTextureLevels - 1,
- ctx->Const.MaxTextureLevels - 1,
- ctx->Const.MaxTextureLevels - 1,
- GL_FALSE,
- 2 );
+ if (screen->chip_family >= CHIP_FAMILY_RV515) {
+ ctx->Const.MaxTextureLevels = 13;
+ ctx->Const.MaxCubeTextureLevels = 13;
+ ctx->Const.MaxTextureRectSize = 4096;
+ }
+ else {
+ ctx->Const.MaxTextureLevels = 12;
+ ctx->Const.MaxCubeTextureLevels = 12;
+ ctx->Const.MaxTextureRectSize = 2048;
+ }
ctx->Const.MinPointSize = 1.0;
ctx->Const.MinPointSizeAA = 1.0;
@@ -317,263 +359,173 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
-#ifdef USER_BUFFERS
- /* Needs further modifications */
-#if 0
- ctx->Const.MaxArrayLockSize =
- ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
-#endif
-#endif
-
ctx->Const.MaxDrawBuffers = 1;
- _mesa_set_mvp_with_dp4( ctx, GL_TRUE );
-
- /* Initialize the software rasterizer and helper modules.
- */
- _swrast_CreateContext(ctx);
- _vbo_CreateContext(ctx);
- _tnl_CreateContext(ctx);
- _swsetup_CreateContext(ctx);
- _swsetup_Wakeup(ctx);
- _ae_create_context(ctx);
-
- /* Install the customized pipeline:
- */
- _tnl_destroy_pipeline(ctx);
- _tnl_install_pipeline(ctx, r300_pipeline);
-
- /* Try and keep materials and vertices separate:
- */
-/* _tnl_isolate_materials(ctx, GL_TRUE); */
-
- /* Configure swrast and TNL to match hardware characteristics:
- */
- _swrast_allow_pixel_fog(ctx, GL_FALSE);
- _swrast_allow_vertex_fog(ctx, GL_TRUE);
- _tnl_allow_pixel_fog(ctx, GL_FALSE);
- _tnl_allow_vertex_fog(ctx, GL_TRUE);
-
/* currently bogus data */
- if (screen->chip_flags & RADEON_CHIPSET_TCL) {
- ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
- ctx->Const.VertexProgram.MaxNativeInstructions =
- VSF_MAX_FRAGMENT_LENGTH / 4;
+ if (r300->options.hw_tcl_enabled) {
+ ctx->Const.VertexProgram.MaxNativeInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
+ ctx->Const.VertexProgram.MaxNativeAluInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */
- ctx->Const.VertexProgram.MaxTemps = 32;
- ctx->Const.VertexProgram.MaxNativeTemps =
- /*VSF_MAX_FRAGMENT_TEMPS */ 32;
+ ctx->Const.VertexProgram.MaxNativeTemps = 32;
ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */
ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
}
- ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS;
- ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */
- ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS;
- ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST;
- ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST;
- ctx->Const.FragmentProgram.MaxNativeInstructions =
- PFS_MAX_ALU_INST + PFS_MAX_TEX_INST;
- ctx->Const.FragmentProgram.MaxNativeTexIndirections =
- PFS_MAX_TEX_INDIRECT;
- ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */
- ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
- ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+ if (screen->chip_family >= CHIP_FAMILY_RV515) {
+ ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS;
+ ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */
+ ctx->Const.FragmentProgram.MaxNativeParameters = R500_PFS_NUM_CONST_REGS;
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = R500_PFS_MAX_INST;
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = R500_PFS_MAX_INST;
+ ctx->Const.FragmentProgram.MaxNativeInstructions = R500_PFS_MAX_INST;
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections = R500_PFS_MAX_INST;
+ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
+ } else {
+ ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS;
+ ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */
+ ctx->Const.FragmentProgram.MaxNativeParameters = R300_PFS_NUM_CONST_REGS;
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = R300_PFS_MAX_ALU_INST;
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = R300_PFS_MAX_TEX_INST;
+ ctx->Const.FragmentProgram.MaxNativeInstructions = R300_PFS_MAX_ALU_INST + R300_PFS_MAX_TEX_INST;
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT;
+ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
+ }
- driInitExtensions(ctx, card_extensions, GL_TRUE);
+}
- if (driQueryOptionb
- (&r300->radeon.optionCache, "disable_stencil_two_side"))
- _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
+static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen)
+{
+ struct r300_options options = { 0 };
- if (r300->radeon.glCtx->Mesa_DXTn
- && !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) {
- _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
- _mesa_enable_extension(ctx, "GL_S3_s3tc");
- } else
- if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable"))
- {
- _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
- }
+ driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache,
+ screen->driScreen->myNum, "r300");
- r300->disable_lowimpact_fallback =
- driQueryOptionb(&r300->radeon.optionCache,
- "disable_lowimpact_fallback");
+ r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, "def_max_anisotropy");
- radeonInitSpanFuncs(ctx);
- r300InitCmdBuf(r300);
- r300InitState(r300);
- if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
- r300InitSwtcl(ctx);
+ options.stencil_two_side_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_stencil_two_side");
+ options.s3tc_force_enabled = driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable");
+ options.s3tc_force_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc");
- TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+ if (!(screen->chip_flags & RADEON_CHIPSET_TCL) || driQueryOptioni(&r300->radeon.optionCache, "tcl_mode") == DRI_CONF_TCL_SW)
+ options.hw_tcl_enabled = 0;
+ else
+ options.hw_tcl_enabled = 1;
- tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode");
- if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) {
- fprintf(stderr, "disabling 3D acceleration\n");
-#if R200_MERGED
- FALLBACK(&r300->radeon, RADEON_FALLBACK_DISABLE, 1);
-#endif
- }
- if (tcl_mode == DRI_CONF_TCL_SW ||
- !(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
- if (r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
- r300->radeon.radeonScreen->chip_flags &=
- ~RADEON_CHIPSET_TCL;
- fprintf(stderr, "Disabling HW TCL support\n");
- }
- TCL_FALLBACK(r300->radeon.glCtx,
- RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
- }
+ options.conformance_mode = !driQueryOptionb(&r300->radeon.optionCache, "disable_lowimpact_fallback");
- return GL_TRUE;
+ r300->options = options;
}
-static void r300FreeGartAllocations(r300ContextPtr r300)
+static void r300InitGLExtensions(GLcontext *ctx)
{
- int i, ret, tries = 0, done_age, in_use = 0;
- drm_radeon_mem_free_t memfree;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
- memfree.region = RADEON_MEM_REGION_GART;
+ driInitExtensions(ctx, card_extensions, GL_TRUE);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ driInitExtensions(ctx, mm_extensions, GL_FALSE);
-#ifdef USER_BUFFERS
- for (i = r300->rmm->u_last; i > 0; i--) {
- if (r300->rmm->u_list[i].ptr == NULL) {
- continue;
- }
+ if (r300->options.stencil_two_side_disabled)
+ _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
- /* check whether this buffer is still in use */
- if (r300->rmm->u_list[i].pending) {
- in_use++;
- }
+ if (r300->options.s3tc_force_enabled) {
+ _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+ _mesa_enable_extension(ctx, "GL_S3_s3tc");
+ } else if (r300->options.s3tc_force_disabled) {
+ _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc");
}
- /* Cannot flush/lock if no context exists. */
- if (in_use)
- r300FlushCmdBuf(r300, __FUNCTION__);
- done_age = radeonGetAge((radeonContextPtr) r300);
+ if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) {
+ _mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
+ }
+}
- for (i = r300->rmm->u_last; i > 0; i--) {
- if (r300->rmm->u_list[i].ptr == NULL) {
- continue;
- }
+/* Create the device specific rendering context.
+ */
+GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate)
+{
+ __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+ radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+ struct dd_function_table functions;
+ r300ContextPtr r300;
+ GLcontext *ctx;
- /* check whether this buffer is still in use */
- if (!r300->rmm->u_list[i].pending) {
- continue;
- }
+ assert(glVisual);
+ assert(driContextPriv);
+ assert(screen);
- assert(r300->rmm->u_list[i].h_pending == 0);
+ r300 = (r300ContextPtr) CALLOC(sizeof(*r300));
+ if (!r300)
+ return GL_FALSE;
- tries = 0;
- while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
- usleep(10);
- done_age = radeonGetAge((radeonContextPtr) r300);
- }
- if (tries >= 1000) {
- WARN_ONCE("Failed to idle region!");
- }
+ r300ParseOptions(r300, screen);
- memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
- (char *)r300->radeon.radeonScreen->gartTextures.map;
+ r300->radeon.radeonScreen = screen;
+ r300_init_vtbl(&r300->radeon);
- ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
- DRM_RADEON_FREE, &memfree,
- sizeof(memfree));
- if (ret) {
- fprintf(stderr, "Failed to free at %p\nret = %s\n",
- r300->rmm->u_list[i].ptr, strerror(-ret));
- } else {
- if (i == r300->rmm->u_last)
- r300->rmm->u_last--;
+ _mesa_init_driver_functions(&functions);
+ r300InitIoctlFuncs(&functions);
+ r300InitStateFuncs(&functions);
+ r300InitTextureFuncs(&functions);
+ r300InitShaderFuncs(&functions);
+ radeonInitQueryObjFunctions(&functions);
+ radeonInitBufferObjectFuncs(&functions);
- r300->rmm->u_list[i].pending = 0;
- r300->rmm->u_list[i].ptr = NULL;
- }
+ if (!radeonInitContext(&r300->radeon, &functions,
+ glVisual, driContextPriv,
+ sharedContextPrivate)) {
+ FREE(r300);
+ return GL_FALSE;
}
- r300->rmm->u_head = i;
-#endif /* USER_BUFFERS */
-}
-/* Destroy the device specific context.
- */
-void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
-{
- GET_CURRENT_CONTEXT(ctx);
- r300ContextPtr r300 = (r300ContextPtr) driContextPriv->driverPrivate;
- radeonContextPtr radeon = (radeonContextPtr) r300;
- radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
- int i;
-
- if (RADEON_DEBUG & DEBUG_DRI) {
- fprintf(stderr, "Destroying context !\n");
- }
+ ctx = r300->radeon.glCtx;
- /* check if we're deleting the currently bound context */
- if (&r300->radeon == current) {
- radeonFlush(r300->radeon.glCtx);
- _mesa_make_current(NULL, NULL, NULL);
- }
+ r300->fallback = 0;
+ if (r300->options.hw_tcl_enabled)
+ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
- /* Free r300 context resources */
- assert(r300); /* should never be null */
-
- if (r300) {
- GLboolean release_texture_heaps;
-
- release_texture_heaps =
- (r300->radeon.glCtx->Shared->RefCount == 1);
- _swsetup_DestroyContext(r300->radeon.glCtx);
- _tnl_DestroyContext(r300->radeon.glCtx);
- _vbo_DestroyContext(r300->radeon.glCtx);
- _swrast_DestroyContext(r300->radeon.glCtx);
-
- if (r300->dma.current.buf) {
- r300ReleaseDmaRegion(r300, &r300->dma.current,
- __FUNCTION__);
-#ifndef USER_BUFFERS
- r300FlushCmdBuf(r300, __FUNCTION__);
-#endif
- }
- r300FreeGartAllocations(r300);
- r300DestroyCmdBuf(r300);
+ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
- if (radeon->state.scissor.pClipRects) {
- FREE(radeon->state.scissor.pClipRects);
- radeon->state.scissor.pClipRects = NULL;
- }
+ r300InitConstValues(ctx, screen);
- if (release_texture_heaps) {
- /* This share group is about to go away, free our private
- * texture object data.
- */
- int i;
+ _mesa_set_mvp_with_dp4( ctx, GL_TRUE );
- for (i = 0; i < r300->nr_heaps; i++) {
- driDestroyTextureHeap(r300->texture_heaps[i]);
- r300->texture_heaps[i] = NULL;
- }
+ /* Initialize the software rasterizer and helper modules.
+ */
+ _swrast_CreateContext(ctx);
+ _vbo_CreateContext(ctx);
+ _tnl_CreateContext(ctx);
+ _swsetup_CreateContext(ctx);
+ _swsetup_Wakeup(ctx);
- assert(is_empty_list(&r300->swapped));
- }
+ /* Install the customized pipeline:
+ */
+ _tnl_destroy_pipeline(ctx);
+ _tnl_install_pipeline(ctx, r300_pipeline);
+ TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
- /* Drop texture object references from current hardware state */
- for (i = 0; i < 8; i++) {
- _mesa_reference_texobj(&r300->state.texture.unit[i].texobj, NULL);
- }
+ /* Configure swrast and TNL to match hardware characteristics:
+ */
+ _swrast_allow_pixel_fog(ctx, GL_FALSE);
+ _swrast_allow_vertex_fog(ctx, GL_TRUE);
+ _tnl_allow_pixel_fog(ctx, GL_FALSE);
+ _tnl_allow_vertex_fog(ctx, GL_TRUE);
- radeonCleanupContext(&r300->radeon);
+ if (r300->options.hw_tcl_enabled) {
+ r300InitDraw(ctx);
+ } else {
+ r300InitSwtcl(ctx);
+ }
-#ifdef USER_BUFFERS
- /* the memory manager might be accessed when Mesa frees the shared
- * state, so don't destroy it earlier
- */
- r300_mem_destroy(r300);
-#endif
+ radeon_fbo_init(&r300->radeon);
+ radeonInitSpanFuncs( ctx );
+ r300InitCmdBuf(r300);
+ r300InitState(r300);
+ r300InitShaderFunctions(r300);
- /* free the option cache */
- driDestroyOptionCache(&r300->radeon.optionCache);
+ r300InitGLExtensions(ctx);
- FREE(r300);
- }
+ return GL_TRUE;
}
+
diff --git a/r300/r300_context.h b/r300/r300_context.h
index 96a3205..1dadcc0 100644
--- a/r300/r300_context.h
+++ b/r300/r300_context.h
@@ -37,216 +37,29 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#ifndef __R300_CONTEXT_H__
#define __R300_CONTEXT_H__
-#include "tnl/t_vertex.h"
#include "drm.h"
#include "radeon_drm.h"
#include "dri_util.h"
-#include "texmem.h"
+#include "radeon_common.h"
-#include "main/macros.h"
#include "main/mtypes.h"
-#include "main/colormac.h"
-
-#define USER_BUFFERS
+#include "shader/prog_instruction.h"
+#include "compiler/radeon_code.h"
struct r300_context;
typedef struct r300_context r300ContextRec;
typedef struct r300_context *r300ContextPtr;
-#include "radeon_lock.h"
-#include "main/mm.h"
-
-/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
- I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble
- with other compilers ... GLUE!
-*/
-#define WARN_ONCE(a, ...) { \
- static int warn##__LINE__=1; \
- if(warn##__LINE__){ \
- fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \
- fprintf(stderr, "File %s function %s line %d\n", \
- __FILE__, __FUNCTION__, __LINE__); \
- fprintf(stderr, a, ## __VA_ARGS__);\
- fprintf(stderr, "***************************************************************************\n"); \
- warn##__LINE__=0;\
- } \
- }
#include "r300_vertprog.h"
-#include "r500_fragprog.h"
-
-/**
- * This function takes a float and packs it into a uint32_t
- */
-static INLINE uint32_t r300PackFloat32(float fl)
-{
- union {
- float fl;
- uint32_t u;
- } u;
-
- u.fl = fl;
- return u.u;
-}
-
-/* This is probably wrong for some values, I need to test this
- * some more. Range checking would be a good idea also..
- *
- * But it works for most things. I'll fix it later if someone
- * else with a better clue doesn't
- */
-static INLINE uint32_t r300PackFloat24(float f)
-{
- float mantissa;
- int exponent;
- uint32_t float24 = 0;
-
- if (f == 0.0)
- return 0;
-
- mantissa = frexpf(f, &exponent);
-
- /* Handle -ve */
- if (mantissa < 0) {
- float24 |= (1 << 23);
- mantissa = mantissa * -1.0;
- }
- /* Handle exponent, bias of 63 */
- exponent += 62;
- float24 |= (exponent << 16);
- /* Kill 7 LSB of mantissa */
- float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7;
-
- return float24;
-}
-
-/************ DMA BUFFERS **************/
-
-/* Need refcounting on dma buffers:
- */
-struct r300_dma_buffer {
- int refcount; /**< the number of retained regions in buf */
- drmBufPtr buf;
- int id;
-};
-#undef GET_START
-#ifdef USER_BUFFERS
-#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start))
-#else
-#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \
- (rvb)->address - rmesa->dma.buf0_address + \
- (rvb)->start)
-#endif
-/* A retained region, eg vertices for indexed vertices.
- */
-struct r300_dma_region {
- struct r300_dma_buffer *buf;
- char *address; /* == buf->address */
- int start, end, ptr; /* offsets from start of buf */
-
- int aos_offset; /* address in GART memory */
- int aos_stride; /* distance between elements, in dwords */
- int aos_size; /* number of components (1-4) */
-};
-
-struct r300_dma {
- /* Active dma region. Allocations for vertices and retained
- * regions come from here. Also used for emitting random vertices,
- * these may be flushed by calling flush_current();
- */
- struct r300_dma_region current;
- void (*flush) (r300ContextPtr);
-
- char *buf0_address; /* start of buf[0], for index calcs */
-
- /* Number of "in-flight" DMA buffers, i.e. the number of buffers
- * for which a DISCARD command is currently queued in the command buffer.
- */
- GLuint nr_released_bufs;
-};
-
- /* Texture related */
-
-typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
-
-/* Maximum number of mipmap levels supported by any supported GPU
- */
-#define R300_MAX_TEXTURE_LEVELS 13
-
-/* Texture object in locally shared texture space.
- */
-struct r300_tex_obj {
- driTextureObject base;
-
- GLuint bufAddr; /* Offset to start of locally
- shared texture block */
-
- drm_radeon_tex_image_t image[6][R300_MAX_TEXTURE_LEVELS];
- /* Six, for the cube faces */
-
- GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
-
- GLuint pitch; /* this isn't sent to hardware just used in calculations */
- /* hardware register values */
- /* Note that R200 has 8 registers per texture and R300 only 7 */
- GLuint filter;
- GLuint filter_1;
- GLuint pitch_reg;
- GLuint size; /* npot only */
- GLuint format;
- GLuint offset; /* Image location in the card's address space.
- All cube faces follow. */
- GLuint unknown4;
- GLuint unknown5;
- /* end hardware registers */
-
- /* registers computed by r200 code - keep them here to
- compare against what is actually written.
-
- to be removed later.. */
- GLuint pp_border_color;
- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
- GLuint format_x;
-
- GLboolean border_fallback;
-
- GLuint tile_bits; /* hw texture tile bits used on this texture */
-};
-
-struct r300_texture_env_state {
- struct gl_texture_object *texobj;
- GLenum format;
- GLenum envMode;
-};
/* The blit width for texture uploads
*/
#define R300_BLIT_WIDTH_BYTES 1024
#define R300_MAX_TEXTURE_UNITS 8
-struct r300_texture_state {
- struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS];
- int tc_count; /* number of incoming texture coordinates from VAP */
-};
-/**
- * A block of hardware state.
- *
- * When check returns non-zero, the returned number of dwords must be
- * copied verbatim into the command buffer in order to update a state atom
- * when it is dirty.
- */
-struct r300_state_atom {
- struct r300_state_atom *next, *prev;
- const char *name; /* for debug */
- int cmd_size; /* maximum size in dwords */
- GLuint idx; /* index in an array (e.g. textures) */
- uint32_t *cmd;
- GLboolean dirty;
-
- int (*check) (r300ContextPtr, struct r300_state_atom * atom);
-};
#define R300_VPT_CMD_0 0
#define R300_VPT_XSCALE 1
@@ -288,9 +101,11 @@ struct r300_state_atom {
#define R300_GB_MISC_MSPOS_0 1
#define R300_GB_MISC_MSPOS_1 2
#define R300_GB_MISC_TILE_CONFIG 3
-#define R300_GB_MISC_SELECT 4
-#define R300_GB_MISC_AA_CONFIG 5
-#define R300_GB_MISC_CMDSIZE 6
+#define R300_GB_MISC_CMDSIZE 4
+#define R300_GB_MISC2_CMD_0 0
+#define R300_GB_MISC2_SELECT 1
+#define R300_GB_MISC2_AA_CONFIG 2
+#define R300_GB_MISC2_CMDSIZE 3
#define R300_TXE_CMD_0 0
#define R300_TXE_ENABLE 1
@@ -459,423 +274,180 @@ struct r300_state_atom {
#define R300_TEX_CMDSIZE (MAX_TEXTURE_UNITS+1)
*/
+#define R300_QUERYOBJ_CMD_0 0
+#define R300_QUERYOBJ_DATA_0 1
+#define R300_QUERYOBJ_CMD_1 2
+#define R300_QUERYOBJ_DATA_1 3
+#define R300_QUERYOBJ_CMDSIZE 4
+
/**
* Cache for hardware register state.
*/
struct r300_hw_state {
- struct r300_state_atom atomlist;
-
- GLboolean is_dirty;
- GLboolean all_dirty;
- int max_state_size; /* in dwords */
-
- struct r300_state_atom vpt; /* viewport (1D98) */
- struct r300_state_atom vap_cntl;
- struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */
- struct r300_state_atom vof; /* VAP output format register 0x2090 */
- struct r300_state_atom vte; /* (20B0) */
- struct r300_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */
- struct r300_state_atom vap_cntl_status;
- struct r300_state_atom vir[2]; /* vap input route (2150/21E0) */
- struct r300_state_atom vic; /* vap input control (2180) */
- struct r300_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
- struct r300_state_atom vap_clip_cntl;
- struct r300_state_atom vap_clip;
- struct r300_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */
- struct r300_state_atom pvs; /* pvs_cntl (22D0) */
- struct r300_state_atom gb_enable; /* (4008) */
- struct r300_state_atom gb_misc; /* Multisampling position shifts ? (4010) */
- struct r300_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
- struct r300_state_atom ga_triangle_stipple; /* (4214) */
- struct r300_state_atom ps; /* pointsize (421C) */
- struct r300_state_atom ga_point_minmax; /* (4230) */
- struct r300_state_atom lcntl; /* line control */
- struct r300_state_atom ga_line_stipple; /* (4260) */
- struct r300_state_atom shade;
- struct r300_state_atom polygon_mode;
- struct r300_state_atom fogp; /* fog parameters (4294) */
- struct r300_state_atom ga_soft_reset; /* (429C) */
- struct r300_state_atom zbias_cntl;
- struct r300_state_atom zbs; /* zbias (42A4) */
- struct r300_state_atom occlusion_cntl;
- struct r300_state_atom cul; /* cull cntl (42B8) */
- struct r300_state_atom su_depth_scale; /* (42C0) */
- struct r300_state_atom rc; /* rs control (4300) */
- struct r300_state_atom ri; /* rs interpolators (4310) */
- struct r300_state_atom rr; /* rs route (4330) */
- struct r300_state_atom sc_hyperz; /* (43A4) */
- struct r300_state_atom sc_screendoor; /* (43E8) */
- struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */
- struct r300_state_atom fpt; /* texi - (4620) */
- struct r300_state_atom us_out_fmt; /* (46A4) */
- struct r300_state_atom r500fp; /* r500 fp instructions */
- struct r300_state_atom r500fp_const; /* r500 fp constants */
- struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */
- struct r300_state_atom fogs; /* fog state (4BC0) */
- struct r300_state_atom fogc; /* fog color (4BC8) */
- struct r300_state_atom at; /* alpha test (4BD4) */
- struct r300_state_atom fg_depth_src; /* (4BD8) */
- struct r300_state_atom fpp; /* 0x4C00 and following */
- struct r300_state_atom rb3d_cctl; /* (4E00) */
- struct r300_state_atom bld; /* blending (4E04) */
- struct r300_state_atom cmk; /* colormask (4E0C) */
- struct r300_state_atom blend_color; /* constant blend color */
- struct r300_state_atom rop; /* ropcntl */
- struct r300_state_atom cb; /* colorbuffer (4E28) */
- struct r300_state_atom rb3d_dither_ctl; /* (4E50) */
- struct r300_state_atom rb3d_aaresolve_ctl; /* (4E88) */
- struct r300_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */
- struct r300_state_atom zs; /* zstencil control (4F00) */
- struct r300_state_atom zstencil_format;
- struct r300_state_atom zb; /* z buffer (4F20) */
- struct r300_state_atom zb_depthclearvalue; /* (4F28) */
- struct r300_state_atom unk4F30; /* (4F30) */
- struct r300_state_atom zb_hiz_offset; /* (4F44) */
- struct r300_state_atom zb_hiz_pitch; /* (4F54) */
-
- struct r300_state_atom vpi; /* vp instructions */
- struct r300_state_atom vpp; /* vp parameters */
- struct r300_state_atom vps; /* vertex point size (?) */
- struct r300_state_atom vpucp[6]; /* vp user clip plane - 6 */
+ struct radeon_state_atom vpt; /* viewport (1D98) */
+ struct radeon_state_atom vap_cntl;
+ struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */
+ struct radeon_state_atom vof; /* VAP output format register 0x2090 */
+ struct radeon_state_atom vte; /* (20B0) */
+ struct radeon_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */
+ struct radeon_state_atom vap_cntl_status;
+ struct radeon_state_atom vir[2]; /* vap input route (2150/21E0) */
+ struct radeon_state_atom vic; /* vap input control (2180) */
+ struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
+ struct radeon_state_atom vap_clip_cntl;
+ struct radeon_state_atom vap_clip;
+ struct radeon_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */
+ struct radeon_state_atom pvs; /* pvs_cntl (22D0) */
+ struct radeon_state_atom gb_enable; /* (4008) */
+ struct radeon_state_atom gb_misc; /* Multisampling position shifts ? (4010) */
+ struct radeon_state_atom gb_misc2; /* Multisampling position shifts ? (4010) */
+ struct radeon_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
+ struct radeon_state_atom ga_triangle_stipple; /* (4214) */
+ struct radeon_state_atom ps; /* pointsize (421C) */
+ struct radeon_state_atom ga_point_minmax; /* (4230) */
+ struct radeon_state_atom lcntl; /* line control */
+ struct radeon_state_atom ga_line_stipple; /* (4260) */
+ struct radeon_state_atom shade;
+ struct radeon_state_atom shade2;
+ struct radeon_state_atom polygon_mode;
+ struct radeon_state_atom fogp; /* fog parameters (4294) */
+ struct radeon_state_atom ga_soft_reset; /* (429C) */
+ struct radeon_state_atom zbias_cntl;
+ struct radeon_state_atom zbs; /* zbias (42A4) */
+ struct radeon_state_atom occlusion_cntl;
+ struct radeon_state_atom cul; /* cull cntl (42B8) */
+ struct radeon_state_atom su_depth_scale; /* (42C0) */
+ struct radeon_state_atom rc; /* rs control (4300) */
+ struct radeon_state_atom ri; /* rs interpolators (4310) */
+ struct radeon_state_atom rr; /* rs route (4330) */
+ struct radeon_state_atom sc_hyperz; /* (43A4) */
+ struct radeon_state_atom sc_screendoor; /* (43E8) */
+ struct radeon_state_atom fp; /* fragment program cntl + nodes (4600) */
+ struct radeon_state_atom fpt; /* texi - (4620) */
+ struct radeon_state_atom us_out_fmt; /* (46A4) */
+ struct radeon_state_atom r500fp; /* r500 fp instructions */
+ struct radeon_state_atom r500fp_const; /* r500 fp constants */
+ struct radeon_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */
+ struct radeon_state_atom fogs; /* fog state (4BC0) */
+ struct radeon_state_atom fogc; /* fog color (4BC8) */
+ struct radeon_state_atom at; /* alpha test (4BD4) */
+ struct radeon_state_atom fg_depth_src; /* (4BD8) */
+ struct radeon_state_atom fpp; /* 0x4C00 and following */
+ struct radeon_state_atom rb3d_cctl; /* (4E00) */
+ struct radeon_state_atom bld; /* blending (4E04) */
+ struct radeon_state_atom cmk; /* colormask (4E0C) */
+ struct radeon_state_atom blend_color; /* constant blend color */
+ struct radeon_state_atom rop; /* ropcntl */
+ struct radeon_state_atom cb; /* colorbuffer (4E28) */
+ struct radeon_state_atom rb3d_dither_ctl; /* (4E50) */
+ struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */
+ struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */
+ struct radeon_state_atom zs; /* zstencil control (4F00) */
+ struct radeon_state_atom zstencil_format;
+ struct radeon_state_atom zb; /* z buffer (4F20) */
+ struct radeon_state_atom zb_depthclearvalue; /* (4F28) */
+ struct radeon_state_atom zb_zmask; /* (4F30) */
+ struct radeon_state_atom zb_hiz_offset; /* (4F44) */
+ struct radeon_state_atom zb_hiz_pitch; /* (4F54) */
+
+ struct radeon_state_atom vap_flush;
+ struct radeon_state_atom vpi; /* vp instructions */
+ struct radeon_state_atom vpp; /* vp parameters */
+ struct radeon_state_atom vps; /* vertex point size (?) */
+ struct radeon_state_atom vpucp[6]; /* vp user clip plane - 6 */
/* 8 texture units */
/* the state is grouped by function and not by
texture unit. This makes single unit updates
really awkward - we are much better off
updating the whole thing at once */
struct {
- struct r300_state_atom filter;
- struct r300_state_atom filter_1;
- struct r300_state_atom size;
- struct r300_state_atom format;
- struct r300_state_atom pitch;
- struct r300_state_atom offset;
- struct r300_state_atom chroma_key;
- struct r300_state_atom border_color;
+ struct radeon_state_atom filter;
+ struct radeon_state_atom filter_1;
+ struct radeon_state_atom size;
+ struct radeon_state_atom format;
+ struct radeon_state_atom pitch;
+ struct radeon_state_atom offset;
+ struct radeon_state_atom chroma_key;
+ struct radeon_state_atom border_color;
} tex;
- struct r300_state_atom txe; /* tex enable (4104) */
-};
-
-/**
- * This structure holds the command buffer while it is being constructed.
- *
- * The first batch of commands in the buffer is always the state that needs
- * to be re-emitted when the context is lost. This batch can be skipped
- * otherwise.
- */
-struct r300_cmdbuf {
- int size; /* DWORDs allocated for buffer */
- uint32_t *cmd_buf;
- int count_used; /* DWORDs filled so far */
- int count_reemit; /* size of re-emission batch */
+ struct radeon_state_atom txe; /* tex enable (4104) */
+ radeonTexObj *textures[R300_MAX_TEXTURE_UNITS];
};
/**
* State cache
*/
-struct r300_depthbuffer_state {
- GLfloat scale;
-};
-
-struct r300_stencilbuffer_state {
- GLboolean hw_stencil;
-};
-
/* Vertex shader state */
-/* Perhaps more if we store programs in vmem? */
-/* drm_r300_cmd_header_t->vpu->count is unsigned char */
-#define VSF_MAX_FRAGMENT_LENGTH (255*4)
-
-/* Can be tested with colormat currently. */
-#define VSF_MAX_FRAGMENT_TEMPS (14)
-
-#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
-#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
-
-struct r300_vertex_shader_fragment {
- int length;
- union {
- GLuint d[VSF_MAX_FRAGMENT_LENGTH];
- float f[VSF_MAX_FRAGMENT_LENGTH];
- GLuint i[VSF_MAX_FRAGMENT_LENGTH];
- } body;
-};
-
-struct r300_vertex_shader_state {
- struct r300_vertex_shader_fragment program;
-};
-
-extern int hw_tcl_on;
-
#define COLOR_IS_RGBA
#define TAG(x) r300##x
#include "tnl_dd/t_dd_vertex.h"
#undef TAG
-//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current)
-#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp)
-
-/* Should but doesnt work */
-//#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp)
-
-/* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday.
- * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly.
- */
-
struct r300_vertex_program_key {
- GLuint InputsRead;
- GLuint OutputsWritten;
- GLuint OutputsAdded;
+ GLbitfield FpReads;
+ GLuint FogAttr;
+ GLuint WPosAttr;
};
struct r300_vertex_program {
+ struct gl_vertex_program *Base;
struct r300_vertex_program *next;
+
struct r300_vertex_program_key key;
- int translated;
-
- struct r300_vertex_shader_fragment program;
-
- int pos_end;
- int num_temporaries; /* Number of temp vars used by program */
- int wpos_idx;
- int inputs[VERT_ATTRIB_MAX];
- int outputs[VERT_RESULT_MAX];
- int native;
- int ref_count;
- int use_ref_count;
+ struct r300_vertex_program_code code;
+
+ GLboolean error;
};
struct r300_vertex_program_cont {
- struct gl_vertex_program mesa_program; /* Must be first */
- struct r300_vertex_shader_fragment params;
+ /* This is the unmodified vertex program mesa provided us with.
+ * We need to keep it unchanged because we may need to create another
+ * hw specific vertex program based on this.
+ */
+ struct gl_vertex_program mesa_program;
+ /* This is the list of hw specific vertex programs derived from mesa_program */
struct r300_vertex_program *progs;
};
-#define PFS_MAX_ALU_INST 64
-#define PFS_MAX_TEX_INST 64
-#define PFS_MAX_TEX_INDIRECT 4
-#define PFS_NUM_TEMP_REGS 32
-#define PFS_NUM_CONST_REGS 16
-
-struct r300_pfs_compile_state;
-
/**
- * Stores state that influences the compilation of a fragment program.
- */
-struct r300_fragment_program_external_state {
- struct {
- /**
- * If the sampler is used as a shadow sampler,
- * this field is:
- * 0 - GL_LUMINANCE
- * 1 - GL_INTENSITY
- * 2 - GL_ALPHA
- * depending on the depth texture mode.
- */
- GLuint depth_texture_mode : 2;
-
- /**
- * If the sampler is used as a shadow sampler,
- * this field is (texture_compare_func - GL_NEVER).
- * [e.g. if compare function is GL_LEQUAL, this field is 3]
- *
- * Otherwise, this field is 0.
- */
- GLuint texture_compare_func : 3;
- } unit[16];
-};
-
-
-struct r300_fragment_program_node {
- int tex_offset; /**< first tex instruction */
- int tex_end; /**< last tex instruction, relative to tex_offset */
- int alu_offset; /**< first ALU instruction */
- int alu_end; /**< last ALU instruction, relative to alu_offset */
- int flags;
-};
-
-/**
- * Stores an R300 fragment program in its compiled-to-hardware form.
- */
-struct r300_fragment_program_code {
- struct {
- int length; /**< total # of texture instructions used */
- GLuint inst[PFS_MAX_TEX_INST];
- } tex;
-
- struct {
- int length; /**< total # of ALU instructions used */
- struct {
- GLuint inst0;
- GLuint inst1;
- GLuint inst2;
- GLuint inst3;
- } inst[PFS_MAX_ALU_INST];
- } alu;
-
- struct r300_fragment_program_node node[4];
- int cur_node;
- int first_node_has_tex;
-
- /**
- * Remember which program register a given hardware constant
- * belongs to.
- */
- struct prog_src_register constant[PFS_NUM_CONST_REGS];
- int const_nr;
-
- int max_temp_idx;
-};
-
-/**
- * Store everything about a fragment program that is needed
- * to render with that program.
- */
+* Store everything about a fragment program that is needed
+* to render with that program.
+*/
struct r300_fragment_program {
- struct gl_fragment_program mesa_program;
-
- GLboolean translated;
GLboolean error;
-
+ struct r300_fragment_program *next;
struct r300_fragment_program_external_state state;
- struct r300_fragment_program_code code;
- GLboolean WritesDepth;
- GLuint optimization;
-};
-
-struct r500_pfs_compile_state;
+ struct rX00_fragment_program_code code;
+ GLbitfield InputsRead;
-struct r500_fragment_program_external_state {
- struct {
- /**
- * If the sampler is used as a shadow sampler,
- * this field is:
- * 0 - GL_LUMINANCE
- * 1 - GL_INTENSITY
- * 2 - GL_ALPHA
- * depending on the depth texture mode.
- */
- GLuint depth_texture_mode : 2;
-
- /**
- * If the sampler is used as a shadow sampler,
- * this field is (texture_compare_func - GL_NEVER).
- * [e.g. if compare function is GL_LEQUAL, this field is 3]
- *
- * Otherwise, this field is 0.
- */
- GLuint texture_compare_func : 3;
- } unit[16];
+ /* attribute that we are sending the WPOS in */
+ gl_frag_attrib wpos_attr;
+ /* attribute that we are sending the fog coordinate in */
+ gl_frag_attrib fog_attr;
};
-struct r500_fragment_program_code {
- struct {
- GLuint inst0;
- GLuint inst1;
- GLuint inst2;
- GLuint inst3;
- GLuint inst4;
- GLuint inst5;
- } inst[512];
-
- int inst_offset;
- int inst_end;
-
- /**
- * Remember which program register a given hardware constant
- * belongs to.
+struct r300_fragment_program_cont {
+ /* This is the unmodified fragment program mesa provided us with.
+ * We need to keep it unchanged because we may need to create another
+ * hw specific fragment program based on this.
*/
- struct prog_src_register constant[PFS_NUM_CONST_REGS];
- int const_nr;
-
- int max_temp_idx;
+ struct gl_fragment_program Base;
+ /* This is the list of hw specific fragment programs derived from Base */
+ struct r300_fragment_program *progs;
};
-struct r500_fragment_program {
- struct gl_fragment_program mesa_program;
-
- GLcontext *ctx;
- GLboolean translated;
- GLboolean error;
-
- struct r500_fragment_program_external_state state;
- struct r500_fragment_program_code code;
-
- GLboolean writes_depth;
-
- GLuint optimization;
-};
#define R300_MAX_AOS_ARRAYS 16
-#define REG_COORDS 0
-#define REG_COLOR0 1
-#define REG_TEX0 2
-
-struct r300_state {
- struct r300_depthbuffer_state depth;
- struct r300_texture_state texture;
- int sw_tcl_inputs[VERT_ATTRIB_MAX];
- struct r300_vertex_shader_state vertex_shader;
- struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
- int aos_count;
-
- GLuint *Elts;
- struct r300_dma_region elt_dma;
-
- struct r300_dma_region swtcl_dma;
- DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for.
- They are the same as tnl->render_inputs for fixed pipeline */
-
- struct r300_stencilbuffer_state stencil;
-
-};
-
-#define R300_FALLBACK_NONE 0
-#define R300_FALLBACK_TCL 1
-#define R300_FALLBACK_RAST 2
/* r300_swtcl.c
*/
struct r300_swtcl_info {
- GLuint RenderIndex;
-
- /**
- * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is
- * installed in the Mesa state vector.
- */
- GLuint vertex_size;
-
- /**
- * Attributes instructing the Mesa TCL pipeline where / how to put vertex
- * data in the hardware buffer.
- */
- struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
-
- /**
- * Number of elements of \c ::vertex_attrs that are actually used.
- */
- GLuint vertex_attr_count;
-
- /**
- * Cached pointer to the buffer where Mesa will store vertex data.
- */
- GLubyte *verts;
-
- /* Fallback rasterization functions
- */
- // r200_point_func draw_point;
- // r200_line_func draw_line;
- // r200_tri_func draw_tri;
-
- GLuint hw_primitive;
- GLenum render_primitive;
- GLuint numverts;
-
- /**
+ /*
* Offset of the 4UB color data within a hardware (swtcl) vertex.
*/
GLuint coloroffset;
@@ -884,13 +456,43 @@ struct r300_swtcl_info {
* Offset of the 3UB specular color data within a hardware (swtcl) vertex.
*/
GLuint specoffset;
+};
- /**
- * Should Mesa project vertex data or will the hardware do it?
- */
- GLboolean needproj;
+struct r300_vtable {
+ void (* SetupRSUnit)(GLcontext *ctx);
+ void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings);
+ void (* SetupPixelShader)(GLcontext *ctx);
+};
- struct r300_dma_region indexed_verts;
+struct r300_vertex_buffer {
+ struct vertex_attribute {
+ /* generic */
+ GLubyte element;
+ GLuint stride;
+ GLuint dwords;
+ GLubyte size; /* number of components */
+ GLboolean is_named_bo;
+ struct radeon_bo *bo;
+ GLint bo_offset;
+
+ /* hw specific */
+ uint32_t data_type:4;
+ uint32_t dst_loc:5;
+ uint32_t _signed:1;
+ uint32_t normalize:1;
+ uint32_t swizzle:12;
+ uint32_t write_mask:4;
+ } attribs[VERT_ATTRIB_MAX];
+
+ GLubyte num_attribs;
+};
+
+struct r300_index_buffer {
+ struct radeon_bo *bo;
+ int bo_offset;
+
+ GLboolean is_32bit;
+ GLuint count;
};
@@ -900,46 +502,33 @@ struct r300_swtcl_info {
struct r300_context {
struct radeon_context radeon; /* parent class, must be first */
+ struct r300_vtable vtbl;
+
struct r300_hw_state hw;
- struct r300_cmdbuf cmdbuf;
- struct r300_state state;
- struct gl_vertex_program *curr_vp;
+
struct r300_vertex_program *selected_vp;
+ struct r300_fragment_program *selected_fp;
/* Vertex buffers
*/
- struct r300_dma dma;
- GLboolean save_on_next_unlock;
- GLuint NewGLState;
-
- /* Texture object bookkeeping
- */
- unsigned nr_heaps;
- driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
- driTextureObject swapped;
- int texture_depth;
- float initialMaxAnisotropy;
-
- /* Clientdata textures;
- */
- GLuint prefer_gart_client_texturing;
-
-#ifdef USER_BUFFERS
- struct r300_memory_manager *rmm;
-#endif
-
GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
- GLboolean disable_lowimpact_fallback;
+ struct r300_options {
+ uint32_t conformance_mode:1;
+ uint32_t hw_tcl_enabled:1;
+ uint32_t s3tc_force_enabled:1;
+ uint32_t s3tc_force_disabled:1;
+ uint32_t stencil_two_side_disabled:1;
+ } options;
- DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */
struct r300_swtcl_info swtcl;
-};
+ struct r300_vertex_buffer vbuf;
+ struct r300_index_buffer ind_buf;
-struct r300_buffer_object {
- struct gl_buffer_object mesa_obj;
- int id;
+ uint32_t fallback;
+
+ DECLARE_RENDERINPUTS(render_inputs_bitset);
};
#define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx))
@@ -949,15 +538,13 @@ extern GLboolean r300CreateContext(const __GLcontextModes * glVisual,
__DRIcontextPrivate * driContextPriv,
void *sharedContextPrivate);
-extern void r300SelectVertexShader(r300ContextPtr r300);
extern void r300InitShaderFuncs(struct dd_function_table *functions);
-extern int r300VertexProgUpdateParams(GLcontext * ctx,
- struct r300_vertex_program_cont *vp,
- float *dst);
-
-#define RADEON_D_CAPTURE 0
-#define RADEON_D_PLAYBACK 1
-#define RADEON_D_PLAYBACK_RAW 2
-#define RADEON_D_T 3
+
+extern void r300InitShaderFunctions(r300ContextPtr r300);
+
+extern void r300InitDraw(GLcontext *ctx);
+
+#define r300PackFloat32 radeonPackFloat32
+#define r300PackFloat24 radeonPackFloat24
#endif /* __R300_CONTEXT_H__ */
diff --git a/r300/r300_draw.c b/r300/r300_draw.c
new file mode 100644
index 0000000..e9968f9
--- /dev/null
+++ b/r300/r300_draw.c
@@ -0,0 +1,718 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Maciej Cencora
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHOR(S) AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdlib.h>
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/api_validate.h"
+#include "main/enums.h"
+#include "main/simple_list.h"
+
+#include "r300_reg.h"
+#include "r300_context.h"
+#include "r300_emit.h"
+#include "r300_render.h"
+#include "r300_state.h"
+#include "r300_tex.h"
+#include "r300_cmdbuf.h"
+
+#include "radeon_buffer_objects.h"
+#include "radeon_common_context.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_vp_build.h"
+#include "vbo/vbo_context.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+
+
+static int getTypeSize(GLenum type)
+{
+ switch (type) {
+ case GL_DOUBLE:
+ return sizeof(GLdouble);
+ case GL_FLOAT:
+ return sizeof(GLfloat);
+ case GL_INT:
+ return sizeof(GLint);
+ case GL_UNSIGNED_INT:
+ return sizeof(GLuint);
+ case GL_SHORT:
+ return sizeof(GLshort);
+ case GL_UNSIGNED_SHORT:
+ return sizeof(GLushort);
+ case GL_BYTE:
+ return sizeof(GLbyte);
+ case GL_UNSIGNED_BYTE:
+ return sizeof(GLubyte);
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLvoid *src_ptr;
+ GLuint *out;
+ int i;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ mapped_named_bo = GL_TRUE;
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ }
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT,
+ "%s: Fixing index buffer format. type %d\n",
+ __func__, mesa_ind_buf->type);
+
+ if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) {
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+ GLubyte *in = (GLubyte *)src_ptr;
+
+ radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4);
+
+ assert(r300->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count) {
+ *out++ = in[i];
+ }
+
+#if MESA_BIG_ENDIAN
+ } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
+ GLushort *in = (GLushort *)src_ptr;
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+ radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo,
+ &r300->ind_buf.bo_offset, size, 4);
+
+ assert(r300->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count) {
+ *out++ = in[i];
+ }
+#endif
+ }
+
+ r300->ind_buf.is_32bit = GL_FALSE;
+ r300->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+}
+
+
+static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ if (!mesa_ind_buf) {
+ r300->ind_buf.bo = NULL;
+ return;
+ }
+ radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__);
+
+#if MESA_BIG_ENDIAN
+ if (mesa_ind_buf->type == GL_UNSIGNED_INT) {
+#else
+ if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) {
+#endif
+ const GLvoid *src_ptr;
+ GLvoid *dst_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+
+ radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4);
+
+ assert(r300->ind_buf.bo->ptr != NULL);
+ dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+ _mesa_memcpy(dst_ptr, src_ptr, size);
+
+ r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+ r300->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+ } else {
+ r300FixupIndexBuffer(ctx, mesa_ind_buf);
+ }
+}
+
+#define CONVERT( TYPE, MACRO ) do { \
+ GLuint i, j, sz; \
+ sz = input->Size; \
+ if (input->Normalized) { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = MACRO(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } else { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = (GLfloat)(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } \
+} while (0)
+
+/**
+ * Convert attribute data type to float
+ * If the attribute uses named buffer object replace the bo with newly allocated bo
+ */
+static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_array *input, struct vertex_attribute *attr)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ const GLvoid *src_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+ GLfloat *dst_ptr;
+ GLuint stride;
+
+ stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+ /* Convert value for first element only */
+ if (input->StrideB == 0)
+ count = 1;
+
+ if (input->BufferObj->Name) {
+ if (!input->BufferObj->Pointer) {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ } else {
+ src_ptr = input->Ptr;
+ }
+
+ radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32);
+ dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+
+ radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT,
+ "%s: Converting vertex attributes, attribute data format %x,"
+ "stride %d, components %d\n"
+ , __FUNCTION__, input->Type
+ , stride, input->Size);
+
+ assert(src_ptr != NULL);
+
+ switch (input->Type) {
+ case GL_DOUBLE:
+ CONVERT(GLdouble, (GLfloat));
+ break;
+ case GL_UNSIGNED_INT:
+ CONVERT(GLuint, UINT_TO_FLOAT);
+ break;
+ case GL_INT:
+ CONVERT(GLint, INT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_SHORT:
+ CONVERT(GLushort, USHORT_TO_FLOAT);
+ break;
+ case GL_SHORT:
+ CONVERT(GLshort, SHORT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_BYTE:
+ assert(input->Format != GL_BGRA);
+ CONVERT(GLubyte, UBYTE_TO_FLOAT);
+ break;
+ case GL_BYTE:
+ CONVERT(GLbyte, BYTE_TO_FLOAT);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+}
+
+static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *input, int count, struct vertex_attribute *attr)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ const int dst_stride = (input->StrideB + 3) & ~3;
+ const int size = getTypeSize(input->Type) * input->Size * count;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, size, 32);
+
+ if (!input->BufferObj->Pointer) {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, "%s. Vertex alignment doesn't match hw requirements.\n", __func__);
+
+ {
+ GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+ int i;
+
+ for (i = 0; i < count; ++i) {
+ _mesa_memcpy(dst_ptr, src_ptr, input->StrideB);
+ src_ptr += input->StrideB;
+ dst_ptr += dst_stride;
+ }
+ }
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+
+ attr->stride = dst_stride;
+}
+
+static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ struct vertex_attribute r300_attr;
+ GLenum type;
+ GLuint stride;
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__);
+ stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+ if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ getTypeSize(input->Type) != 4 ||
+#endif
+ stride < 4) {
+
+ type = GL_FLOAT;
+
+ if (input->StrideB == 0) {
+ r300_attr.stride = 0;
+ } else {
+ r300_attr.stride = sizeof(GLfloat) * input->Size;
+ }
+ r300_attr.dwords = input->Size;
+ r300_attr.is_named_bo = GL_FALSE;
+ } else {
+ type = input->Type;
+ r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4;
+ if (!input->BufferObj->Name) {
+
+ if (input->StrideB == 0) {
+ r300_attr.stride = 0;
+ } else {
+ r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3;
+ }
+
+ r300_attr.is_named_bo = GL_FALSE;
+ }
+ }
+
+ r300_attr.size = input->Size;
+ r300_attr.element = attr;
+ r300_attr.dst_loc = vbuf->num_attribs;
+
+ switch (type) {
+ case GL_FLOAT:
+ switch (input->Size) {
+ case 1: r300_attr.data_type = R300_DATA_TYPE_FLOAT_1; break;
+ case 2: r300_attr.data_type = R300_DATA_TYPE_FLOAT_2; break;
+ case 3: r300_attr.data_type = R300_DATA_TYPE_FLOAT_3; break;
+ case 4: r300_attr.data_type = R300_DATA_TYPE_FLOAT_4; break;
+ }
+ r300_attr._signed = 0;
+ r300_attr.normalize = 0;
+ break;
+ case GL_SHORT:
+ r300_attr._signed = 1;
+ r300_attr.normalize = input->Normalized;
+ switch (input->Size) {
+ case 1:
+ case 2:
+ r300_attr.data_type = R300_DATA_TYPE_SHORT_2;
+ break;
+ case 3:
+ case 4:
+ r300_attr.data_type = R300_DATA_TYPE_SHORT_4;
+ break;
+ }
+ break;
+ case GL_BYTE:
+ r300_attr._signed = 1;
+ r300_attr.normalize = input->Normalized;
+ r300_attr.data_type = R300_DATA_TYPE_BYTE;
+ break;
+ case GL_UNSIGNED_SHORT:
+ r300_attr._signed = 0;
+ r300_attr.normalize = input->Normalized;
+ switch (input->Size) {
+ case 1:
+ case 2:
+ r300_attr.data_type = R300_DATA_TYPE_SHORT_2;
+ break;
+ case 3:
+ case 4:
+ r300_attr.data_type = R300_DATA_TYPE_SHORT_4;
+ break;
+ }
+ break;
+ case GL_UNSIGNED_BYTE:
+ r300_attr._signed = 0;
+ r300_attr.normalize = input->Normalized;
+ if (input->Format == GL_BGRA)
+ r300_attr.data_type = R300_DATA_TYPE_D3DCOLOR;
+ else
+ r300_attr.data_type = R300_DATA_TYPE_BYTE;
+ break;
+
+ default:
+ case GL_DOUBLE:
+ case GL_INT:
+ case GL_UNSIGNED_INT:
+ assert(0);
+ break;
+ }
+
+ switch (input->Size) {
+ case 4:
+ r300_attr.swizzle = SWIZZLE_XYZW;
+ break;
+ case 3:
+ r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+ break;
+ case 2:
+ r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
+ break;
+ case 1:
+ r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+ break;
+ }
+
+ r300_attr.write_mask = MASK_XYZW;
+
+ vbuf->attribs[vbuf->num_attribs] = r300_attr;
+ ++vbuf->num_attribs;
+}
+
+static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__);
+ {
+ int i, tmp;
+
+ tmp = r300->selected_vp->code.InputsRead;
+ i = 0;
+ vbuf->num_attribs = 0;
+ while (tmp) {
+ /* find first enabled bit */
+ while (!(tmp & 1)) {
+ tmp >>= 1;
+ ++i;
+ }
+
+ r300TranslateAttrib(ctx, i, count, arrays[i]);
+
+ tmp >>= 1;
+ ++i;
+ }
+ }
+
+ r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS);
+ if (r300->fallback)
+ return;
+}
+
+static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *input[], int count)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ GLuint stride;
+ int ret;
+ int i, index;
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE,
+ "%s: count %d num_attribs %d\n",
+ __func__, count, vbuf->num_attribs);
+
+ for (index = 0; index < vbuf->num_attribs; index++) {
+ struct radeon_aos *aos = &r300->radeon.tcl.aos[index];
+ i = vbuf->attribs[index].element;
+
+ stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
+
+ if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ getTypeSize(input[i]->Type) != 4 ||
+#endif
+ stride < 4) {
+
+ r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]);
+ } else {
+ if (input[i]->BufferObj->Name) {
+ if (stride % 4 != 0) {
+ assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+ r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]);
+ vbuf->attribs[index].is_named_bo = GL_FALSE;
+ } else {
+ vbuf->attribs[index].stride = input[i]->StrideB;
+ vbuf->attribs[index].bo_offset = (intptr_t) input[i]->Ptr;
+ vbuf->attribs[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+ vbuf->attribs[index].is_named_bo = GL_TRUE;
+ }
+ } else {
+
+ int size;
+ int local_count = count;
+ uint32_t *dst;
+
+ if (input[i]->StrideB == 0) {
+ size = getTypeSize(input[i]->Type) * input[i]->Size;
+ local_count = 1;
+ } else {
+ size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
+ }
+
+ radeonAllocDmaRegion(&r300->radeon, &vbuf->attribs[index].bo, &vbuf->attribs[index].bo_offset, size, 32);
+ assert(vbuf->attribs[index].bo->ptr != NULL);
+ dst = (uint32_t *)ADD_POINTERS(vbuf->attribs[index].bo->ptr, vbuf->attribs[index].bo_offset);
+ switch (vbuf->attribs[index].dwords) {
+ case 1: radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ case 3: radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ default: assert(0); break;
+ }
+
+ }
+ }
+
+ aos->count = vbuf->attribs[index].stride == 0 ? 1 : count;
+ aos->stride = vbuf->attribs[index].stride / sizeof(float);
+ aos->components = vbuf->attribs[index].dwords;
+ aos->bo = vbuf->attribs[index].bo;
+ aos->offset = vbuf->attribs[index].bo_offset;
+
+ if (vbuf->attribs[index].is_named_bo) {
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[index].bo, RADEON_GEM_DOMAIN_GTT, 0);
+ }
+ }
+
+ r300->radeon.tcl.aos_count = vbuf->num_attribs;
+ ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+ r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, ret);
+
+}
+
+static void r300FreeData(GLcontext *ctx)
+{
+ /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
+ * to prevent double unref in radeonReleaseArrays
+ * called during context destroy
+ */
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__);
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ {
+ int i;
+
+ for (i = 0; i < r300->vbuf.num_attribs; i++) {
+ if (!r300->vbuf.attribs[i].is_named_bo) {
+ radeon_bo_unref(r300->vbuf.attribs[i].bo);
+ }
+ r300->radeon.tcl.aos[i].bo = NULL;
+ }
+ }
+
+ {
+ if (r300->ind_buf.bo != NULL) {
+ radeon_bo_unref(r300->ind_buf.bo);
+ }
+ }
+}
+
+static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx, GLuint nr_prims)
+{
+ struct r300_context *r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ GLboolean flushed;
+ GLuint dwords;
+ GLuint state_size;
+
+ dwords = 2*CACHE_FLUSH_BUFSZ;
+ dwords += PRE_EMIT_STATE_BUFSZ;
+ dwords += (AOS_BUFSZ(vbuf->num_attribs)
+ + SCISSORS_BUFSZ*2
+ + FIREAOS_BUFSZ )*nr_prims;
+
+ state_size = radeonCountStateEmitSize(&r300->radeon);
+ flushed = rcommonEnsureCmdBufSpace(&r300->radeon,
+ dwords + state_size,
+ __FUNCTION__);
+ if (flushed)
+ dwords += radeonCountStateEmitSize(&r300->radeon);
+ else
+ dwords += state_size;
+
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
+ return dwords;
+}
+
+static GLboolean r300TryDrawPrims(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index )
+{
+ struct r300_context *r300 = R300_CONTEXT(ctx);
+ GLuint i;
+
+ radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: %u (%d-%d) cs begin at %d\n",
+ __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw );
+
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ if (r300->options.hw_tcl_enabled)
+ _tnl_UpdateFixedFunctionProgram(ctx);
+
+ r300UpdateShaders(r300);
+
+ r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx));
+
+ r300SetVertexFormat(ctx, arrays, max_index + 1);
+
+ if (r300->fallback)
+ return GL_FALSE;
+
+ r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten);
+
+ r300UpdateShaderStates(r300);
+
+ /* ensure we have the cmd buf space in advance to cover
+ * the state + DMA AOS pointers */
+ GLuint emit_end = r300PredictTryDrawPrimsSize(ctx, nr_prims)
+ + r300->radeon.cmdbuf.cs->cdw;
+
+ r300SetupIndexBuffer(ctx, ib);
+
+ r300AllocDmaRegions(ctx, arrays, max_index + 1);
+
+ if (r300->fallback)
+ return GL_FALSE;
+
+ r300EmitCacheFlush(r300);
+ radeonEmitState(&r300->radeon);
+
+ for (i = 0; i < nr_prims; ++i) {
+ r300RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, prim[i].mode);
+ }
+
+ r300EmitCacheFlush(r300);
+
+ r300FreeData(ctx);
+
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: %u (%d-%d) cs ending at %d\n",
+ __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw );
+
+ if (emit_end < r300->radeon.cmdbuf.cs->cdw)
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", r300->radeon.cmdbuf.cs->cdw - emit_end);
+
+ return GL_TRUE;
+}
+
+static void r300DrawPrims(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index)
+{
+ GLboolean retval;
+
+ /* This check should get folded into just the places that
+ * min/max index are really needed.
+ */
+ if (!index_bounds_valid) {
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+ }
+
+ if (min_index) {
+ radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT,
+ "%s: Rebasing primitives. %p nr_prims %d min_index %u max_index %u\n",
+ __func__, prim, nr_prims, min_index, max_index);
+ vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims );
+ return;
+ }
+
+ /* Make an attempt at drawing */
+ retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+
+ /* If failed run tnl pipeline - it should take care of fallbacks */
+ if (!retval)
+ _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+}
+
+void r300InitDraw(GLcontext *ctx)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+
+ vbo->draw_prims = r300DrawPrims;
+}
diff --git a/r300/r300_emit.c b/r300/r300_emit.c
index 28c3157..07e6223 100644
--- a/r300/r300_emit.c
+++ b/r300/r300_emit.c
@@ -31,6 +31,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* \file
*
* \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Maciej Cencora <m.cencora@gmail.com>
*/
#include "main/glheader.h"
@@ -46,222 +47,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "tnl/t_context.h"
#include "r300_context.h"
-#include "radeon_ioctl.h"
#include "r300_state.h"
#include "r300_emit.h"
#include "r300_ioctl.h"
-
-#ifdef USER_BUFFERS
-#include "r300_mem.h"
-#endif
-
-#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
- SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
- SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
- SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
- SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
- SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
-#error Cannot change these!
-#endif
-
-#define DEBUG_ALL DEBUG_VERTS
-
-#if defined(USE_X86_ASM)
-#define COPY_DWORDS( dst, src, nr ) \
-do { \
- int __tmp; \
- __asm__ __volatile__( "rep ; movsl" \
- : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
- : "0" (nr), \
- "D" ((long)dst), \
- "S" ((long)src) ); \
-} while (0)
-#else
-#define COPY_DWORDS( dst, src, nr ) \
-do { \
- int j; \
- for ( j = 0 ; j < nr ; j++ ) \
- dst[j] = ((int *)src)[j]; \
- dst += nr; \
-} while (0)
-#endif
-
-static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
- GLvoid * data, int stride, int count)
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- __FUNCTION__, count, stride, (void *)out, (void *)data);
-
- if (stride == 4)
- COPY_DWORDS(out, data, count);
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out++;
- data += stride;
- }
-}
-
-static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
- GLvoid * data, int stride, int count)
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- __FUNCTION__, count, stride, (void *)out, (void *)data);
-
- if (stride == 8)
- COPY_DWORDS(out, data, count * 2);
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data + 4);
- out += 2;
- data += stride;
- }
-}
-
-static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
- GLvoid * data, int stride, int count)
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- __FUNCTION__, count, stride, (void *)out, (void *)data);
-
- if (stride == 12)
- COPY_DWORDS(out, data, count * 3);
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data + 4);
- out[2] = *(int *)(data + 8);
- out += 3;
- data += stride;
- }
-}
-
-static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
- GLvoid * data, int stride, int count)
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- __FUNCTION__, count, stride, (void *)out, (void *)data);
-
- if (stride == 16)
- COPY_DWORDS(out, data, count * 4);
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data + 4);
- out[2] = *(int *)(data + 8);
- out[3] = *(int *)(data + 12);
- out += 4;
- data += stride;
- }
-}
-
-static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
- GLvoid * data, int size, int stride, int count)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
-
- if (stride == 0) {
- r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
- count = 1;
- rvb->aos_offset = GET_START(rvb);
- rvb->aos_stride = 0;
- } else {
- r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
- rvb->aos_offset = GET_START(rvb);
- rvb->aos_stride = size;
- }
-
- switch (size) {
- case 1:
- r300EmitVec4(ctx, rvb, data, stride, count);
- break;
- case 2:
- r300EmitVec8(ctx, rvb, data, stride, count);
- break;
- case 3:
- r300EmitVec12(ctx, rvb, data, stride, count);
- break;
- case 4:
- r300EmitVec16(ctx, rvb, data, stride, count);
- break;
- default:
- assert(0);
- break;
- }
-}
-
-#define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \
- (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT)
-
-GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
- int *inputs, GLint * tab, GLuint nr)
-{
- GLuint i, dw;
-
- /* type, inputs, stop bit, size */
- for (i = 0; i < nr; i += 2) {
- /* make sure input is valid, would lockup the gpu */
- assert(inputs[tab[i]] != -1);
- dw = (R300_SIGNED | DW_SIZE(i));
- if (i + 1 == nr) {
- dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
- } else {
- assert(inputs[tab[i + 1]] != -1);
- dw |= (R300_SIGNED |
- DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT;
- if (i + 2 == nr) {
- dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
- }
- }
- dst[i >> 1] = dw;
- }
-
- return (nr + 1) >> 1;
-}
-
-static GLuint r300VAPInputRoute1Swizzle(int swizzle[4])
-{
- return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |
- (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
- (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
- (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT);
-}
-
-GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
-{
- GLuint i, dw;
-
- for (i = 0; i < nr; i += 2) {
- dw = (r300VAPInputRoute1Swizzle(swizzle[i]) |
- ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
- R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT;
- if (i + 1 < nr) {
- dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) |
- ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
- R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
- }
- dst[i >> 1] = dw;
- }
-
- return (nr + 1) >> 1;
-}
+#include "r300_render.h"
+#include "r300_swtcl.h"
GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
{
@@ -272,7 +62,6 @@ GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
GLuint i, vic_1 = 0;
if (InputsRead & (1 << VERT_ATTRIB_POS))
@@ -284,281 +73,68 @@ GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
vic_1 |= R300_INPUT_CNTL_COLOR;
- rmesa->state.texture.tc_count = 0;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
- rmesa->state.texture.tc_count++;
vic_1 |= R300_INPUT_CNTL_TC0 << i;
}
return vic_1;
}
-GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
+GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes)
{
GLuint ret = 0;
- if (OutputsWritten & (1 << VERT_RESULT_HPOS))
+ if (vp_writes & (1 << VERT_RESULT_HPOS))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
- if (OutputsWritten & (1 << VERT_RESULT_COL0))
+ if (vp_writes & (1 << VERT_RESULT_COL0))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
- if (OutputsWritten & (1 << VERT_RESULT_COL1))
+ if (vp_writes & (1 << VERT_RESULT_COL1))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
- if (OutputsWritten & (1 << VERT_RESULT_BFC0)
- || OutputsWritten & (1 << VERT_RESULT_BFC1))
- ret |=
- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
+ /* Two sided lighting works only if all 4 colors are written */
+ if (vp_writes & (1 << VERT_RESULT_BFC0) || vp_writes & (1 << VERT_RESULT_BFC1))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
- if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ if (vp_writes & (1 << VERT_RESULT_PSIZ))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
return ret;
}
-GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
+GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes)
{
GLuint i, ret = 0, first_free_texcoord = 0;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
- ret |= (4 << (3 * i));
+ if (vp_writes & (1 << (VERT_RESULT_TEX0 + i))) {
+ ret |= (4 << (3 * first_free_texcoord));
++first_free_texcoord;
}
}
- if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
- if (first_free_texcoord > 8) {
- fprintf(stderr, "\tout of free texcoords to write fog coord\n");
- _mesa_exit(-1);
- }
- ret |= 4 << (3 * first_free_texcoord);
+ if (first_free_texcoord > 8) {
+ fprintf(stderr, "\tout of free texcoords\n");
+ _mesa_exit(-1);
}
return ret;
}
-/* Emit vertex data to GART memory
- * Route inputs to the vertex processor
- * This function should never return R300_FALLBACK_TCL when using software tcl.
- */
-int r300EmitArrays(GLcontext * ctx)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
- GLuint nr;
- GLuint count = vb->Count;
- GLuint i;
- GLuint InputsRead = 0, OutputsWritten = 0;
- int *inputs = NULL;
- int vir_inputs[VERT_ATTRIB_MAX];
- GLint tab[VERT_ATTRIB_MAX];
- int swizzle[VERT_ATTRIB_MAX][4];
- struct r300_vertex_program *prog =
- (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
-
- if (hw_tcl_on) {
- inputs = prog->inputs;
- InputsRead = prog->key.InputsRead;
- OutputsWritten = prog->key.OutputsWritten;
- } else {
- inputs = rmesa->state.sw_tcl_inputs;
-
- DECLARE_RENDERINPUTS(render_inputs_bitset);
- RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
-
- vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr;
-
- assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
- assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
- //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
-
- if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
- InputsRead |= 1 << VERT_ATTRIB_POS;
- OutputsWritten |= 1 << VERT_RESULT_HPOS;
- }
-
- if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
- InputsRead |= 1 << VERT_ATTRIB_COLOR0;
- OutputsWritten |= 1 << VERT_RESULT_COL0;
- }
-
- if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) {
- InputsRead |= 1 << VERT_ATTRIB_COLOR1;
- OutputsWritten |= 1 << VERT_RESULT_COL1;
- }
-
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) {
- InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
- OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
- }
- }
-
- for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
- if (InputsRead & (1 << i)) {
- inputs[i] = nr++;
- } else {
- inputs[i] = -1;
- }
- }
-
- /* Fixed, apply to vir0 only */
- memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int));
- inputs = vir_inputs;
- if (InputsRead & VERT_ATTRIB_POS)
- inputs[VERT_ATTRIB_POS] = 0;
- if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
- inputs[VERT_ATTRIB_COLOR0] = 2;
- if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
- inputs[VERT_ATTRIB_COLOR1] = 3;
- for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
- if (InputsRead & (1 << i))
- inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
-
- RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
- }
-
- assert(InputsRead);
- assert(OutputsWritten);
-
- for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
- if (InputsRead & (1 << i)) {
- tab[nr++] = i;
- }
- }
-
- if (nr > R300_MAX_AOS_ARRAYS) {
- return R300_FALLBACK_TCL;
- }
-
- for (i = 0; i < nr; i++) {
- int ci, fix, found = 0;
-
- swizzle[i][0] = SWIZZLE_ZERO;
- swizzle[i][1] = SWIZZLE_ZERO;
- swizzle[i][2] = SWIZZLE_ZERO;
- swizzle[i][3] = SWIZZLE_ONE;
-
- for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
- swizzle[i][ci] = ci;
- }
-
- if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
- if (vb->AttribPtr[tab[i]]->stride % 4) {
- return R300_FALLBACK_TCL;
- }
- rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
- rmesa->state.aos[i].start = 0;
- rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
- rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
- rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
- } else {
- r300EmitVec(ctx, &rmesa->state.aos[i],
- vb->AttribPtr[tab[i]]->data,
- vb->AttribPtr[tab[i]]->size,
- vb->AttribPtr[tab[i]]->stride, count);
- }
-
- rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
-
- for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
- if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
- continue;
- }
- found = 1;
- break;
- }
-
- if (found) {
- if (fix > 0) {
- WARN_ONCE("Feeling lucky?\n");
- }
- rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
- for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
- swizzle[i][ci] += fix;
- }
- } else {
- WARN_ONCE
- ("Cannot handle offset %x with stride %d, comp %d\n",
- rmesa->state.aos[i].aos_offset,
- rmesa->state.aos[i].aos_stride,
- vb->AttribPtr[tab[i]]->size);
- return R300_FALLBACK_TCL;
- }
- }
-
- /* Setup INPUT_ROUTE. */
- R300_STATECHANGE(rmesa, vir[0]);
- ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
- r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
- vb->AttribPtr, inputs, tab, nr);
- R300_STATECHANGE(rmesa, vir[1]);
- ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
- r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
- nr);
-
- /* Setup INPUT_CNTL. */
- R300_STATECHANGE(rmesa, vic);
- rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
- rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
-
- /* Setup OUTPUT_VTX_FMT. */
- R300_STATECHANGE(rmesa, vof);
- rmesa->hw.vof.cmd[R300_VOF_CNTL_0] =
- r300VAPOutputCntl0(ctx, OutputsWritten);
- rmesa->hw.vof.cmd[R300_VOF_CNTL_1] =
- r300VAPOutputCntl1(ctx, OutputsWritten);
-
- rmesa->state.aos_count = nr;
-
- return R300_FALLBACK_NONE;
-}
-
-#ifdef USER_BUFFERS
-void r300UseArrays(GLcontext * ctx)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- int i;
-
- if (rmesa->state.elt_dma.buf)
- r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
-
- for (i = 0; i < rmesa->state.aos_count; i++) {
- if (rmesa->state.aos[i].buf)
- r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
- }
-}
-#endif
-
-void r300ReleaseArrays(GLcontext * ctx)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- int i;
-
- r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
- for (i = 0; i < rmesa->state.aos_count; i++) {
- r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
- }
-}
-
void r300EmitCacheFlush(r300ContextPtr rmesa)
{
- int cmd_reserved = 0;
- int cmd_written = 0;
-
- drm_radeon_cmd_header_t *cmd = NULL;
-
- reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
- e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
- R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-
- reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
- e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+ BATCH_LOCALS(&rmesa->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+ OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+ END_BATCH();
+ COMMIT_BATCH();
}
diff --git a/r300/r300_emit.h b/r300/r300_emit.h
index 89d7383..8e57e35 100644
--- a/r300/r300_emit.h
+++ b/r300/r300_emit.h
@@ -44,28 +44,31 @@
#include "r300_cmdbuf.h"
#include "radeon_reg.h"
-/* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up
- * with DRM */
-#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
-#define CP_PACKET3( pkt, n ) \
- (RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
-
-static INLINE uint32_t cmdpacket0(int reg, int count)
+static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn,
+ int reg, int count)
{
- drm_r300_cmd_header_t cmd;
-
- cmd.packet0.cmd_type = R300_CMD_PACKET0;
- cmd.packet0.count = count;
- cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
- cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
-
- return cmd.u;
+ if (!rscrn->kernel_mm) {
+ drm_r300_cmd_header_t cmd;
+
+ cmd.u = 0;
+ cmd.packet0.cmd_type = R300_CMD_PACKET0;
+ cmd.packet0.count = count;
+ cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
+ cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
+
+ return cmd.u;
+ }
+ if (count) {
+ return CP_PACKET0(reg, count - 1);
+ }
+ return CP_PACKET2;
}
-static INLINE uint32_t cmdvpu(int addr, int count)
+static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count)
{
drm_r300_cmd_header_t cmd;
+ cmd.u = 0;
cmd.vpu.cmd_type = R300_CMD_VPU;
cmd.vpu.count = count;
cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8;
@@ -74,10 +77,12 @@ static INLINE uint32_t cmdvpu(int addr, int count)
return cmd.u;
}
-static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
+static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn,
+ int addr, int count, int type, int clamp)
{
drm_r300_cmd_header_t cmd;
+ cmd.u = 0;
cmd.r500fp.cmd_type = R300_CMD_R500FP;
cmd.r500fp.count = count;
cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8;
@@ -88,181 +93,137 @@ static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
return cmd.u;
}
-static INLINE uint32_t cmdpacket3(int packet)
+static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet)
{
drm_r300_cmd_header_t cmd;
+ cmd.u = 0;
cmd.packet3.cmd_type = R300_CMD_PACKET3;
cmd.packet3.packet = packet;
return cmd.u;
}
-static INLINE uint32_t cmdcpdelay(unsigned short count)
+static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,
+ unsigned short count)
{
drm_r300_cmd_header_t cmd;
+ cmd.u = 0;
+
cmd.delay.cmd_type = R300_CMD_CP_DELAY;
cmd.delay.count = count;
return cmd.u;
}
-static INLINE uint32_t cmdwait(unsigned char flags)
+static INLINE uint32_t cmdwait(struct radeon_screen *rscrn,
+ unsigned char flags)
{
drm_r300_cmd_header_t cmd;
+ cmd.u = 0;
cmd.wait.cmd_type = R300_CMD_WAIT;
cmd.wait.flags = flags;
return cmd.u;
}
-static INLINE uint32_t cmdpacify(void)
+static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn)
{
drm_r300_cmd_header_t cmd;
+ cmd.u = 0;
cmd.header.cmd_type = R300_CMD_END3D;
return cmd.u;
}
/**
- * Prepare to write a register value to register at address reg.
- * If num_extra > 0 then the following extra values are written
- * to registers with address +4, +8 and so on..
- */
-#define reg_start(reg, num_extra) \
- do { \
- int _n; \
- _n=(num_extra); \
- cmd = (drm_radeon_cmd_header_t*) \
- r300AllocCmdBuf(rmesa, \
- (_n+2), \
- __FUNCTION__); \
- cmd_reserved=_n+2; \
- cmd_written=1; \
- cmd[0].i=cmdpacket0((reg), _n+1); \
- } while (0);
-
-/**
- * Emit GLuint freestyle
+ * Write the header of a packet3 to the command buffer.
+ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards.
*/
-#define e32(dword) \
- do { \
- if(cmd_written<cmd_reserved) { \
- cmd[cmd_written].i=(dword); \
- cmd_written++; \
- } else { \
- fprintf(stderr, \
- "e32 but no previous packet " \
- "declaration.\n" \
- "Aborting! in %s::%s at line %d, " \
- "cmd_written=%d cmd_reserved=%d\n", \
- __FILE__, __FUNCTION__, __LINE__, \
- cmd_written, cmd_reserved); \
- _mesa_exit(-1); \
- } \
+#define OUT_BATCH_PACKET3(packet, num_extra) do {\
+ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
+ OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\
+ R300_CMD_PACKET3_RAW)); \
+ } else b_l_rmesa->cmdbuf.cs->section_cdw++;\
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
} while(0)
-#define efloat(f) e32(r300PackFloat32(f))
-
-#define vsf_start_fragment(dest, length) \
- do { \
- int _n; \
- _n = (length); \
- cmd = (drm_radeon_cmd_header_t*) \
- r300AllocCmdBuf(rmesa, \
- (_n+1), \
- __FUNCTION__); \
- cmd_reserved = _n+2; \
- cmd_written =1; \
- cmd[0].i = cmdvpu((dest), _n/4); \
- } while (0);
-
-#define r500fp_start_fragment(dest, length) \
- do { \
- int _n; \
- _n = (length); \
- cmd = (drm_radeon_cmd_header_t*) \
- r300AllocCmdBuf(rmesa, \
- (_n+1), \
- __FUNCTION__); \
- cmd_reserved = _n+1; \
- cmd_written =1; \
- cmd[0].i = cmdr500fp((dest), _n/6, 0, 0); \
- } while (0);
-
-#define start_packet3(packet, count) \
- { \
- int _n; \
- GLuint _p; \
- _n = (count); \
- _p = (packet); \
- cmd = (drm_radeon_cmd_header_t*) \
- r300AllocCmdBuf(rmesa, \
- (_n+3), \
- __FUNCTION__); \
- cmd_reserved = _n+3; \
- cmd_written = 2; \
- if(_n > 0x3fff) { \
- fprintf(stderr,"Too big packet3 %08x: cannot " \
- "store %d dwords\n", \
- _p, _n); \
- _mesa_exit(-1); \
- } \
- cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW); \
- cmd[1].i = _p | ((_n & 0x3fff)<<16); \
- }
-
/**
* Must be sent to switch to 2d commands
*/
-void static INLINE end_3d(r300ContextPtr rmesa)
+void static INLINE end_3d(radeonContextPtr radeon)
{
- drm_radeon_cmd_header_t *cmd = NULL;
+ BATCH_LOCALS(radeon);
- cmd =
- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
- cmd[0].header.cmd_type = R300_CMD_END3D;
+ if (!radeon->radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(1);
+ OUT_BATCH(cmdpacify(radeon->radeonScreen));
+ END_BATCH();
+ }
}
void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
{
- drm_radeon_cmd_header_t *cmd = NULL;
+ BATCH_LOCALS(&rmesa->radeon);
- cmd =
- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
- cmd[0].i = cmdcpdelay(count);
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(1);
+ OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count));
+ END_BATCH();
+ }
}
-void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags)
+void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags)
{
- drm_radeon_cmd_header_t *cmd = NULL;
-
- cmd =
- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
- cmd[0].i = cmdwait(flags);
+ BATCH_LOCALS(radeon);
+ uint32_t wait_until;
+
+ if (!radeon->radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(1);
+ OUT_BATCH(cmdwait(radeon->radeonScreen, flags));
+ END_BATCH();
+ } else {
+ switch(flags) {
+ case R300_WAIT_2D:
+ wait_until = (1 << 14);
+ break;
+ case R300_WAIT_3D:
+ wait_until = (1 << 15);
+ break;
+ case R300_NEW_WAIT_2D_3D:
+ wait_until = (1 << 14) | (1 << 15);
+ break;
+ case R300_NEW_WAIT_2D_2D_CLEAN:
+ wait_until = (1 << 14) | (1 << 16) | (1 << 18);
+ break;
+ case R300_NEW_WAIT_3D_3D_CLEAN:
+ wait_until = (1 << 15) | (1 << 17) | (1 << 18);
+ break;
+ case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
+ wait_until = (1 << 14) | (1 << 16) | (1 << 18);
+ wait_until |= (1 << 15) | (1 << 17) | (1 << 18);
+ break;
+ default:
+ return;
+ }
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_BATCH(wait_until);
+ END_BATCH();
+ }
}
-extern int r300EmitArrays(GLcontext * ctx);
-
-#ifdef USER_BUFFERS
-void r300UseArrays(GLcontext * ctx);
-#endif
-
-extern void r300ReleaseArrays(GLcontext * ctx);
extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
extern void r300EmitCacheFlush(r300ContextPtr rmesa);
-extern GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
- int *inputs, GLint * tab, GLuint nr);
-extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr);
extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead);
extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
-extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten);
-extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten);
+extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes);
+extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes);
#endif
diff --git a/r300/r300_fragprog.c b/r300/r300_fragprog.c
deleted file mode 100644
index 873cde4..0000000
--- a/r300/r300_fragprog.c
+++ /dev/null
@@ -1,699 +0,0 @@
-/*
- * Copyright (C) 2005 Ben Skeggs.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * \file
- *
- * Fragment program compiler. Perform transformations on the intermediate
- * representation until the program is in a form where we can translate
- * it more or less directly into machine-readable form.
- *
- * \author Ben Skeggs <darktama@iinet.net.au>
- * \author Jerome Glisse <j.glisse@gmail.com>
- */
-
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/prog_instruction.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-
-#include "r300_context.h"
-#include "r300_fragprog.h"
-#include "r300_fragprog_swizzle.h"
-#include "r300_state.h"
-
-#include "radeon_nqssadce.h"
-#include "radeon_program_alu.h"
-
-
-static void reset_srcreg(struct prog_src_register* reg)
-{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
-}
-
-static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
-{
- gl_state_index fail_value_tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
- };
- struct prog_src_register reg = { 0, };
-
- fail_value_tokens[2] = tmu;
- reg.File = PROGRAM_STATE_VAR;
- reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
- reg.Swizzle = SWIZZLE_WWWW;
- return reg;
-}
-
-/**
- * Transform TEX, TXP, TXB, and KIL instructions in the following way:
- * - premultiply texture coordinates for RECT
- * - extract operand swizzles
- * - introduce a temporary register when write masks are needed
- *
- * \todo If/when r5xx uses the radeon_program architecture, this can probably
- * be reused.
- */
-static GLboolean transform_TEX(
- struct radeon_transform_context *t,
- struct prog_instruction* orig_inst, void* data)
-{
- struct r300_fragment_program_compiler *compiler =
- (struct r300_fragment_program_compiler*)data;
- struct prog_instruction inst = *orig_inst;
- struct prog_instruction* tgt;
- GLboolean destredirect = GL_FALSE;
-
- if (inst.Opcode != OPCODE_TEX &&
- inst.Opcode != OPCODE_TXB &&
- inst.Opcode != OPCODE_TXP &&
- inst.Opcode != OPCODE_KIL)
- return GL_FALSE;
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
-
- if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = inst.DstReg;
- if (comparefunc == GL_ALWAYS) {
- tgt->SrcReg[0].File = PROGRAM_BUILTIN;
- tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
- } else {
- tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
- }
- return GL_TRUE;
- }
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = radeonFindFreeTemporary(t);
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- }
-
-
- /* Hardware uses [0..1]x[0..1] range for rectangle textures
- * instead of [0..Width]x[0..Height].
- * Add a scaling instruction.
- */
- if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
- gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
- 0
- };
-
- int tempreg = radeonFindFreeTemporary(t);
- int factor_index;
-
- tokens[2] = inst.TexSrcUnit;
- factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
-
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MUL;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tempreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
- tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
- tgt->SrcReg[1].Index = factor_index;
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tempreg;
- }
-
- if (inst.Opcode != OPCODE_KIL) {
- if (inst.DstReg.File != PROGRAM_TEMPORARY ||
- inst.DstReg.WriteMask != WRITEMASK_XYZW) {
- int tempreg = radeonFindFreeTemporary(t);
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = tempreg;
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- destredirect = GL_TRUE;
- }
- }
-
- if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
- int tmpreg = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(t->Program, 1);
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tmpreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tmpreg;
- }
-
- tgt = radeonAppendInstructions(t->Program, 1);
- _mesa_copy_instructions(tgt, &inst, 1);
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
- int rcptemp = radeonFindFreeTemporary(t);
- int pass, fail;
-
- tgt = radeonAppendInstructions(t->Program, 3);
-
- tgt[0].Opcode = OPCODE_RCP;
- tgt[0].DstReg.File = PROGRAM_TEMPORARY;
- tgt[0].DstReg.Index = rcptemp;
- tgt[0].DstReg.WriteMask = WRITEMASK_W;
- tgt[0].SrcReg[0] = inst.SrcReg[0];
- tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- tgt[1].Opcode = OPCODE_MAD;
- tgt[1].DstReg = inst.DstReg;
- tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
- tgt[1].SrcReg[0] = inst.SrcReg[0];
- tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[1].Index = rcptemp;
- tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[2].Index = inst.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
-
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
- else
- tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
-
- tgt[2].Opcode = OPCODE_CMP;
- tgt[2].DstReg = orig_inst->DstReg;
- tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
-
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
- pass = 1;
- fail = 2;
- } else {
- pass = 2;
- fail = 1;
- }
-
- tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
- tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
- tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
- } else if (destredirect) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = orig_inst->DstReg;
- tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt->SrcReg[0].Index = inst.DstReg.Index;
- }
-
- return GL_TRUE;
-}
-
-
-static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp)
-{
- struct gl_fragment_program *mp = &fp->mesa_program;
-
- /* Ask Mesa nicely to fill in ParameterValues for us */
- if (mp->Base.Parameters)
- _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters);
-}
-
-
-/**
- * Transform the program to support fragment.position.
- *
- * Introduce a small fragment at the start of the program that will be
- * the only code that directly reads the FRAG_ATTRIB_WPOS input.
- * All other code pieces that reference that input will be rewritten
- * to read from a newly allocated temporary.
- *
- * \todo if/when r5xx supports the radeon_program architecture, this is a
- * likely candidate for code sharing.
- */
-static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
-{
- GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead;
-
- if (!(InputsRead & FRAG_BIT_WPOS))
- return;
-
- static gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
- };
- struct prog_instruction *fpi;
- GLuint window_index;
- int i = 0;
- GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
-
- _mesa_insert_instructions(compiler->program, 0, 3);
- fpi = compiler->program->Instructions;
-
- /* perspective divide */
- fpi[i].Opcode = OPCODE_RCP;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_W;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
- fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
- i++;
-
- fpi[i].Opcode = OPCODE_MUL;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
- fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[1].Index = tempregi;
- fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- i++;
-
- /* viewport transformation */
- window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
-
- fpi[i].Opcode = OPCODE_MAD;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[0].Index = tempregi;
- fpi[i].SrcReg[0].Swizzle =
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
- fpi[i].SrcReg[1].Index = window_index;
- fpi[i].SrcReg[1].Swizzle =
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
- fpi[i].SrcReg[2].Index = window_index;
- fpi[i].SrcReg[2].Swizzle =
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
- i++;
-
- for (; i < compiler->program->NumInstructions; ++i) {
- int reg;
- for (reg = 0; reg < 3; reg++) {
- if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
- fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
- fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[reg].Index = tempregi;
- }
- }
- }
-}
-
-
-static void nqssadce_init(struct nqssadce_state* s)
-{
- s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
- s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
-}
-
-
-static GLuint build_dtm(GLuint depthmode)
-{
- switch(depthmode) {
- default:
- case GL_LUMINANCE: return 0;
- case GL_INTENSITY: return 1;
- case GL_ALPHA: return 2;
- }
-}
-
-static GLuint build_func(GLuint comparefunc)
-{
- return comparefunc - GL_NEVER;
-}
-
-
-/**
- * Collect all external state that is relevant for compiling the given
- * fragment program.
- */
-static void build_state(
- r300ContextPtr r300,
- struct r300_fragment_program *fp,
- struct r300_fragment_program_external_state *state)
-{
- int unit;
-
- _mesa_bzero(state, sizeof(*state));
-
- for(unit = 0; unit < 16; ++unit) {
- if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) {
- struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
-
- state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
- state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
- }
- }
-}
-
-
-void r300TranslateFragmentShader(r300ContextPtr r300,
- struct r300_fragment_program *fp)
-{
- struct r300_fragment_program_external_state state;
-
- build_state(r300, fp, &state);
- if (_mesa_memcmp(&fp->state, &state, sizeof(state))) {
- /* TODO: cache compiled programs */
- fp->translated = GL_FALSE;
- _mesa_memcpy(&fp->state, &state, sizeof(state));
- }
-
- if (!fp->translated) {
- struct r300_fragment_program_compiler compiler;
-
- compiler.r300 = r300;
- compiler.fp = fp;
- compiler.code = &fp->code;
- compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);
-
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Fragment Program: Initial program:\n");
- _mesa_print_program(compiler.program);
- }
-
- insert_WPOS_trailer(&compiler);
-
- struct radeon_program_transformation transformations[] = {
- { &transform_TEX, &compiler },
- { &radeonTransformALU, 0 },
- { &radeonTransformTrigSimple, 0 }
- };
- radeonLocalTransform(
- r300->radeon.glCtx,
- compiler.program,
- 3, transformations);
-
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Fragment Program: After native rewrite:\n");
- _mesa_print_program(compiler.program);
- }
-
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r300FPIsNativeSwizzle,
- .BuildSwizzle = &r300FPBuildSwizzle,
- .RewriteDepthOut = GL_TRUE
- };
- radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
-
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Compiler: after NqSSA-DCE:\n");
- _mesa_print_program(compiler.program);
- }
-
- if (!r300FragmentProgramEmit(&compiler))
- fp->error = GL_TRUE;
-
- /* Subtle: Rescue any parameters that have been added during transformations */
- _mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
- fp->mesa_program.Base.Parameters = compiler.program->Parameters;
- compiler.program->Parameters = 0;
-
- _mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL);
-
- if (!fp->error)
- fp->translated = GL_TRUE;
- if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
- r300FragmentProgramDump(fp, &fp->code);
- r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
- }
-
- update_params(r300, fp);
-}
-
-/* just some random things... */
-void r300FragmentProgramDump(
- struct r300_fragment_program *fp,
- struct r300_fragment_program_code *code)
-{
- int n, i, j;
- static int pc = 0;
-
- fprintf(stderr, "pc=%d*************************************\n", pc++);
-
- fprintf(stderr, "Hardware program\n");
- fprintf(stderr, "----------------\n");
-
- for (n = 0; n < (code->cur_node + 1); n++) {
- fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
- "alu_end: %d, tex_end: %d, flags: %08x\n", n,
- code->node[n].alu_offset,
- code->node[n].tex_offset,
- code->node[n].alu_end, code->node[n].tex_end,
- code->node[n].flags);
-
- if (n > 0 || code->first_node_has_tex) {
- fprintf(stderr, " TEX:\n");
- for (i = code->node[n].tex_offset;
- i <= code->node[n].tex_offset + code->node[n].tex_end;
- ++i) {
- const char *instr;
-
- switch ((code->tex.
- inst[i] >> R300_TEX_INST_SHIFT) &
- 15) {
- case R300_TEX_OP_LD:
- instr = "TEX";
- break;
- case R300_TEX_OP_KIL:
- instr = "KIL";
- break;
- case R300_TEX_OP_TXP:
- instr = "TXP";
- break;
- case R300_TEX_OP_TXB:
- instr = "TXB";
- break;
- default:
- instr = "UNKNOWN";
- }
-
- fprintf(stderr,
- " %s t%i, %c%i, texture[%i] (%08x)\n",
- instr,
- (code->tex.
- inst[i] >> R300_DST_ADDR_SHIFT) & 31,
- 't',
- (code->tex.
- inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
- (code->tex.
- inst[i] & R300_TEX_ID_MASK) >>
- R300_TEX_ID_SHIFT,
- code->tex.inst[i]);
- }
- }
-
- for (i = code->node[n].alu_offset;
- i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) {
- char srcc[3][10], dstc[20];
- char srca[3][10], dsta[20];
- char argc[3][20];
- char arga[3][20];
- char flags[5], tmp[10];
-
- for (j = 0; j < 3; ++j) {
- int regc = code->alu.inst[i].inst1 >> (j * 6);
- int rega = code->alu.inst[i].inst3 >> (j * 6);
-
- sprintf(srcc[j], "%c%i",
- (regc & 32) ? 'c' : 't', regc & 31);
- sprintf(srca[j], "%c%i",
- (rega & 32) ? 'c' : 't', rega & 31);
- }
-
- dstc[0] = 0;
- sprintf(flags, "%s%s%s",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_X) ? "x" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_Z) ? "z" : "");
- if (flags[0] != 0) {
- sprintf(dstc, "t%i.%s ",
- (code->alu.inst[i].
- inst1 >> R300_ALU_DSTC_SHIFT) & 31,
- flags);
- }
- sprintf(flags, "%s%s%s",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
- if (flags[0] != 0) {
- sprintf(tmp, "o%i.%s",
- (code->alu.inst[i].
- inst1 >> R300_ALU_DSTC_SHIFT) & 31,
- flags);
- strcat(dstc, tmp);
- }
-
- dsta[0] = 0;
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) {
- sprintf(dsta, "t%i.w ",
- (code->alu.inst[i].
- inst3 >> R300_ALU_DSTA_SHIFT) & 31);
- }
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) {
- sprintf(tmp, "o%i.w ",
- (code->alu.inst[i].
- inst3 >> R300_ALU_DSTA_SHIFT) & 31);
- strcat(dsta, tmp);
- }
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) {
- strcat(dsta, "Z");
- }
-
- fprintf(stderr,
- "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
- " w: %3s %3s %3s -> %-20s (%08x)\n", i,
- srcc[0], srcc[1], srcc[2], dstc,
- code->alu.inst[i].inst1, srca[0], srca[1],
- srca[2], dsta, code->alu.inst[i].inst3);
-
- for (j = 0; j < 3; ++j) {
- int regc = code->alu.inst[i].inst0 >> (j * 7);
- int rega = code->alu.inst[i].inst2 >> (j * 7);
- int d;
- char buf[20];
-
- d = regc & 31;
- if (d < 12) {
- switch (d % 4) {
- case R300_ALU_ARGC_SRC0C_XYZ:
- sprintf(buf, "%s.xyz",
- srcc[d / 4]);
- break;
- case R300_ALU_ARGC_SRC0C_XXX:
- sprintf(buf, "%s.xxx",
- srcc[d / 4]);
- break;
- case R300_ALU_ARGC_SRC0C_YYY:
- sprintf(buf, "%s.yyy",
- srcc[d / 4]);
- break;
- case R300_ALU_ARGC_SRC0C_ZZZ:
- sprintf(buf, "%s.zzz",
- srcc[d / 4]);
- break;
- }
- } else if (d < 15) {
- sprintf(buf, "%s.www", srca[d - 12]);
- } else if (d == 20) {
- sprintf(buf, "0.0");
- } else if (d == 21) {
- sprintf(buf, "1.0");
- } else if (d == 22) {
- sprintf(buf, "0.5");
- } else if (d >= 23 && d < 32) {
- d -= 23;
- switch (d / 3) {
- case 0:
- sprintf(buf, "%s.yzx",
- srcc[d % 3]);
- break;
- case 1:
- sprintf(buf, "%s.zxy",
- srcc[d % 3]);
- break;
- case 2:
- sprintf(buf, "%s.Wzy",
- srcc[d % 3]);
- break;
- }
- } else {
- sprintf(buf, "%i", d);
- }
-
- sprintf(argc[j], "%s%s%s%s",
- (regc & 32) ? "-" : "",
- (regc & 64) ? "|" : "",
- buf, (regc & 64) ? "|" : "");
-
- d = rega & 31;
- if (d < 9) {
- sprintf(buf, "%s.%c", srcc[d / 3],
- 'x' + (char)(d % 3));
- } else if (d < 12) {
- sprintf(buf, "%s.w", srca[d - 9]);
- } else if (d == 16) {
- sprintf(buf, "0.0");
- } else if (d == 17) {
- sprintf(buf, "1.0");
- } else if (d == 18) {
- sprintf(buf, "0.5");
- } else {
- sprintf(buf, "%i", d);
- }
-
- sprintf(arga[j], "%s%s%s%s",
- (rega & 32) ? "-" : "",
- (rega & 64) ? "|" : "",
- buf, (rega & 64) ? "|" : "");
- }
-
- fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
- " w: %8s %8s %8s op: %08x\n",
- argc[0], argc[1], argc[2],
- code->alu.inst[i].inst0, arga[0], arga[1],
- arga[2], code->alu.inst[i].inst2);
- }
- }
-}
diff --git a/r300/r300_fragprog.h b/r300/r300_fragprog.h
deleted file mode 100644
index 94fb554..0000000
--- a/r300/r300_fragprog.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (C) 2005 Ben Skeggs.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/*
- * Authors:
- * Ben Skeggs <darktama@iinet.net.au>
- * Jerome Glisse <j.glisse@gmail.com>
- */
-#ifndef __R300_FRAGPROG_H_
-#define __R300_FRAGPROG_H_
-
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
-
-#include "r300_context.h"
-#include "radeon_program.h"
-
-#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
-#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
-
-#if 1
-
-/**
- * Fragment program helper macros
- */
-
-/* Produce unshifted source selectors */
-#define FP_TMP(idx) (idx)
-#define FP_CONST(idx) ((idx) | (1 << 5))
-
-/* Produce source/dest selector dword */
-#define FP_SELC_MASK_NO 0
-#define FP_SELC_MASK_X 1
-#define FP_SELC_MASK_Y 2
-#define FP_SELC_MASK_XY 3
-#define FP_SELC_MASK_Z 4
-#define FP_SELC_MASK_XZ 5
-#define FP_SELC_MASK_YZ 6
-#define FP_SELC_MASK_XYZ 7
-
-#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \
- (((destidx) << R300_ALU_DSTC_SHIFT) | \
- (FP_SELC_MASK_##regmask << 23) | \
- (FP_SELC_MASK_##outmask << 26) | \
- ((src0) << R300_ALU_SRC0C_SHIFT) | \
- ((src1) << R300_ALU_SRC1C_SHIFT) | \
- ((src2) << R300_ALU_SRC2C_SHIFT))
-
-#define FP_SELA_MASK_NO 0
-#define FP_SELA_MASK_W 1
-
-#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \
- (((destidx) << R300_ALU_DSTA_SHIFT) | \
- (FP_SELA_MASK_##regmask << 23) | \
- (FP_SELA_MASK_##outmask << 24) | \
- ((src0) << R300_ALU_SRC0A_SHIFT) | \
- ((src1) << R300_ALU_SRC1A_SHIFT) | \
- ((src2) << R300_ALU_SRC2A_SHIFT))
-
-/* Produce unshifted argument selectors */
-#define FP_ARGC(source) R300_ALU_ARGC_##source
-#define FP_ARGA(source) R300_ALU_ARGA_##source
-#define FP_ABS(arg) ((arg) | (1 << 6))
-#define FP_NEG(arg) ((arg) ^ (1 << 5))
-
-/* Produce instruction dword */
-#define FP_INSTRC(opcode,arg0,arg1,arg2) \
- (R300_ALU_OUTC_##opcode | \
- ((arg0) << R300_ALU_ARG0C_SHIFT) | \
- ((arg1) << R300_ALU_ARG1C_SHIFT) | \
- ((arg2) << R300_ALU_ARG2C_SHIFT))
-
-#define FP_INSTRA(opcode,arg0,arg1,arg2) \
- (R300_ALU_OUTA_##opcode | \
- ((arg0) << R300_ALU_ARG0A_SHIFT) | \
- ((arg1) << R300_ALU_ARG1A_SHIFT) | \
- ((arg2) << R300_ALU_ARG2A_SHIFT))
-
-#endif
-
-struct r300_fragment_program;
-
-extern void r300TranslateFragmentShader(r300ContextPtr r300,
- struct r300_fragment_program *fp);
-
-
-/**
- * Used internally by the r300 fragment program code to store compile-time
- * only data.
- */
-struct r300_fragment_program_compiler {
- r300ContextPtr r300;
- struct r300_fragment_program *fp;
- struct r300_fragment_program_code *code;
- struct gl_program *program;
-};
-
-extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler);
-
-
-extern void r300FragmentProgramDump(
- struct r300_fragment_program *fp,
- struct r300_fragment_program_code *code);
-
-#endif
diff --git a/r300/r300_fragprog_common.c b/r300/r300_fragprog_common.c
new file mode 100644
index 0000000..0bdc90b
--- /dev/null
+++ b/r300/r300_fragprog_common.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Fragment program compiler. Perform transformations on the intermediate
+ * representation until the program is in a form where we can translate
+ * it more or less directly into machine-readable form.
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ */
+
+#include "r300_fragprog_common.h"
+
+#include "shader/program.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+
+#include "compiler/radeon_compiler.h"
+
+#include "r300_state.h"
+
+
+static GLuint build_dtm(GLuint depthmode)
+{
+ switch(depthmode) {
+ default:
+ case GL_LUMINANCE: return 0;
+ case GL_INTENSITY: return 1;
+ case GL_ALPHA: return 2;
+ }
+}
+
+static GLuint build_func(GLuint comparefunc)
+{
+ return comparefunc - GL_NEVER;
+}
+
+/**
+ * Collect all external state that is relevant for compiling the given
+ * fragment program.
+ */
+static void build_state(
+ r300ContextPtr r300,
+ struct gl_fragment_program *fp,
+ struct r300_fragment_program_external_state *state)
+{
+ int unit;
+
+ _mesa_bzero(state, sizeof(*state));
+
+ for(unit = 0; unit < 16; ++unit) {
+ if (fp->Base.ShadowSamplers & (1 << unit)) {
+ struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
+
+ state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
+ state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
+ }
+ }
+}
+
+
+/**
+ * Transform the program to support fragment.position.
+ *
+ * Introduce a small fragment at the start of the program that will be
+ * the only code that directly reads the FRAG_ATTRIB_WPOS input.
+ * All other code pieces that reference that input will be rewritten
+ * to read from a newly allocated temporary.
+ *
+ */
+static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
+{
+ int i;
+
+ fp->wpos_attr = FRAG_ATTRIB_MAX;
+ if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) {
+ return;
+ }
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(compiler->Base.Program.InputsRead & (1 << i))) {
+ fp->wpos_attr = i;
+ break;
+ }
+ }
+
+ /* No free texcoord found, fall-back to software rendering */
+ if (fp->wpos_attr == FRAG_ATTRIB_MAX)
+ {
+ compiler->Base.Error = 1;
+ return;
+ }
+
+ rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr);
+}
+
+/**
+ * Rewrite fragment.fogcoord to use a texture coordinate slot.
+ * Note that fogcoord is forced into an X001 pattern, and this enforcement
+ * is done here.
+ *
+ * See also the counterpart rewriting for vertex programs.
+ */
+static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
+{
+ struct prog_src_register src;
+ int i;
+
+ fp->fog_attr = FRAG_ATTRIB_MAX;
+ if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) {
+ return;
+ }
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(compiler->Base.Program.InputsRead & (1 << i))) {
+ fp->fog_attr = i;
+ break;
+ }
+ }
+
+ /* No free texcoord found, fall-back to software rendering */
+ if (fp->fog_attr == FRAG_ATTRIB_MAX)
+ {
+ compiler->Base.Error = 1;
+ return;
+ }
+
+ memset(&src, 0, sizeof(src));
+ src.File = PROGRAM_INPUT;
+ src.Index = fp->fog_attr;
+ src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+ rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src);
+}
+
+
+/**
+ * Reserve hardware temporary registers for the program inputs.
+ *
+ * @note This allocation is performed explicitly, because the order of inputs
+ * is determined by the RS hardware.
+ */
+static void allocate_hw_inputs(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata)
+{
+ GLuint InputsRead = c->Base.Program.InputsRead;
+ int i;
+ GLuint hwindex = 0;
+
+ /* Primary colour */
+ if (InputsRead & FRAG_BIT_COL0)
+ allocate(mydata, FRAG_ATTRIB_COL0, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL0;
+
+ /* Secondary color */
+ if (InputsRead & FRAG_BIT_COL1)
+ allocate(mydata, FRAG_ATTRIB_COL1, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL1;
+
+ /* Texcoords */
+ for (i = 0; i < 8; i++) {
+ if (InputsRead & (FRAG_BIT_TEX0 << i))
+ allocate(mydata, FRAG_ATTRIB_TEX0+i, hwindex++);
+ }
+ InputsRead &= ~FRAG_BITS_TEX_ANY;
+
+ /* Fogcoords treated as a texcoord */
+ if (InputsRead & FRAG_BIT_FOGC)
+ allocate(mydata, FRAG_ATTRIB_FOGC, hwindex++);
+ InputsRead &= ~FRAG_BIT_FOGC;
+
+ /* fragment position treated as a texcoord */
+ if (InputsRead & FRAG_BIT_WPOS)
+ allocate(mydata, FRAG_ATTRIB_WPOS, hwindex++);
+ InputsRead &= ~FRAG_BIT_WPOS;
+
+ /* Anything else */
+ if (InputsRead)
+ rc_error(&c->Base, "Don't know how to handle inputs 0x%x\n", InputsRead);
+}
+
+
+static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_fragment_program_compiler compiler;
+
+ rc_init(&compiler.Base);
+ compiler.Base.Debug = (RADEON_DEBUG & RADEON_PIXEL) ? GL_TRUE : GL_FALSE;
+
+ compiler.code = &fp->code;
+ compiler.state = fp->state;
+ compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
+ compiler.OutputDepth = FRAG_RESULT_DEPTH;
+ compiler.OutputColor = FRAG_RESULT_COLOR;
+ compiler.AllocateHwInputs = &allocate_hw_inputs;
+
+ if (compiler.Base.Debug) {
+ fflush(stderr);
+ _mesa_printf("Fragment Program: Initial program:\n");
+ _mesa_print_program(&cont->Base.Base);
+ fflush(stderr);
+ }
+
+ rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
+
+ insert_WPOS_trailer(&compiler, fp);
+
+ rewriteFog(&compiler, fp);
+
+ r3xx_compile_fragment_program(&compiler);
+ fp->error = compiler.Base.Error;
+
+ fp->InputsRead = compiler.Base.Program.InputsRead;
+
+ rc_destroy(&compiler.Base);
+}
+
+struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_fragment_program_cont *fp_list;
+ struct r300_fragment_program *fp;
+ struct r300_fragment_program_external_state state;
+
+ fp_list = (struct r300_fragment_program_cont *)ctx->FragmentProgram._Current;
+ build_state(r300, ctx->FragmentProgram._Current, &state);
+
+ fp = fp_list->progs;
+ while (fp) {
+ if (_mesa_memcmp(&fp->state, &state, sizeof(state)) == 0) {
+ return r300->selected_fp = fp;
+ }
+ fp = fp->next;
+ }
+
+ fp = _mesa_calloc(sizeof(struct r300_fragment_program));
+
+ fp->state = state;
+
+ fp->next = fp_list->progs;
+ fp_list->progs = fp;
+
+ translate_fragment_program(ctx, fp_list, fp);
+
+ return r300->selected_fp = fp;
+}
diff --git a/r300/r300_fragprog_common.h b/r300/r300_fragprog_common.h
new file mode 100644
index 0000000..3d64c08
--- /dev/null
+++ b/r300/r300_fragprog_common.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_COMMON_H_
+#define __R300_FRAGPROG_COMMON_H_
+
+#include "main/mtypes.h"
+
+#include "r300_context.h"
+
+struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx);
+
+#endif
diff --git a/r300/r300_ioctl.c b/r300/r300_ioctl.c
index ee85e22..5cb04e2 100644
--- a/r300/r300_ioctl.c
+++ b/r300/r300_ioctl.c
@@ -44,82 +44,160 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/imports.h"
#include "main/macros.h"
#include "main/context.h"
+#include "main/simple_list.h"
#include "swrast/swrast.h"
+#include "radeon_common.h"
+#include "radeon_lock.h"
#include "r300_context.h"
-#include "radeon_ioctl.h"
#include "r300_ioctl.h"
#include "r300_cmdbuf.h"
#include "r300_state.h"
#include "r300_vertprog.h"
#include "radeon_reg.h"
#include "r300_emit.h"
-#include "r300_fragprog.h"
+#include "r300_context.h"
#include "vblank.h"
+#define R200_3D_DRAW_IMMD_2 0xC0003500
+
#define CLEARBUFFER_COLOR 0x1
#define CLEARBUFFER_DEPTH 0x2
#define CLEARBUFFER_STENCIL 0x4
-static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
-{
- GLcontext *ctx = r300->radeon.glCtx;
- __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
- GLuint cboffset, cbpitch;
- drm_r300_cmd_header_t *cmd2;
- int cmd_reserved = 0;
- int cmd_written = 0;
- drm_radeon_cmd_header_t *cmd = NULL;
- r300ContextPtr rmesa = r300;
+#if 1
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n",
- __FUNCTION__, buffer ? "back" : "front",
- dPriv->x, dPriv->y, dPriv->w, dPriv->h);
+/**
+ * Fragment program helper macros
+ */
- if (buffer) {
- cboffset = r300->radeon.radeonScreen->backOffset;
- cbpitch = r300->radeon.radeonScreen->backPitch;
- } else {
- cboffset = r300->radeon.radeonScreen->frontOffset;
- cbpitch = r300->radeon.radeonScreen->frontPitch;
- }
+/* Produce unshifted source selectors */
+#define FP_TMP(idx) (idx)
+#define FP_CONST(idx) ((idx) | (1 << 5))
+
+/* Produce source/dest selector dword */
+#define FP_SELC_MASK_NO 0
+#define FP_SELC_MASK_X 1
+#define FP_SELC_MASK_Y 2
+#define FP_SELC_MASK_XY 3
+#define FP_SELC_MASK_Z 4
+#define FP_SELC_MASK_XZ 5
+#define FP_SELC_MASK_YZ 6
+#define FP_SELC_MASK_XYZ 7
+
+#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \
+ (((destidx) << R300_ALU_DSTC_SHIFT) | \
+ (FP_SELC_MASK_##regmask << 23) | \
+ (FP_SELC_MASK_##outmask << 26) | \
+ ((src0) << R300_ALU_SRC0C_SHIFT) | \
+ ((src1) << R300_ALU_SRC1C_SHIFT) | \
+ ((src2) << R300_ALU_SRC2C_SHIFT))
+
+#define FP_SELA_MASK_NO 0
+#define FP_SELA_MASK_W 1
+
+#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \
+ (((destidx) << R300_ALU_DSTA_SHIFT) | \
+ (FP_SELA_MASK_##regmask << 23) | \
+ (FP_SELA_MASK_##outmask << 24) | \
+ ((src0) << R300_ALU_SRC0A_SHIFT) | \
+ ((src1) << R300_ALU_SRC1A_SHIFT) | \
+ ((src2) << R300_ALU_SRC2A_SHIFT))
+
+/* Produce unshifted argument selectors */
+#define FP_ARGC(source) R300_ALU_ARGC_##source
+#define FP_ARGA(source) R300_ALU_ARGA_##source
+#define FP_ABS(arg) ((arg) | (1 << 6))
+#define FP_NEG(arg) ((arg) ^ (1 << 5))
+
+/* Produce instruction dword */
+#define FP_INSTRC(opcode,arg0,arg1,arg2) \
+ (R300_ALU_OUTC_##opcode | \
+ ((arg0) << R300_ALU_ARG0C_SHIFT) | \
+ ((arg1) << R300_ALU_ARG1C_SHIFT) | \
+ ((arg2) << R300_ALU_ARG2C_SHIFT))
+
+#define FP_INSTRA(opcode,arg0,arg1,arg2) \
+ (R300_ALU_OUTA_##opcode | \
+ ((arg0) << R300_ALU_ARG0A_SHIFT) | \
+ ((arg1) << R300_ALU_ARG1A_SHIFT) | \
+ ((arg2) << R300_ALU_ARG2A_SHIFT))
- cboffset += r300->radeon.radeonScreen->fbLocation;
+#endif
- cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
- end_3d(rmesa);
+static void r300EmitClearState(GLcontext * ctx);
- R300_STATECHANGE(r300, cb);
- reg_start(R300_RB3D_COLOROFFSET0, 0);
- e32(cboffset);
+static void r300ClearBuffer(r300ContextPtr r300, int flags,
+ struct radeon_renderbuffer *rrb,
+ struct radeon_renderbuffer *rrbd)
+{
+ BATCH_LOCALS(&r300->radeon);
+ GLcontext *ctx = r300->radeon.glCtx;
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon);
+ GLuint cbpitch = 0;
+ r300ContextPtr rmesa = r300;
- if (r300->radeon.radeonScreen->cpp == 4)
- cbpitch |= R300_COLOR_FORMAT_ARGB8888;
- else
- cbpitch |= R300_COLOR_FORMAT_RGB565;
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n",
+ __FUNCTION__, rrb, dPriv->x, dPriv->y,
+ dPriv->w, dPriv->h);
- if (r300->radeon.sarea->tiling_enabled)
- cbpitch |= R300_COLOR_TILE_ENABLE;
+ if (rrb) {
+ cbpitch = (rrb->pitch / rrb->cpp);
+ if (rrb->cpp == 4)
+ cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+ else
+ cbpitch |= R300_COLOR_FORMAT_RGB565;
- reg_start(R300_RB3D_COLORPITCH0, 0);
- e32(cbpitch);
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
+ cbpitch |= R300_COLOR_TILE_ENABLE;
+ }
+ }
- R300_STATECHANGE(r300, cmk);
- reg_start(RB3D_COLOR_CHANNEL_MASK, 0);
+ /* TODO in bufmgr */
+ cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+ end_3d(&rmesa->radeon);
if (flags & CLEARBUFFER_COLOR) {
- e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
- (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
- (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
- (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
+ assert(rrb != 0);
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
+ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch);
+ END_BATCH();
+ }
+#if 1
+ if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) {
+ uint32_t zbpitch = (rrbd->pitch / rrbd->cpp);
+ if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
+ zbpitch |= R300_DEPTHMACROTILE_ENABLE;
+ }
+ if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
+ zbpitch |= R300_DEPTHMICROTILE_TILED;
+ }
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
+ OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1);
+ if (!r300->radeon.radeonScreen->kernel_mm)
+ OUT_BATCH(zbpitch);
+ else
+ OUT_BATCH_RELOC(zbpitch, rrbd->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+ }
+#endif
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1);
+ if (flags & CLEARBUFFER_COLOR) {
+ OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
+ (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
+ (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
+ (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
} else {
- e32(0x0);
+ OUT_BATCH(0);
}
- R300_STATECHANGE(r300, zs);
- reg_start(R300_ZB_CNTL, 2);
{
uint32_t t1, t2;
@@ -146,73 +224,92 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
R300_S_FRONT_ZFAIL_OP_SHIFT);
}
- e32(t1);
- e32(t2);
- e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
- (ctx->Stencil.Clear & R300_STENCILREF_MASK));
+ OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);
+ OUT_BATCH(t1);
+ OUT_BATCH(t2);
+ OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) <<
+ R300_STENCILWRITEMASK_SHIFT) |
+ (ctx->Stencil.Clear & R300_STENCILREF_MASK));
+ END_BATCH();
}
- cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
- cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
- cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
- cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
- cmd2[2].u = r300PackFloat32(dPriv->h / 2.0);
- cmd2[3].u = r300PackFloat32(ctx->Depth.Clear);
- cmd2[4].u = r300PackFloat32(1.0);
- cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]);
- cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]);
- cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
- cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(9);
+ OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR));
+ OUT_BATCH_FLOAT32(dPriv->w / 2.0);
+ OUT_BATCH_FLOAT32(dPriv->h / 2.0);
+ OUT_BATCH_FLOAT32(ctx->Depth.Clear);
+ OUT_BATCH_FLOAT32(1.0);
+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
+ END_BATCH();
+ } else {
+ OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
+ OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
+ (1 << R300_PRIM_NUM_VERTICES_SHIFT));
+ OUT_BATCH_FLOAT32(dPriv->w / 2.0);
+ OUT_BATCH_FLOAT32(dPriv->h / 2.0);
+ OUT_BATCH_FLOAT32(ctx->Depth.Clear);
+ OUT_BATCH_FLOAT32(1.0);
+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
+ }
r300EmitCacheFlush(rmesa);
- cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+ cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+
+ R300_STATECHANGE(r300, cb);
+ R300_STATECHANGE(r300, cmk);
+ R300_STATECHANGE(r300, zs);
}
static void r300EmitClearState(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- r300ContextPtr rmesa = r300;
- __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+ BATCH_LOCALS(&r300->radeon);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon);
int i;
- int cmd_reserved = 0;
- int cmd_written = 0;
- drm_radeon_cmd_header_t *cmd = NULL;
- int has_tcl = 1;
+ int has_tcl;
int is_r500 = 0;
GLuint vap_cntl;
- if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- has_tcl = 0;
+ has_tcl = r300->options.hw_tcl_enabled;
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
- is_r500 = 1;
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ is_r500 = 1;
-
- /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
- * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are
- * quite complex; see the functions in r300_emit.c.
+ /* State atom dirty tracking is a little subtle here.
+ *
+ * On the one hand, we need to make sure base state is emitted
+ * here if we start with an empty batch buffer, otherwise clear
+ * works incorrectly with multiple processes. Therefore, the first
+ * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE.
+ *
+ * On the other hand, implicit state emission clears the state atom
+ * dirty bits, so we have to call R300_STATECHANGE later than the
+ * first BEGIN_BATCH.
*
- * I believe it would be a good idea to extend the functions in
- * r300_emit.c so that they can be used to setup the default values for
- * these registers, as well as the actual values used for rendering.
+ * The final trickiness is that, because we change state, we need
+ * to ensure that any stored swtcl primitives are flushed properly
+ * before we start changing state. See the R300_NEWPRIM in r300Clear
+ * for this.
*/
- R300_STATECHANGE(r300, vir[0]);
- reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
+ BEGIN_BATCH(31);
+ OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
if (!has_tcl)
- e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+ OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
else
- e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+ OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
- /* disable fog */
- R300_STATECHANGE(r300, fogs);
- reg_start(R300_FG_FOG_BLEND, 0);
- e32(0x0);
-
- R300_STATECHANGE(r300, vir[1]);
- reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0);
- e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+ OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
+ OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+ ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
(R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
(R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
@@ -226,619 +323,460 @@ static void r300EmitClearState(GLcontext * ctx)
<< R300_SWIZZLE1_SHIFT)));
/* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
- R300_STATECHANGE(r300, vic);
- reg_start(R300_VAP_VTX_STATE_CNTL, 1);
- e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
- e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
+ OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2);
+ OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
+ OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
- R300_STATECHANGE(r300, vte);
/* comes from fglrx startup of clear */
- reg_start(R300_SE_VTE_CNTL, 1);
- e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
- R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
- R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
- R300_VPORT_Z_OFFSET_ENA);
- e32(0x8);
+ OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
+ OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
+ R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
+ R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
+ R300_VPORT_Z_OFFSET_ENA);
+ OUT_BATCH(0x8);
- reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0);
- e32(0xaaaaaaaa);
+ OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
- R300_STATECHANGE(r300, vof);
- reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
- e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
- e32(0x0); /* no textures */
+ OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+ OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
+ OUT_BATCH(0); /* no textures */
- R300_STATECHANGE(r300, txe);
- reg_start(R300_TX_ENABLE, 0);
- e32(0x0);
+ OUT_BATCH_REGVAL(R300_TX_ENABLE, 0);
- R300_STATECHANGE(r300, vpt);
- reg_start(R300_SE_VPORT_XSCALE, 5);
- efloat(1.0);
- efloat(dPriv->x);
- efloat(1.0);
- efloat(dPriv->y);
- efloat(1.0);
- efloat(0.0);
+ OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6);
+ OUT_BATCH_FLOAT32(1.0);
+ OUT_BATCH_FLOAT32(dPriv->x);
+ OUT_BATCH_FLOAT32(1.0);
+ OUT_BATCH_FLOAT32(dPriv->y);
+ OUT_BATCH_FLOAT32(1.0);
+ OUT_BATCH_FLOAT32(0.0);
- R300_STATECHANGE(r300, at);
- reg_start(R300_FG_ALPHA_FUNC, 0);
- e32(0x0);
+ OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
+
+ OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
+ OUT_BATCH(0x0);
+ OUT_BATCH(0x0);
+ END_BATCH();
+ R300_STATECHANGE(r300, vir[0]);
+ R300_STATECHANGE(r300, fogs);
+ R300_STATECHANGE(r300, vir[1]);
+ R300_STATECHANGE(r300, vic);
+ R300_STATECHANGE(r300, vte);
+ R300_STATECHANGE(r300, vof);
+ R300_STATECHANGE(r300, txe);
+ R300_STATECHANGE(r300, vpt);
+ R300_STATECHANGE(r300, at);
R300_STATECHANGE(r300, bld);
- reg_start(R300_RB3D_CBLEND, 1);
- e32(0x0);
- e32(0x0);
+ R300_STATECHANGE(r300, ps);
if (has_tcl) {
- R300_STATECHANGE(r300, vap_clip_cntl);
- reg_start(R300_VAP_CLIP_CNTL, 0);
- e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
+ R300_STATECHANGE(r300, vap_clip_cntl);
+
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
+ END_BATCH();
}
- R300_STATECHANGE(r300, ps);
- reg_start(R300_GA_POINT_SIZE, 0);
- e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
- ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGVAL(R300_GA_POINT_SIZE,
+ ((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
+ ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
+ END_BATCH();
if (!is_r500) {
R300_STATECHANGE(r300, ri);
- reg_start(R300_RS_IP_0, 7);
- for (i = 0; i < 8; ++i) {
- e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
- }
-
R300_STATECHANGE(r300, rc);
- /* The second constant is needed to get glxgears display anything .. */
- reg_start(R300_RS_COUNT, 1);
- e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
- e32(0x0);
-
R300_STATECHANGE(r300, rr);
- reg_start(R300_RS_INST_0, 0);
- e32(R300_RS_INST_COL_CN_WRITE);
+
+ BEGIN_BATCH(14);
+ OUT_BATCH_REGSEQ(R300_RS_IP_0, 8);
+ for (i = 0; i < 8; ++i)
+ OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
+
+ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
+ OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+ OUT_BATCH(0x0);
+
+ OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
+ END_BATCH();
} else {
R300_STATECHANGE(r300, ri);
- reg_start(R500_RS_IP_0, 7);
+ R300_STATECHANGE(r300, rc);
+ R300_STATECHANGE(r300, rr);
+
+ BEGIN_BATCH(14);
+ OUT_BATCH_REGSEQ(R500_RS_IP_0, 8);
for (i = 0; i < 8; ++i) {
- e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
- (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+ OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
}
- R300_STATECHANGE(r300, rc);
- /* The second constant is needed to get glxgears display anything .. */
- reg_start(R300_RS_COUNT, 1);
- e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
- e32(0x0);
-
- R300_STATECHANGE(r300, rr);
- reg_start(R500_RS_INST_0, 0);
- e32(R500_RS_INST_COL_CN_WRITE);
+ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
+ OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+ OUT_BATCH(0x0);
+ OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
+ END_BATCH();
}
if (!is_r500) {
R300_STATECHANGE(r300, fp);
- reg_start(R300_US_CONFIG, 2);
- e32(0x0);
- e32(0x0);
- e32(0x0);
- reg_start(R300_US_CODE_ADDR_0, 3);
- e32(0x0);
- e32(0x0);
- e32(0x0);
- e32(R300_RGBA_OUT);
-
R300_STATECHANGE(r300, fpi[0]);
R300_STATECHANGE(r300, fpi[1]);
R300_STATECHANGE(r300, fpi[2]);
R300_STATECHANGE(r300, fpi[3]);
- reg_start(R300_US_ALU_RGB_INST_0, 0);
- e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
-
- reg_start(R300_US_ALU_RGB_ADDR_0, 0);
- e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
-
- reg_start(R300_US_ALU_ALPHA_INST_0, 0);
- e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
-
- reg_start(R300_US_ALU_ALPHA_ADDR_0, 0);
- e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
+ BEGIN_BATCH(17);
+ OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
+ OUT_BATCH(0x0);
+ OUT_BATCH(0x0);
+ OUT_BATCH(0x0);
+ OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
+ OUT_BATCH(0x0);
+ OUT_BATCH(0x0);
+ OUT_BATCH(0x0);
+ OUT_BATCH(R300_RGBA_OUT);
+
+ OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0,
+ FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
+ OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0,
+ FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
+ OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0,
+ FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
+ OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0,
+ FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
+ END_BATCH();
} else {
- R300_STATECHANGE(r300, fp);
- reg_start(R500_US_CONFIG, 1);
- e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
- e32(0x0);
- reg_start(R500_US_CODE_ADDR, 2);
- e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
- e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
- e32(R500_US_CODE_OFFSET_ADDR(0));
+ struct radeon_state_atom r500fp;
+ uint32_t _cmd[10];
+ R300_STATECHANGE(r300, fp);
R300_STATECHANGE(r300, r500fp);
- r500fp_start_fragment(0, 6);
-
- e32(R500_INST_TYPE_OUT |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_LAST |
- R500_INST_RGB_OMASK_R |
- R500_INST_RGB_OMASK_G |
- R500_INST_RGB_OMASK_B |
- R500_INST_ALPHA_OMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP);
-
- e32(R500_RGB_ADDR0(0) |
- R500_RGB_ADDR1(0) |
- R500_RGB_ADDR1_CONST |
- R500_RGB_ADDR2(0) |
- R500_RGB_ADDR2_CONST);
-
- e32(R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR1(0) |
- R500_ALPHA_ADDR1_CONST |
- R500_ALPHA_ADDR2(0) |
- R500_ALPHA_ADDR2_CONST);
-
- e32(R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC0 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_B);
-
- e32(R500_ALPHA_OP_CMP |
- R500_ALPHA_SWIZ_A_A |
- R500_ALPHA_SWIZ_B_A);
-
- e32(R500_ALU_RGBA_OP_CMP |
- R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 |
- R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0);
+
+ BEGIN_BATCH(7);
+ OUT_BATCH_REGSEQ(R500_US_CONFIG, 2);
+ OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+ OUT_BATCH(0x0);
+ OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
+ OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
+ OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
+ OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0));
+ END_BATCH();
+
+ r500fp.check = check_r500fp;
+ r500fp.cmd = _cmd;
+ r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0);
+ r500fp.cmd[1] = R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_LAST |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP;
+ r500fp.cmd[2] = R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR1_CONST |
+ R500_RGB_ADDR2(0) |
+ R500_RGB_ADDR2_CONST;
+ r500fp.cmd[3] = R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR1_CONST |
+ R500_ALPHA_ADDR2(0) |
+ R500_ALPHA_ADDR2_CONST;
+ r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC0 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_B;
+ r500fp.cmd[5] = R500_ALPHA_OP_CMP |
+ R500_ALPHA_SWIZ_A_A |
+ R500_ALPHA_SWIZ_B_A;
+ r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP |
+ R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 |
+ R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0;
+
+ r500fp.cmd[7] = 0;
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ emit_r500fp(ctx, &r500fp);
+ } else {
+ int dwords = r500fp.check(ctx,&r500fp);
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(r500fp.cmd, dwords);
+ END_BATCH();
+ }
+
}
- reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0);
- e32(0x00000000);
+ BEGIN_BATCH(2);
+ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+ END_BATCH();
+
if (has_tcl) {
- vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
(12 << R300_VF_MAX_VTX_NUM_SHIFT));
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
- vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
- } else
- vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
+ } else {
+ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
(5 << R300_VF_MAX_VTX_NUM_SHIFT));
+ }
if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
- vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+ vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
(r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
(r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
- vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+ vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
(r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
- vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+ vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
(r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
- vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+ vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
else
- vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
+ vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
- R300_STATECHANGE(rmesa, vap_cntl);
- reg_start(R300_VAP_CNTL, 0);
- e32(vap_cntl);
+ R300_STATECHANGE(r300, vap_cntl);
+
+ BEGIN_BATCH(2);
+ OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl);
+ END_BATCH();
if (has_tcl) {
+ struct radeon_state_atom vpu;
+ uint32_t _cmd[10];
R300_STATECHANGE(r300, pvs);
- reg_start(R300_VAP_PVS_CODE_CNTL_0, 2);
-
- e32((0 << R300_PVS_FIRST_INST_SHIFT) |
- (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (1 << R300_PVS_LAST_INST_SHIFT));
- e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
- (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
- e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-
+ R300_STATECHANGE(r300, vap_flush);
R300_STATECHANGE(r300, vpi);
- vsf_start_fragment(0x0, 8);
- e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
- e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
- e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
- e32(0x0);
+ BEGIN_BATCH(4);
+ OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
+ OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
+ (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (1 << R300_PVS_LAST_INST_SHIFT));
+ OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+ (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
+ OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+ END_BATCH();
+
+ vpu.check = check_vpu;
+ vpu.cmd = _cmd;
+ vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2);
+
+ vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE,
+ 0, 0xf, PVS_DST_REG_OUT);
+ vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
+ PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
+ PVS_SRC_REG_INPUT, NEGATE_NONE);
+ vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0,
+ PVS_SRC_SELECT_FORCE_0,
+ PVS_SRC_SELECT_FORCE_0,
+ PVS_SRC_SELECT_FORCE_0,
+ PVS_SRC_REG_INPUT, NEGATE_NONE);
+ vpu.cmd[4] = 0x0;
+
+ vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf,
+ PVS_DST_REG_OUT);
+ vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X,
+ PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
+ PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT,
+ NEGATE_NONE);
+ vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0,
+ PVS_SRC_SELECT_FORCE_0,
+ PVS_SRC_SELECT_FORCE_0,
+ PVS_SRC_SELECT_FORCE_0,
+ PVS_SRC_REG_INPUT, NEGATE_NONE);
+ vpu.cmd[8] = 0x0;
+
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ int dwords = r300->hw.vap_flush.check(ctx,&r300->hw.vap_flush);
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(r300->hw.vap_flush.cmd, dwords);
+ END_BATCH();
+ emit_vpu(ctx, &vpu);
+ } else {
+ int dwords = vpu.check(ctx,&vpu);
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(vpu.cmd, dwords);
+ END_BATCH();
+ }
- e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
- e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
- e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
- e32(0x0);
}
}
-/**
- * Buffer clear
- */
-static void r300Clear(GLcontext * ctx, GLbitfield mask)
+static int r300KernelClear(GLcontext *ctx, GLuint flags)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
- int flags = 0;
- int bits = 0;
- int swapped;
-
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "r300Clear\n");
-
- {
- LOCK_HARDWARE(&r300->radeon);
- UNLOCK_HARDWARE(&r300->radeon);
- if (dPriv->numClipRects == 0)
- return;
- }
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon);
+ struct radeon_framebuffer *rfb = dPriv->driverPrivate;
+ struct radeon_renderbuffer *rrb;
+ struct radeon_renderbuffer *rrbd;
+ int bits = 0, ret;
- if (mask & BUFFER_BIT_FRONT_LEFT) {
- flags |= BUFFER_BIT_FRONT_LEFT;
- mask &= ~BUFFER_BIT_FRONT_LEFT;
- }
+ /* Make sure it fits there. */
+ radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs);
- if (mask & BUFFER_BIT_BACK_LEFT) {
- flags |= BUFFER_BIT_BACK_LEFT;
- mask &= ~BUFFER_BIT_BACK_LEFT;
+ if (flags & BUFFER_BIT_COLOR0) {
+ rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0);
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
+ rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM);
}
- if (mask & BUFFER_BIT_DEPTH) {
- bits |= CLEARBUFFER_DEPTH;
- mask &= ~BUFFER_BIT_DEPTH;
+ if (flags & BUFFER_BIT_FRONT_LEFT) {
+ rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT);
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
+ rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM);
}
- if ((mask & BUFFER_BIT_STENCIL) && r300->state.stencil.hw_stencil) {
- bits |= CLEARBUFFER_STENCIL;
- mask &= ~BUFFER_BIT_STENCIL;
+ if (flags & BUFFER_BIT_BACK_LEFT) {
+ rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT);
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
+ rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM);
}
- if (mask) {
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
- fprintf(stderr, "%s: swrast clear, mask: %x\n",
- __FUNCTION__, mask);
- _swrast_Clear(ctx, mask);
+ rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
+ if (rrbd) {
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
+ rrbd->bo, 0, RADEON_GEM_DOMAIN_VRAM);
}
- swapped = r300->radeon.sarea->pfCurrentPage == 1;
+ ret = radeon_cs_space_check(r300->radeon.cmdbuf.cs);
+ if (ret)
+ return -1;
- /* Make sure it fits there. */
- r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__);
+ rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__);
if (flags || bits)
r300EmitClearState(ctx);
+ rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
+ if (rrbd && (flags & BUFFER_BIT_DEPTH))
+ bits |= CLEARBUFFER_DEPTH;
+
+ if (rrbd && (flags & BUFFER_BIT_STENCIL))
+ bits |= CLEARBUFFER_STENCIL;
+
+ if (flags & BUFFER_BIT_COLOR0) {
+ rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0);
+ r300ClearBuffer(r300, CLEARBUFFER_COLOR, rrb, NULL);
+ bits = 0;
+ }
+
if (flags & BUFFER_BIT_FRONT_LEFT) {
- r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
+ rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT);
+ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
bits = 0;
}
if (flags & BUFFER_BIT_BACK_LEFT) {
- r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1);
+ rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT);
+ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
bits = 0;
}
if (bits)
- r300ClearBuffer(r300, bits, 0);
-
-}
-
-void r300Flush(GLcontext * ctx)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ r300ClearBuffer(r300, bits, NULL, rrbd);
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa );
-
- if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit)
- r300FlushCmdBuf(rmesa, __FUNCTION__);
+ COMMIT_BATCH();
+ return 0;
}
-#ifdef USER_BUFFERS
-#include "r300_mem.h"
-
-void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
+/**
+ * Buffer clear
+ */
+static void r300Clear(GLcontext * ctx, GLbitfield mask)
{
- struct r300_dma_buffer *dmabuf;
- size = MAX2(size, RADEON_BUFFER_SIZE * 16);
-
- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
- fprintf(stderr, "%s\n", __FUNCTION__);
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon);
+ const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask);
+ GLbitfield swrast_mask = 0, tri_mask = 0;
+ int i, ret;
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
- if (rmesa->dma.flush) {
- rmesa->dma.flush(rmesa);
- }
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "r300Clear\n");
- if (rmesa->dma.current.buf) {
-#ifdef USER_BUFFERS
- r300_mem_use(rmesa, rmesa->dma.current.buf->id);
-#endif
- r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ LOCK_HARDWARE(&r300->radeon);
+ UNLOCK_HARDWARE(&r300->radeon);
+ if (dPriv->numClipRects == 0)
+ return;
}
- if (rmesa->dma.nr_released_bufs > 4)
- r300FlushCmdBuf(rmesa, __FUNCTION__);
- dmabuf = CALLOC_STRUCT(r300_dma_buffer);
- dmabuf->buf = (void *)1; /* hack */
- dmabuf->refcount = 1;
-
- dmabuf->id = r300_mem_alloc(rmesa, 4, size);
- if (dmabuf->id == 0) {
- LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */
-
- r300FlushCmdBufLocked(rmesa, __FUNCTION__);
- radeonWaitForIdleLocked(&rmesa->radeon);
+ /* Flush swtcl vertices if necessary, because we will change hardware
+ * state during clear. See also the state-related comment in
+ * r300EmitClearState.
+ */
+ R300_NEWPRIM(r300);
- dmabuf->id = r300_mem_alloc(rmesa, 4, size);
+ if (colorMask == ~0)
+ tri_mask |= (mask & BUFFER_BITS_COLOR);
+ else
+ tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
- UNLOCK_HARDWARE(&rmesa->radeon);
- if (dmabuf->id == 0) {
- fprintf(stderr,
- "Error: Could not get dma buffer... exiting\n");
- _mesa_exit(-1);
- }
+ /* HW stencil */
+ if (mask & BUFFER_BIT_STENCIL) {
+ tri_mask |= BUFFER_BIT_STENCIL;
}
- rmesa->dma.current.buf = dmabuf;
- rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id);
- rmesa->dma.current.end = size;
- rmesa->dma.current.start = 0;
- rmesa->dma.current.ptr = 0;
-}
-
-void r300ReleaseDmaRegion(r300ContextPtr rmesa,
- struct r300_dma_region *region, const char *caller)
-{
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
-
- if (!region->buf)
- return;
-
- if (rmesa->dma.flush)
- rmesa->dma.flush(rmesa);
-
- if (--region->buf->refcount == 0) {
- r300_mem_free(rmesa, region->buf->id);
- FREE(region->buf);
- rmesa->dma.nr_released_bufs++;
+ /* HW depth */
+ if (mask & BUFFER_BIT_DEPTH) {
+ tri_mask |= BUFFER_BIT_DEPTH;
}
- region->buf = 0;
- region->start = 0;
-}
-
-/* Allocates a region from rmesa->dma.current. If there isn't enough
- * space in current, grab a new buffer (and discard what was left of current)
- */
-void r300AllocDmaRegion(r300ContextPtr rmesa,
- struct r300_dma_region *region,
- int bytes, int alignment)
-{
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
-
- if (rmesa->dma.flush)
- rmesa->dma.flush(rmesa);
-
- if (region->buf)
- r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
-
- alignment--;
- rmesa->dma.current.start = rmesa->dma.current.ptr =
- (rmesa->dma.current.ptr + alignment) & ~alignment;
-
- if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
- r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7);
-
- region->start = rmesa->dma.current.start;
- region->ptr = rmesa->dma.current.start;
- region->end = rmesa->dma.current.start + bytes;
- region->address = rmesa->dma.current.address;
- region->buf = rmesa->dma.current.buf;
- region->buf->refcount++;
-
- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
- rmesa->dma.current.start =
- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
-
- assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
-}
+ /* If we're doing a tri pass for depth/stencil, include a likely color
+ * buffer with it.
+ */
-#else
-static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
-{
- struct r300_dma_buffer *dmabuf;
- int fd = rmesa->radeon.dri.fd;
- int index = 0;
- int size = 0;
- drmDMAReq dma;
- int ret;
-
- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- if (rmesa->dma.flush) {
- rmesa->dma.flush(rmesa);
+ for (i = 0; i < BUFFER_COUNT; i++) {
+ GLuint bufBit = 1 << i;
+ if ((tri_mask) & bufBit) {
+ if (!fb->Attachment[i].Renderbuffer->ClassID) {
+ tri_mask &= ~bufBit;
+ swrast_mask |= bufBit;
+ }
+ }
}
- if (rmesa->dma.current.buf)
- r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
-
- if (rmesa->dma.nr_released_bufs > 4)
- r300FlushCmdBuf(rmesa, __FUNCTION__);
-
- dma.context = rmesa->radeon.dri.hwContext;
- dma.send_count = 0;
- dma.send_list = NULL;
- dma.send_sizes = NULL;
- dma.flags = 0;
- dma.request_count = 1;
- dma.request_size = RADEON_BUFFER_SIZE;
- dma.request_list = &index;
- dma.request_sizes = &size;
- dma.granted_count = 0;
-
- LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */
-
- ret = drmDMA(fd, &dma);
-
- if (ret != 0) {
- /* Try to release some buffers and wait until we can't get any more */
- if (rmesa->dma.nr_released_bufs) {
- r300FlushCmdBufLocked(rmesa, __FUNCTION__);
- }
-
- if (RADEON_DEBUG & DEBUG_DMA)
- fprintf(stderr, "Waiting for buffers\n");
-
- radeonWaitForIdleLocked(&rmesa->radeon);
- ret = drmDMA(fd, &dma);
-
- if (ret != 0) {
- UNLOCK_HARDWARE(&rmesa->radeon);
- fprintf(stderr,
- "Error: Could not get dma buffer... exiting\n");
- _mesa_exit(-1);
+ /* SW fallback clearing */
+ swrast_mask = mask & ~tri_mask;
+
+ ret = 0;
+ if (tri_mask) {
+ if (r300->radeon.radeonScreen->kernel_mm)
+ radeonUserClear(ctx, tri_mask);
+ else {
+ /* if kernel clear fails due to size restraints fallback */
+ ret = r300KernelClear(ctx, tri_mask);
+ if (ret < 0)
+ swrast_mask |= tri_mask;
}
}
- UNLOCK_HARDWARE(&rmesa->radeon);
-
- if (RADEON_DEBUG & DEBUG_DMA)
- fprintf(stderr, "Allocated buffer %d\n", index);
-
- dmabuf = CALLOC_STRUCT(r300_dma_buffer);
- dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index];
- dmabuf->refcount = 1;
-
- rmesa->dma.current.buf = dmabuf;
- rmesa->dma.current.address = dmabuf->buf->address;
- rmesa->dma.current.end = dmabuf->buf->total;
- rmesa->dma.current.start = 0;
- rmesa->dma.current.ptr = 0;
-}
-
-void r300ReleaseDmaRegion(r300ContextPtr rmesa,
- struct r300_dma_region *region, const char *caller)
-{
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
-
- if (!region->buf)
- return;
-
- if (rmesa->dma.flush)
- rmesa->dma.flush(rmesa);
-
- if (--region->buf->refcount == 0) {
- drm_radeon_cmd_header_t *cmd;
-
- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
- fprintf(stderr, "%s -- DISCARD BUF %d\n",
- __FUNCTION__, region->buf->buf->idx);
- cmd =
- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
- sizeof
- (*cmd) / 4,
- __FUNCTION__);
- cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
- cmd->dma.buf_idx = region->buf->buf->idx;
-
- FREE(region->buf);
- rmesa->dma.nr_released_bufs++;
+ if (swrast_mask) {
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
+ fprintf(stderr, "%s: swrast clear, mask: %x\n",
+ __FUNCTION__, swrast_mask);
+ _swrast_Clear(ctx, swrast_mask);
}
-
- region->buf = 0;
- region->start = 0;
-}
-
-/* Allocates a region from rmesa->dma.current. If there isn't enough
- * space in current, grab a new buffer (and discard what was left of current)
- */
-void r300AllocDmaRegion(r300ContextPtr rmesa,
- struct r300_dma_region *region,
- int bytes, int alignment)
-{
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
-
- if (rmesa->dma.flush)
- rmesa->dma.flush(rmesa);
-
- if (region->buf)
- r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
-
- alignment--;
- rmesa->dma.current.start = rmesa->dma.current.ptr =
- (rmesa->dma.current.ptr + alignment) & ~alignment;
-
- if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
- r300RefillCurrentDmaRegion(rmesa);
-
- region->start = rmesa->dma.current.start;
- region->ptr = rmesa->dma.current.start;
- region->end = rmesa->dma.current.start + bytes;
- region->address = rmesa->dma.current.address;
- region->buf = rmesa->dma.current.buf;
- region->buf->refcount++;
-
- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
- rmesa->dma.current.start =
- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
-
- assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
-}
-
-#endif
-
-GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer,
- GLint size)
-{
- int offset =
- (char *)pointer -
- (char *)rmesa->radeon.radeonScreen->gartTextures.map;
- int valid = (size >= 0 && offset >= 0
- && offset + size <
- rmesa->radeon.radeonScreen->gartTextures.size);
-
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer,
- valid);
-
- return valid;
-}
-
-GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer)
-{
- int offset =
- (char *)pointer -
- (char *)rmesa->radeon.radeonScreen->gartTextures.map;
-
- //fprintf(stderr, "offset=%08x\n", offset);
-
- if (offset < 0
- || offset > rmesa->radeon.radeonScreen->gartTextures.size)
- return ~0;
- else
- return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
}
void r300InitIoctlFuncs(struct dd_function_table *functions)
{
functions->Clear = r300Clear;
functions->Finish = radeonFinish;
- functions->Flush = r300Flush;
+ functions->Flush = radeonFlush;
}
diff --git a/r300/r300_ioctl.h b/r300/r300_ioctl.h
index e1143fb..3abfa71 100644
--- a/r300/r300_ioctl.h
+++ b/r300/r300_ioctl.h
@@ -39,22 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_context.h"
#include "radeon_drm.h"
-extern GLboolean r300IsGartMemory(r300ContextPtr rmesa,
- const GLvoid * pointer, GLint size);
-
-extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa,
- const GLvoid * pointer);
-
-extern void r300Flush(GLcontext * ctx);
-
-extern void r300ReleaseDmaRegion(r300ContextPtr rmesa,
- struct r300_dma_region *region,
- const char *caller);
-extern void r300AllocDmaRegion(r300ContextPtr rmesa,
- struct r300_dma_region *region, int bytes,
- int alignment);
-
extern void r300InitIoctlFuncs(struct dd_function_table *functions);
-extern void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size);
#endif /* __R300_IOCTL_H__ */
diff --git a/r300/r300_mem.c b/r300/r300_mem.c
deleted file mode 100644
index f8f9d4f..0000000
--- a/r300/r300_mem.c
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * Copyright (C) 2005 Aapo Tahkola.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * \file
- *
- * \author Aapo Tahkola <aet@rasterburn.org>
- */
-
-#include <unistd.h>
-
-#include "r300_context.h"
-#include "r300_cmdbuf.h"
-#include "r300_ioctl.h"
-#include "r300_mem.h"
-#include "radeon_ioctl.h"
-
-#ifdef USER_BUFFERS
-
-static void resize_u_list(r300ContextPtr rmesa)
-{
- void *temp;
- int nsize;
-
- temp = rmesa->rmm->u_list;
- nsize = rmesa->rmm->u_size * 2;
-
- rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list));
- _mesa_memset(rmesa->rmm->u_list, 0,
- nsize * sizeof(*rmesa->rmm->u_list));
-
- if (temp) {
- r300FlushCmdBuf(rmesa, __FUNCTION__);
-
- _mesa_memcpy(rmesa->rmm->u_list, temp,
- rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list));
- _mesa_free(temp);
- }
-
- rmesa->rmm->u_size = nsize;
-}
-
-void r300_mem_init(r300ContextPtr rmesa)
-{
- rmesa->rmm = malloc(sizeof(struct r300_memory_manager));
- memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager));
-
- rmesa->rmm->u_size = 128;
- resize_u_list(rmesa);
-}
-
-void r300_mem_destroy(r300ContextPtr rmesa)
-{
- _mesa_free(rmesa->rmm->u_list);
- rmesa->rmm->u_list = NULL;
-
- _mesa_free(rmesa->rmm);
- rmesa->rmm = NULL;
-}
-
-void *r300_mem_ptr(r300ContextPtr rmesa, int id)
-{
- assert(id <= rmesa->rmm->u_last);
- return rmesa->rmm->u_list[id].ptr;
-}
-
-int r300_mem_find(r300ContextPtr rmesa, void *ptr)
-{
- int i;
-
- for (i = 1; i < rmesa->rmm->u_size + 1; i++)
- if (rmesa->rmm->u_list[i].ptr &&
- ptr >= rmesa->rmm->u_list[i].ptr &&
- ptr <
- rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size)
- break;
-
- if (i < rmesa->rmm->u_size + 1)
- return i;
-
- fprintf(stderr, "%p failed\n", ptr);
- return 0;
-}
-
-//#define MM_DEBUG
-int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size)
-{
- drm_radeon_mem_alloc_t alloc;
- int offset = 0, ret;
- int i, free = -1;
- int done_age;
- drm_radeon_mem_free_t memfree;
- int tries = 0;
- static int bytes_wasted = 0, allocated = 0;
-
- if (size < 4096)
- bytes_wasted += 4096 - size;
-
- allocated += size;
-
-#if 0
- static int t = 0;
- if (t != time(NULL)) {
- t = time(NULL);
- fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n",
- rmesa->rmm->u_last, bytes_wasted / 1024,
- allocated / 1024);
- }
-#endif
-
- memfree.region = RADEON_MEM_REGION_GART;
-
- again:
-
- done_age = radeonGetAge((radeonContextPtr) rmesa);
-
- if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size)
- resize_u_list(rmesa);
-
- for (i = rmesa->rmm->u_last + 1; i > 0; i--) {
- if (rmesa->rmm->u_list[i].ptr == NULL) {
- free = i;
- continue;
- }
-
- if (rmesa->rmm->u_list[i].h_pending == 0 &&
- rmesa->rmm->u_list[i].pending
- && rmesa->rmm->u_list[i].age <= done_age) {
- memfree.region_offset =
- (char *)rmesa->rmm->u_list[i].ptr -
- (char *)rmesa->radeon.radeonScreen->gartTextures.
- map;
-
- ret =
- drmCommandWrite(rmesa->radeon.radeonScreen->
- driScreen->fd, DRM_RADEON_FREE,
- &memfree, sizeof(memfree));
-
- if (ret) {
- fprintf(stderr, "Failed to free at %p\n",
- rmesa->rmm->u_list[i].ptr);
- fprintf(stderr, "ret = %s\n", strerror(-ret));
- exit(1);
- } else {
-#ifdef MM_DEBUG
- fprintf(stderr, "really freed %d at age %x\n",
- i,
- radeonGetAge((radeonContextPtr) rmesa));
-#endif
- if (i == rmesa->rmm->u_last)
- rmesa->rmm->u_last--;
-
- if (rmesa->rmm->u_list[i].size < 4096)
- bytes_wasted -=
- 4096 - rmesa->rmm->u_list[i].size;
-
- allocated -= rmesa->rmm->u_list[i].size;
- rmesa->rmm->u_list[i].pending = 0;
- rmesa->rmm->u_list[i].ptr = NULL;
- free = i;
- }
- }
- }
- rmesa->rmm->u_head = i;
-
- if (free == -1) {
- WARN_ONCE("Ran out of slots!\n");
- //usleep(100);
- r300FlushCmdBuf(rmesa, __FUNCTION__);
- tries++;
- if (tries > 100) {
- WARN_ONCE("Ran out of slots!\n");
- exit(1);
- }
- goto again;
- }
-
- alloc.region = RADEON_MEM_REGION_GART;
- alloc.alignment = alignment;
- alloc.size = size;
- alloc.region_offset = &offset;
-
- ret =
- drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc,
- sizeof(alloc));
- if (ret) {
-#if 0
- WARN_ONCE("Ran out of mem!\n");
- r300FlushCmdBuf(rmesa, __FUNCTION__);
- //usleep(100);
- tries2++;
- tries = 0;
- if (tries2 > 100) {
- WARN_ONCE("Ran out of GART memory!\n");
- exit(1);
- }
- goto again;
-#else
- WARN_ONCE
- ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n",
- size);
- return 0;
-#endif
- }
-
- i = free;
-
- if (i > rmesa->rmm->u_last)
- rmesa->rmm->u_last = i;
-
- rmesa->rmm->u_list[i].ptr =
- ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset;
- rmesa->rmm->u_list[i].size = size;
- rmesa->rmm->u_list[i].age = 0;
- //fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i);
-
-#ifdef MM_DEBUG
- fprintf(stderr, "allocated %d at age %x\n", i,
- radeonGetAge((radeonContextPtr) rmesa));
-#endif
-
- return i;
-}
-
-void r300_mem_use(r300ContextPtr rmesa, int id)
-{
- uint64_t ull;
-#ifdef MM_DEBUG
- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
- radeonGetAge((radeonContextPtr) rmesa));
-#endif
- drm_r300_cmd_header_t *cmd;
-
- assert(id <= rmesa->rmm->u_last);
-
- if (id == 0)
- return;
-
- cmd =
- (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa,
- 2 + sizeof(ull) / 4,
- __FUNCTION__);
- cmd[0].scratch.cmd_type = R300_CMD_SCRATCH;
- cmd[0].scratch.reg = R300_MEM_SCRATCH;
- cmd[0].scratch.n_bufs = 1;
- cmd[0].scratch.flags = 0;
- cmd++;
-
- ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age;
- _mesa_memcpy(cmd, &ull, sizeof(ull));
- cmd += sizeof(ull) / 4;
-
- cmd[0].u = /*id */ 0;
-
- LOCK_HARDWARE(&rmesa->radeon); /* Protect from DRM. */
- rmesa->rmm->u_list[id].h_pending++;
- UNLOCK_HARDWARE(&rmesa->radeon);
-}
-
-unsigned long r300_mem_offset(r300ContextPtr rmesa, int id)
-{
- unsigned long offset;
-
- assert(id <= rmesa->rmm->u_last);
-
- offset = (char *)rmesa->rmm->u_list[id].ptr -
- (char *)rmesa->radeon.radeonScreen->gartTextures.map;
- offset += rmesa->radeon.radeonScreen->gart_texture_offset;
-
- return offset;
-}
-
-void *r300_mem_map(r300ContextPtr rmesa, int id, int access)
-{
-#ifdef MM_DEBUG
- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
- radeonGetAge((radeonContextPtr) rmesa));
-#endif
- void *ptr;
- int tries = 0;
-
- assert(id <= rmesa->rmm->u_last);
-
- if (access == R300_MEM_R) {
-
- if (rmesa->rmm->u_list[id].mapped == 1)
- WARN_ONCE("buffer %d already mapped\n", id);
-
- rmesa->rmm->u_list[id].mapped = 1;
- ptr = r300_mem_ptr(rmesa, id);
-
- return ptr;
- }
-
- if (rmesa->rmm->u_list[id].h_pending)
- r300FlushCmdBuf(rmesa, __FUNCTION__);
-
- if (rmesa->rmm->u_list[id].h_pending) {
- return NULL;
- }
-
- while (rmesa->rmm->u_list[id].age >
- radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000)
- usleep(10);
-
- if (tries >= 1000) {
- fprintf(stderr, "Idling failed (%x vs %x)\n",
- rmesa->rmm->u_list[id].age,
- radeonGetAge((radeonContextPtr) rmesa));
- return NULL;
- }
-
- if (rmesa->rmm->u_list[id].mapped == 1)
- WARN_ONCE("buffer %d already mapped\n", id);
-
- rmesa->rmm->u_list[id].mapped = 1;
- ptr = r300_mem_ptr(rmesa, id);
-
- return ptr;
-}
-
-void r300_mem_unmap(r300ContextPtr rmesa, int id)
-{
-#ifdef MM_DEBUG
- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
- radeonGetAge((radeonContextPtr) rmesa));
-#endif
-
- assert(id <= rmesa->rmm->u_last);
-
- if (rmesa->rmm->u_list[id].mapped == 0)
- WARN_ONCE("buffer %d not mapped\n", id);
-
- rmesa->rmm->u_list[id].mapped = 0;
-}
-
-void r300_mem_free(r300ContextPtr rmesa, int id)
-{
-#ifdef MM_DEBUG
- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
- radeonGetAge((radeonContextPtr) rmesa));
-#endif
-
- assert(id <= rmesa->rmm->u_last);
-
- if (id == 0)
- return;
-
- if (rmesa->rmm->u_list[id].ptr == NULL) {
- WARN_ONCE("Not allocated!\n");
- return;
- }
-
- if (rmesa->rmm->u_list[id].pending) {
- WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr);
- return;
- }
-
- rmesa->rmm->u_list[id].pending = 1;
-}
-#endif
diff --git a/r300/r300_mem.h b/r300/r300_mem.h
deleted file mode 100644
index 625a7f6..0000000
--- a/r300/r300_mem.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef __R300_MEM_H__
-#define __R300_MEM_H__
-
-//#define R300_MEM_PDL 0
-#define R300_MEM_UL 1
-
-#define R300_MEM_R 1
-#define R300_MEM_W 2
-#define R300_MEM_RW (R300_MEM_R | R300_MEM_W)
-
-#define R300_MEM_SCRATCH 2
-
-struct r300_memory_manager {
- struct {
- void *ptr;
- uint32_t size;
- uint32_t age;
- uint32_t h_pending;
- int pending;
- int mapped;
- } *u_list;
- int u_head, u_size, u_last;
-
-};
-
-extern void r300_mem_init(r300ContextPtr rmesa);
-extern void r300_mem_destroy(r300ContextPtr rmesa);
-extern void *r300_mem_ptr(r300ContextPtr rmesa, int id);
-extern int r300_mem_find(r300ContextPtr rmesa, void *ptr);
-extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size);
-extern void r300_mem_use(r300ContextPtr rmesa, int id);
-extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id);
-extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access);
-extern void r300_mem_unmap(r300ContextPtr rmesa, int id);
-extern void r300_mem_free(r300ContextPtr rmesa, int id);
-
-#endif
diff --git a/r300/r300_reg.h b/r300/r300_reg.h
index 8f1a663..39b4b61 100644
--- a/r300/r300_reg.h
+++ b/r300/r300_reg.h
@@ -1128,6 +1128,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* SU Depth Offset value */
#define R300_SU_DEPTH_OFFSET 0x42c4
+#define R300_SU_REG_DEST 0x42c8
+# define R300_RASTER_PIPE_SELECT_0 (1 << 0)
+# define R300_RASTER_PIPE_SELECT_1 (1 << 1)
+# define R300_RASTER_PIPE_SELECT_2 (1 << 2)
+# define R300_RASTER_PIPE_SELECT_3 (1 << 3)
+# define R300_RASTER_PIPE_SELECT_ALL 0xf
+
/* BEGIN: Rasterization / Interpolators - many guesses */
@@ -1467,6 +1474,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_FORMAT_3D (1 << 25)
# define R300_TX_FORMAT_CUBIC_MAP (2 << 25)
+# define R300_TX_FORMAT_GAMMA (1 << 21)
+
/* gap */
/* Floating point formats */
/* Note - hardware supports both 16 and 32 bit floating point */
@@ -1531,6 +1540,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R500_SEL_FILTER4_TC3 (3 << 18)
#define R300_TX_OFFSET_0 0x4540
+#define R300_TX_OFFSET_1 0x4544
+#define R300_TX_OFFSET_2 0x4548
+#define R300_TX_OFFSET_3 0x454C
+#define R300_TX_OFFSET_4 0x4550
+#define R300_TX_OFFSET_5 0x4554
+#define R300_TX_OFFSET_6 0x4558
+#define R300_TX_OFFSET_7 0x455C
/* BEGIN: Guess from R200 */
# define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0)
@@ -2005,6 +2021,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R500_FG_ALPHA_VALUE 0x4be0
# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff
+#define RV530_FG_ZBREG_DEST 0x4be8
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0)
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1)
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0)
+
/* gap */
/* Fragment program parameters in 7.16 floating point */
@@ -2425,6 +2446,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* Z Buffer Clear Value */
#define R300_ZB_DEPTHCLEARVALUE 0x4f28
+#define R300_ZB_ZMASK_OFFSET 0x4f30
+#define R300_ZB_ZMASK_PITCH 0x4f34
+#define R300_ZB_ZMASK_WRINDEX 0x4f38
+#define R300_ZB_ZMASK_DWORD 0x4f3c
+#define R300_ZB_ZMASK_RDINDEX 0x4f40
+
/* Hierarchical Z Memory Offset */
#define R300_ZB_HIZ_OFFSET 0x4f44
@@ -2652,6 +2679,24 @@ enum {
PVS_SRC_ADDR_MODE_1_SHIFT = 32,
};
+
+#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \
+ (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \
+ | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \
+ | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \
+ | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \
+ | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \
+ | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
+
+#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \
+ (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \
+ | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \
+ | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \
+ | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \
+ | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \
+ | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \
+ | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
+
/*\}*/
/* BEGIN: Packet 3 commands */
@@ -3165,6 +3210,9 @@ enum {
# define R300_W_SRC_RAS (1 << 2)
+/* Packet0 field ordering to write all values to the same reg */
+#define RADEON_ONE_REG_WR (1 << 15)
+
/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
* Two parameter dwords:
* 0. VAP_VTX_FMT: The first parameter is not written to hardware
diff --git a/r300/r300_render.c b/r300/r300_render.c
index 16ce4a1..3cd3875 100644
--- a/r300/r300_render.c
+++ b/r300/r300_render.c
@@ -50,6 +50,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* no bugs...
*/
+#include "r300_render.h"
+
#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
@@ -62,20 +64,19 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
+#include "vbo/vbo_split.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "radeon_reg.h"
#include "radeon_macros.h"
-#include "radeon_ioctl.h"
-#include "radeon_state.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_reg.h"
#include "r300_tex.h"
#include "r300_emit.h"
-#include "r300_fragprog.h"
-extern int future_hw_tcl_on;
+#include "r300_fragprog_common.h"
+#include "r300_swtcl.h"
/**
* \brief Convert a OpenGL primitive type into a R300 primitive type.
@@ -172,96 +173,167 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
return num_verts - verts_off;
}
-static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
+static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset)
{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct r300_dma_region *rvb = &rmesa->state.elt_dma;
- void *out;
-
- if (r300IsGartMemory(rmesa, elts, n_elts * 4)) {
- rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
- rvb->start = ((char *)elts) - rvb->address;
- rvb->aos_offset =
- rmesa->radeon.radeonScreen->gart_texture_offset +
- rvb->start;
- return;
- } else if (r300IsGartMemory(rmesa, elts, 1)) {
- WARN_ONCE("Pointer not within GART memory!\n");
- _mesa_exit(-1);
+ BATCH_LOCALS(&rmesa->radeon);
+ int size;
+
+ /* offset is in indices */
+ BEGIN_BATCH(10);
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
+ if (rmesa->ind_buf.is_32bit) {
+ /* convert to bytes */
+ offset *= 4;
+ size = vertex_count;
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+ (vertex_count << 16) | type |
+ R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+ } else {
+ /* convert to bytes */
+ offset *= 2;
+ size = (vertex_count + 1) >> 1;
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+ (vertex_count << 16) | type);
}
- r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4);
- rvb->aos_offset = GET_START(rvb);
-
- out = rvb->address + rvb->start;
- memcpy(out, elts, n_elts * 4);
-}
-
-static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
- int vertex_count, int type)
-{
- int cmd_reserved = 0;
- int cmd_written = 0;
- drm_radeon_cmd_header_t *cmd = NULL;
-
- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0);
- e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
-
- start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2);
- e32(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
- (R300_VAP_PORT_IDX0 >> 2));
- e32(addr);
- e32(vertex_count);
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+ OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+ (R300_VAP_PORT_IDX0 >> 2));
+ OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ OUT_BATCH(size);
+ } else {
+ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+ OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+ (R300_VAP_PORT_IDX0 >> 2));
+ OUT_BATCH(rmesa->ind_buf.bo_offset + offset);
+ OUT_BATCH(size);
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ }
+ END_BATCH();
}
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
+ BATCH_LOCALS(&rmesa->radeon);
+ uint32_t voffset;
int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
int i;
- int cmd_reserved = 0;
- int cmd_written = 0;
- drm_radeon_cmd_header_t *cmd = NULL;
- if (RADEON_DEBUG & DEBUG_VERTS)
+ if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
offset);
- start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1);
- e32(nr);
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH(sz+2+(nr * 2));
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+ OUT_BATCH(nr);
+
+ for (i = 0; i + 1 < nr; i += 2) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+ (rmesa->radeon.tcl.aos[i].stride << 8) |
+ (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+ (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[i].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[i+1].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
- for (i = 0; i + 1 < nr; i += 2) {
- e32((rmesa->state.aos[i].aos_size << 0) |
- (rmesa->state.aos[i].aos_stride << 8) |
- (rmesa->state.aos[i + 1].aos_size << 16) |
- (rmesa->state.aos[i + 1].aos_stride << 24));
+ if (nr & 1) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+ (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[nr - 1].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+ END_BATCH();
+ } else {
- e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride);
- e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride);
- }
+ BEGIN_BATCH(sz+2+(nr * 2));
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+ OUT_BATCH(nr);
+
+ for (i = 0; i + 1 < nr; i += 2) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+ (rmesa->radeon.tcl.aos[i].stride << 8) |
+ (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+ (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ OUT_BATCH(voffset);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ OUT_BATCH(voffset);
+ }
- if (nr & 1) {
- e32((rmesa->state.aos[nr - 1].aos_size << 0) |
- (rmesa->state.aos[nr - 1].aos_stride << 8));
- e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
+ if (nr & 1) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+ (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ OUT_BATCH(voffset);
+ }
+ for (i = 0; i + 1 < nr; i += 2) {
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[i+0].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[i+1].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+ if (nr & 1) {
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[nr-1].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+ END_BATCH();
}
+
}
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
- int cmd_reserved = 0;
- int cmd_written = 0;
- drm_radeon_cmd_header_t *cmd = NULL;
+ BATCH_LOCALS(&rmesa->radeon);
- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
+ r300_emit_scissor(rmesa->radeon.glCtx);
+ BEGIN_BATCH(3);
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
+ END_BATCH();
}
-static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- int start, int end, int prim)
+void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&rmesa->radeon);
int type, num_verts;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
type = r300PrimitiveType(rmesa, prim);
num_verts = r300NumVerts(rmesa, end - start, prim);
@@ -269,194 +341,161 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
if (type < 0 || num_verts <= 0)
return;
- if (vb->Elts) {
- if (num_verts > 65535) {
- /* not implemented yet */
- WARN_ONCE("Too many elts\n");
+ if (rmesa->ind_buf.bo) {
+ GLuint first, incr, offset = 0;
+
+ if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
+ num_verts > 65500) {
+ WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
return;
}
- /* Note: The following is incorrect, but it's the best I can do
- * without a major refactoring of how DMA memory is handled.
- * The problem: Ensuring that both vertex arrays *and* index
- * arrays are at the right position, and then ensuring that
- * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
- * at once.
- *
- * So why is the following incorrect? Well, it seems like
- * allocating the index array might actually evict the vertex
- * arrays. *sigh*
- */
- r300EmitElts(ctx, vb->Elts, num_verts);
- r300EmitAOS(rmesa, rmesa->state.aos_count, start);
- r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type);
- } else {
- r300EmitAOS(rmesa, rmesa->state.aos_count, start);
- r300FireAOS(rmesa, num_verts, type);
- }
-}
-
-static GLboolean r300RunRender(GLcontext * ctx,
- struct tnl_pipeline_stage *stage)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- int i;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
-
-
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- r300UpdateShaders(rmesa);
- if (r300EmitArrays(ctx))
- return GL_TRUE;
- r300UpdateShaderStates(rmesa);
- r300EmitCacheFlush(rmesa);
- r300EmitState(rmesa);
-
- for (i = 0; i < vb->PrimitiveCount; i++) {
- GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
- GLuint start = vb->Primitive[i].start;
- GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
- r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
- }
-
- r300EmitCacheFlush(rmesa);
+ r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0);
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
+ OUT_BATCH(rmesa->radeon.tcl.aos[0].count);
+ END_BATCH();
+ }
-#ifdef USER_BUFFERS
- r300UseArrays(ctx);
-#endif
+ r300_emit_scissor(rmesa->radeon.glCtx);
+ while (num_verts > 0) {
+ int nr;
+ int align;
+
+ nr = MIN2(num_verts, 65535);
+ nr -= (nr - first) % incr;
+
+ /* get alignment for IB correct */
+ if (nr != num_verts) {
+ do {
+ align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2);
+ if (align % 4)
+ nr -= incr;
+ } while(align % 4);
+ if (nr <= 0) {
+ WARN_ONCE("did the impossible happen? we never aligned nr to dword\n");
+ return;
+ }
+
+ }
+ r300FireEB(rmesa, nr, type, offset);
- r300ReleaseArrays(ctx);
+ num_verts -= nr;
+ offset += nr;
+ }
- return GL_FALSE;
-}
+ } else {
+ GLuint first, incr, offset = 0;
-#define FALLBACK_IF(expr) \
- do { \
- if (expr) { \
- if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
- WARN_ONCE("Software fallback:%s\n", \
- #expr); \
- return R300_FALLBACK_RAST; \
- } \
- } while(0)
-
-static int r300Fallback(GLcontext * ctx)
-{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
- const unsigned back = ctx->Stencil._BackFace;
-
- /* Do we need to use new-style shaders?
- * Also is there a better way to do this? */
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- struct r500_fragment_program *fp = (struct r500_fragment_program *)
- (char *)ctx->FragmentProgram._Current;
- if (fp) {
- if (!fp->translated) {
- r500TranslateFragmentShader(r300, fp);
- FALLBACK_IF(!fp->translated);
- }
+ if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
+ num_verts > 65535) {
+ WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
+ return;
}
- } else {
- struct r300_fragment_program *fp = (struct r300_fragment_program *)
- (char *)ctx->FragmentProgram._Current;
- if (fp) {
- if (!fp->translated) {
- r300TranslateFragmentShader(r300, fp);
- FALLBACK_IF(!fp->translated);
- }
+ r300_emit_scissor(rmesa->radeon.glCtx);
+ while (num_verts > 0) {
+ int nr;
+ nr = MIN2(num_verts, 65535);
+ nr -= (nr - first) % incr;
+ r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset);
+ r300FireAOS(rmesa, nr, type);
+ num_verts -= nr;
+ offset += nr;
}
}
-
- FALLBACK_IF(ctx->RenderMode != GL_RENDER);
-
- /* If GL_EXT_stencil_two_side is disabled, this fallback check can
- * be removed.
- */
- FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
- || ctx->Stencil.ValueMask[0] !=
- ctx->Stencil.ValueMask[back]
- || ctx->Stencil.WriteMask[0] !=
- ctx->Stencil.WriteMask[back]);
-
- if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
- FALLBACK_IF(ctx->Point.PointSprite);
-
- if (!r300->disable_lowimpact_fallback) {
- FALLBACK_IF(ctx->Polygon.StippleFlag);
- FALLBACK_IF(ctx->Multisample._Enabled);
- FALLBACK_IF(ctx->Line.StippleFlag);
- FALLBACK_IF(ctx->Line.SmoothFlag);
- FALLBACK_IF(ctx->Point.SmoothFlag);
- }
-
- return R300_FALLBACK_NONE;
+ COMMIT_BATCH();
}
-static GLboolean r300RunNonTCLRender(GLcontext * ctx,
- struct tnl_pipeline_stage *stage)
+static const char *getFallbackString(uint32_t bit)
{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
-
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
- return GL_TRUE;
-
- if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- return GL_TRUE;
-
- return r300RunRender(ctx, stage);
+ switch (bit) {
+ case R300_FALLBACK_VERTEX_PROGRAM :
+ return "vertex program";
+ case R300_FALLBACK_LINE_SMOOTH:
+ return "smooth lines";
+ case R300_FALLBACK_POINT_SMOOTH:
+ return "smooth points";
+ case R300_FALLBACK_POLYGON_SMOOTH:
+ return "smooth polygons";
+ case R300_FALLBACK_LINE_STIPPLE:
+ return "line stipple";
+ case R300_FALLBACK_POLYGON_STIPPLE:
+ return "polygon stipple";
+ case R300_FALLBACK_STENCIL_TWOSIDE:
+ return "two-sided stencil";
+ case R300_FALLBACK_RENDER_MODE:
+ return "render mode != GL_RENDER";
+ case R300_FALLBACK_FRAGMENT_PROGRAM:
+ return "fragment program";
+ case R300_FALLBACK_AOS_LIMIT:
+ return "aos limit";
+ case R300_FALLBACK_INVALID_BUFFERS:
+ return "invalid buffers";
+ default:
+ return "unknown";
+ }
}
-static GLboolean r300RunTCLRender(GLcontext * ctx,
- struct tnl_pipeline_stage *stage)
+void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct r300_vertex_program *vp;
+ uint32_t old_fallback = rmesa->fallback;
+ static uint32_t fallback_warn = 0;
+
+ if (mode) {
+ if ((fallback_warn & bit) == 0) {
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
+ _mesa_fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit));
+ fallback_warn |= bit;
+ }
+ rmesa->fallback |= bit;
+
+ /* update only if we change from no tcl fallbacks to some tcl fallbacks */
+ if (rmesa->options.hw_tcl_enabled) {
+ if (((old_fallback & R300_TCL_FALLBACK_MASK) == 0) &&
+ ((bit & R300_TCL_FALLBACK_MASK) > 0)) {
+ R300_STATECHANGE(rmesa, vap_cntl_status);
+ rmesa->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
+ }
+ }
- hw_tcl_on = future_hw_tcl_on;
+ /* update only if we change from no raster fallbacks to some raster fallbacks */
+ if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) &&
+ ((bit & R300_RASTER_FALLBACK_MASK) > 0)) {
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s\n", __FUNCTION__);
+ radeon_firevertices(&rmesa->radeon);
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ _swsetup_Wakeup( ctx );
+ }
+ } else {
+ rmesa->fallback &= ~bit;
- if (hw_tcl_on == GL_FALSE)
- return GL_TRUE;
+ /* update only if we have disabled all tcl fallbacks */
+ if (rmesa->options.hw_tcl_enabled) {
+ if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) {
+ R300_STATECHANGE(rmesa, vap_cntl_status);
+ rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS;
+ }
+ }
- if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
- hw_tcl_on = GL_FALSE;
- return GL_TRUE;
- }
+ /* update only if we have disabled all raster fallbacks */
+ if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) {
+ _swrast_flush( ctx );
- r300UpdateShaders(rmesa);
+ tnl->Driver.Render.Start = r300RenderStart;
+ tnl->Driver.Render.Finish = r300RenderFinish;
+ tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
+ tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
+ tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+ tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+ tnl->Driver.Render.Interp = _tnl_interp;
- vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
- if (vp->native == GL_FALSE) {
- hw_tcl_on = GL_FALSE;
- return GL_TRUE;
+ _tnl_invalidate_vertex_state( ctx, ~0 );
+ _tnl_invalidate_vertices( ctx, ~0 );
+ }
}
- return r300RunRender(ctx, stage);
}
-
-const struct tnl_pipeline_stage _r300_render_stage = {
- "r300 Hardware Rasterization",
- NULL,
- NULL,
- NULL,
- NULL,
- r300RunNonTCLRender
-};
-
-const struct tnl_pipeline_stage _r300_tcl_stage = {
- "r300 Hardware Transform, Clipping and Lighting",
- NULL,
- NULL,
- NULL,
- NULL,
- r300RunTCLRender
-};
diff --git a/r300/r300_render.h b/r300/r300_render.h
new file mode 100644
index 0000000..ec78547
--- /dev/null
+++ b/r300/r300_render.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_RENDER_H__
+#define __R300_RENDER_H__
+
+#include "main/mtypes.h"
+
+#define R300_FALLBACK_VERTEX_PROGRAM (1 << 0)
+#define R300_TCL_FALLBACK_MASK 0x0000ffff
+
+#define R300_FALLBACK_LINE_SMOOTH (1 << 16)
+#define R300_FALLBACK_POINT_SMOOTH (1 << 17)
+#define R300_FALLBACK_POLYGON_SMOOTH (1 << 18)
+#define R300_FALLBACK_LINE_STIPPLE (1 << 19)
+#define R300_FALLBACK_POLYGON_STIPPLE (1 << 20)
+#define R300_FALLBACK_STENCIL_TWOSIDE (1 << 21)
+#define R300_FALLBACK_RENDER_MODE (1 << 22)
+#define R300_FALLBACK_FRAGMENT_PROGRAM (1 << 23)
+#define R300_FALLBACK_AOS_LIMIT (1 << 30)
+#define R300_FALLBACK_INVALID_BUFFERS (1 << 31)
+#define R300_RASTER_FALLBACK_MASK 0xffff0000
+
+#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+#define MASK_X R300_WRITE_ENA_X
+#define MASK_Y R300_WRITE_ENA_Y
+#define MASK_Z R300_WRITE_ENA_Z
+#define MASK_W R300_WRITE_ENA_W
+
+#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
+ SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
+ SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
+ SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
+ SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
+ SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
+#error Cannot change these!
+#endif
+
+extern const struct tnl_pipeline_stage _r300_render_stage;
+
+extern void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode);
+
+extern void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim);
+
+#endif
diff --git a/r300/r300_shader.c b/r300/r300_shader.c
index f30fd98..a4f9db1 100644
--- a/r300/r300_shader.c
+++ b/r300/r300_shader.c
@@ -1,47 +1,79 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
#include "main/glheader.h"
#include "shader/program.h"
#include "tnl/tnl.h"
#include "r300_context.h"
-#include "r300_fragprog.h"
+#include "r300_fragprog_common.h"
+
+static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont *cache)
+{
+ struct r300_fragment_program *tmp, *fp = cache->progs;
+
+ while (fp) {
+ tmp = fp->next;
+ rc_constants_destroy(&fp->code.constants);
+ _mesa_free(fp);
+ fp = tmp;
+ }
+}
+
+static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *cache)
+{
+ struct r300_vertex_program *tmp, *vp = cache->progs;
+
+ while (vp) {
+ tmp = vp->next;
+ rc_constants_destroy(&vp->code.constants);
+ _mesa_reference_vertprog(ctx, &vp->Base, NULL);
+ _mesa_free(vp);
+ vp = tmp;
+ }
+}
static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
GLuint id)
{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct r300_vertex_program_cont *vp;
- struct r300_fragment_program *r300_fp;
- struct r500_fragment_program *r500_fp;
+ struct r300_fragment_program_cont *fp;
switch (target) {
case GL_VERTEX_STATE_PROGRAM_NV:
case GL_VERTEX_PROGRAM_ARB:
vp = CALLOC_STRUCT(r300_vertex_program_cont);
- return _mesa_init_vertex_program(ctx, &vp->mesa_program,
- target, id);
- case GL_FRAGMENT_PROGRAM_ARB:
- if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- r500_fp = CALLOC_STRUCT(r500_fragment_program);
- r500_fp->ctx = ctx;
- return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program,
- target, id);
- } else {
- r300_fp = CALLOC_STRUCT(r300_fragment_program);
- return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program,
- target, id);
- }
+ return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
case GL_FRAGMENT_PROGRAM_NV:
- if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- r500_fp = CALLOC_STRUCT(r500_fragment_program);
- return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program,
- target, id);
- } else {
- r300_fp = CALLOC_STRUCT(r300_fragment_program);
- return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program,
- target, id);
- }
+ case GL_FRAGMENT_PROGRAM_ARB:
+ fp = CALLOC_STRUCT(r300_fragment_program_cont);
+ return _mesa_init_fragment_program(ctx, &fp->Base, target, id);
+
default:
_mesa_problem(ctx, "Bad target in r300NewProgram");
}
@@ -51,26 +83,35 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog)
{
+ struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog;
+ struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog;
+
+ switch (prog->Target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ freeVertProgCache(ctx, vp);
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ freeFragProgCache(ctx, fp);
+ break;
+ }
+
_mesa_delete_program(ctx, prog);
}
static void
r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct r300_vertex_program_cont *vp = (void *)prog;
- struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog;
- struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog;
+ struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog;
+ struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog;
switch (target) {
case GL_VERTEX_PROGRAM_ARB:
+ freeVertProgCache(ctx, vp);
vp->progs = NULL;
break;
case GL_FRAGMENT_PROGRAM_ARB:
- if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
- r500_fp->translated = GL_FALSE;
- else
- r300_fp->translated = GL_FALSE;
+ freeFragProgCache(ctx, fp);
+ fp->progs = NULL;
break;
}
@@ -81,7 +122,15 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
static GLboolean
r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
- return GL_TRUE;
+ if (target == GL_FRAGMENT_PROGRAM_ARB) {
+ struct r300_fragment_program *fp = r300SelectAndTranslateFragmentShader(ctx);
+
+ return !fp->error;
+ } else {
+ struct r300_vertex_program *vp = r300SelectAndTranslateVertexShader(ctx);
+
+ return !vp->error;
+ }
}
void r300InitShaderFuncs(struct dd_function_table *functions)
diff --git a/r300/r300_state.c b/r300/r300_state.c
index 79f0b36..9301543 100644
--- a/r300/r300_state.c
+++ b/r300/r300_state.c
@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
+#include "main/framebuffer.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "main/texformat.h"
@@ -52,22 +53,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "shader/prog_statevars.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
+#include "tnl/t_vp_build.h"
-#include "radeon_ioctl.h"
-#include "radeon_state.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_reg.h"
#include "r300_emit.h"
-#include "r300_fragprog.h"
#include "r300_tex.h"
+#include "r300_fragprog_common.h"
+#include "r300_render.h"
+#include "r300_vertprog.h"
#include "drirenderbuffer.h"
-extern int future_hw_tcl_on;
-extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx);
-
static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4])
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
@@ -366,12 +365,13 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
GLint *ip;
/* no VAP UCP on non-TCL chipsets */
- if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ if (!rmesa->options.hw_tcl_enabled)
return;
p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+ R300_STATECHANGE( rmesa, vap_flush );
R300_STATECHANGE( rmesa, vpucp[p] );
rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
@@ -385,7 +385,7 @@ static void r300SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state)
GLuint p;
/* no VAP UCP on non-TCL chipsets */
- if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ if (!r300->options.hw_tcl_enabled)
return;
p = cap - GL_CLIP_PLANE0;
@@ -433,6 +433,10 @@ static void r300UpdateCulling(GLcontext * ctx)
break;
}
+ /* Winding is inverted when rendering to FBO */
+ if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+ val ^= R300_FRONT_FACE_CW;
+
R300_STATECHANGE(r300, cul);
r300->hw.cul.cmd[R300_CUL_CULL] = val;
}
@@ -453,22 +457,14 @@ static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
- struct r300_fragment_program *fp = (struct r300_fragment_program *)
- (char *)ctx->FragmentProgram._Current;
- return (fp && fp->WritesDepth);
- } else {
- struct r500_fragment_program* fp =
- (struct r500_fragment_program*)(char*)
- ctx->FragmentProgram._Current;
- return (fp && fp->writes_depth);
- }
+ return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth;
}
static void r300SetEarlyZState(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
GLuint topZ = R300_ZTOP_ENABLE;
+ GLuint w_fmt, fgdepthsrc;
if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS)
topZ = R300_ZTOP_DISABLE;
@@ -476,6 +472,8 @@ static void r300SetEarlyZState(GLcontext * ctx)
topZ = R300_ZTOP_DISABLE;
else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill)
topZ = R300_ZTOP_DISABLE;
+ else if (r300->radeon.query.current)
+ topZ = R300_ZTOP_DISABLE;
if (topZ != r300->hw.zstencil_format.cmd[2]) {
/* Note: This completely reemits the stencil format.
@@ -485,6 +483,26 @@ static void r300SetEarlyZState(GLcontext * ctx)
R300_STATECHANGE(r300, zstencil_format);
r300->hw.zstencil_format.cmd[2] = topZ;
}
+
+ /* w_fmt value is set to get best performance
+ * see p.130 R5xx 3D acceleration guide v1.3 */
+ if (current_fragment_program_writes_depth(ctx)) {
+ fgdepthsrc = R300_FG_DEPTH_SRC_SHADER;
+ w_fmt = R300_W_FMT_W24 | R300_W_SRC_US;
+ } else {
+ fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
+ w_fmt = R300_W_FMT_W0 | R300_W_SRC_US;
+ }
+
+ if (w_fmt != r300->hw.us_out_fmt.cmd[5]) {
+ R300_STATECHANGE(r300, us_out_fmt);
+ r300->hw.us_out_fmt.cmd[5] = w_fmt;
+ }
+
+ if (fgdepthsrc != r300->hw.fg_depth_src.cmd[1]) {
+ R300_STATECHANGE(r300, fg_depth_src);
+ r300->hw.fg_depth_src.cmd[1] = fgdepthsrc;
+ }
}
static void r300SetAlphaState(GLcontext * ctx)
@@ -535,8 +553,6 @@ static void r300SetAlphaState(GLcontext * ctx)
R300_STATECHANGE(r300, at);
r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc;
r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
-
- r300SetEarlyZState(ctx);
}
static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
@@ -584,15 +600,35 @@ static void r300SetDepthState(GLcontext * ctx)
r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT;
}
+}
- r300SetEarlyZState(ctx);
+static void r300CatchStencilFallback(GLcontext *ctx)
+{
+ const unsigned back = ctx->Stencil._BackFace;
+
+ if (ctx->Stencil._Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
+ || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
+ || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) {
+ r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE);
+ } else {
+ r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE);
+ }
}
static void r300SetStencilState(GLcontext * ctx, GLboolean state)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLboolean hw_stencil = GL_FALSE;
- if (r300->state.stencil.hw_stencil) {
+ r300CatchStencilFallback(ctx);
+
+ if (ctx->DrawBuffer) {
+ struct radeon_renderbuffer *rrbStencil
+ = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+ hw_stencil = (rrbStencil && rrbStencil->bo);
+ }
+
+ if (hw_stencil) {
R300_STATECHANGE(r300, zs);
if (state) {
r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
@@ -601,10 +637,6 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state)
r300->hw.zs.cmd[R300_ZS_CNTL_0] &=
~R300_STENCIL_ENABLE;
}
- } else {
-#if R200_MERGED
- FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state);
-#endif
}
}
@@ -737,7 +769,12 @@ static void r300ColorMask(GLcontext * ctx,
static void r300PointSize(GLcontext * ctx, GLfloat size)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- /* same size limits for AA, non-AA points */
+
+ /* We need to clamp to user defined range here, because
+ * the HW clamping happens only for per vertex point size. */
+ size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize);
+
+ /* same size limits for AA, non-AA points */
size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize);
R300_STATECHANGE(r300, ps);
@@ -830,29 +867,33 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode)
R300_STATECHANGE(rmesa, shade);
rmesa->hw.shade.cmd[1] = 0x00000002;
+ R300_STATECHANGE(rmesa, shade2);
switch (mode) {
case GL_FLAT:
- rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT;
+ rmesa->hw.shade2.cmd[1] = R300_RE_SHADE_MODEL_FLAT;
break;
case GL_SMOOTH:
- rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH;
+ rmesa->hw.shade2.cmd[1] = R300_RE_SHADE_MODEL_SMOOTH;
break;
default:
return;
}
- rmesa->hw.shade.cmd[3] = 0x00000000;
- rmesa->hw.shade.cmd[4] = 0x00000000;
+ rmesa->hw.shade2.cmd[2] = 0x00000000;
+ rmesa->hw.shade2.cmd[3] = 0x00000000;
}
static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
GLenum func, GLint ref, GLuint mask)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- GLuint refmask =
- ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT)
- | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT);
- const unsigned back = ctx->Stencil._BackFace;
+ GLuint refmask;
GLuint flag;
+ const unsigned back = ctx->Stencil._BackFace;
+
+ r300CatchStencilFallback(ctx);
+
+ refmask = ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT)
+ | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT);
R300_STATECHANGE(rmesa, zs);
rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK;
@@ -880,6 +921,8 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ r300CatchStencilFallback(ctx);
+
R300_STATECHANGE(rmesa, zs);
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
~(R300_STENCILREF_MASK <<
@@ -896,6 +939,8 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
r300ContextPtr rmesa = R300_CONTEXT(ctx);
const unsigned back = ctx->Stencil._BackFace;
+ r300CatchStencilFallback(ctx);
+
R300_STATECHANGE(rmesa, zs);
/* It is easier to mask what's left.. */
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &=
@@ -924,28 +969,32 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
* Window position and viewport transformation
*/
-/*
- * To correctly position primitives:
- */
-#define SUBPIXEL_X 0.125
-#define SUBPIXEL_Y 0.125
-
static void r300UpdateWindow(GLcontext * ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
const GLfloat *v = ctx->Viewport._WindowMap.m;
+ const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+ const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+ GLfloat y_scale, y_bias;
+
+ if (render_to_fbo) {
+ y_scale = 1.0;
+ y_bias = 0;
+ } else {
+ y_scale = -1.0;
+ y_bias = yoffset;
+ }
GLfloat sx = v[MAT_SX];
- GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
- GLfloat sy = -v[MAT_SY];
- GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y;
- GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
- GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
+ GLfloat tx = v[MAT_TX] + xoffset;
+ GLfloat sy = v[MAT_SY] * y_scale;
+ GLfloat ty = (v[MAT_TY] * y_scale) + y_bias;
+ GLfloat sz = v[MAT_SZ] * depthScale;
+ GLfloat tz = v[MAT_TZ] * depthScale;
- R300_FIREVERTICES(rmesa);
R300_STATECHANGE(rmesa, vpt);
rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx);
@@ -964,6 +1013,8 @@ static void r300Viewport(GLcontext * ctx, GLint x, GLint y,
* values, or keep the originals hanging around.
*/
r300UpdateWindow(ctx);
+
+ radeon_viewport(ctx, x, y, width, height);
}
static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
@@ -974,13 +1025,13 @@ static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
void r300UpdateViewportOffset(GLcontext * ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = ((radeonContextPtr) rmesa)->dri.drawable;
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
GLfloat xoffset = (GLfloat) dPriv->x;
GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h;
const GLfloat *v = ctx->Viewport._WindowMap.m;
- GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
- GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+ GLfloat tx = v[MAT_TX] + xoffset;
+ GLfloat ty = (-v[MAT_TY]) + yoffset;
if (rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) ||
rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty)) {
@@ -997,138 +1048,26 @@ void r300UpdateViewportOffset(GLcontext * ctx)
}
/**
- * Tell the card where to render (offset, pitch).
- * Effected by glDrawBuffer, etc
- */
-void r300UpdateDrawBuffer(GLcontext * ctx)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- r300ContextPtr r300 = rmesa;
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- driRenderbuffer *drb;
-
- if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
- /* draw to front */
- drb =
- (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
- Renderbuffer;
- } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
- /* draw to back */
- drb =
- (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
- Renderbuffer;
- } else {
- /* drawing to multiple buffers, or none */
- return;
- }
-
- assert(drb);
- assert(drb->flippedPitch);
-
- R300_STATECHANGE(rmesa, cb);
-
- r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset +
- r300->radeon.radeonScreen->fbLocation;
- r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch; //r300->radeon.state.color.drawPitch;
-
- if (r300->radeon.radeonScreen->cpp == 4)
- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
- else
- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
-
- if (r300->radeon.sarea->tiling_enabled)
- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
-#if 0
- R200_STATECHANGE(rmesa, ctx);
-
- /* Note: we used the (possibly) page-flipped values */
- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
- = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
- & R200_COLOROFFSET_MASK);
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
-
- if (rmesa->sarea->tiling_enabled) {
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
- R200_COLOR_TILE_ENABLE;
- }
-#endif
-}
-
-static void
-r300FetchStateParameter(GLcontext * ctx,
- const gl_state_index state[STATE_LENGTH],
- GLfloat * value)
-{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
-
- switch (state[0]) {
- case STATE_INTERNAL:
- switch (state[1]) {
- case STATE_R300_WINDOW_DIMENSION:
- value[0] = r300->radeon.dri.drawable->w * 0.5f; /* width*0.5 */
- value[1] = r300->radeon.dri.drawable->h * 0.5f; /* height*0.5 */
- value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */
- value[3] = 1.0F; /* not used */
- break;
-
- case STATE_R300_TEXRECT_FACTOR:{
- struct gl_texture_object *t =
- ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX];
-
- if (t && t->Image[0][t->BaseLevel]) {
- struct gl_texture_image *image =
- t->Image[0][t->BaseLevel];
- value[0] = 1.0 / image->Width2;
- value[1] = 1.0 / image->Height2;
- } else {
- value[0] = 1.0;
- value[1] = 1.0;
- }
- value[2] = 1.0;
- value[3] = 1.0;
- break;
- }
-
- default:
- break;
- }
- break;
-
- default:
- break;
- }
-}
-
-/**
* Update R300's own internal state parameters.
* For now just STATE_R300_WINDOW_DIMENSION
*/
-void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
+static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
{
- struct r300_fragment_program *fp;
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct gl_program_parameter_list *paramList;
- GLuint i;
- if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM)))
+ if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
return;
- fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
- if (!fp)
+ if (!ctx->FragmentProgram._Current || !rmesa->selected_fp)
return;
- paramList = fp->mesa_program.Base.Parameters;
+ paramList = ctx->FragmentProgram._Current->Base.Parameters;
if (!paramList)
return;
- for (i = 0; i < paramList->NumParameters; i++) {
- if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
- r300FetchStateParameter(ctx,
- paramList->Parameters[i].
- StateIndexes,
- paramList->ParameterValues[i]);
- }
- }
+ _mesa_load_state_parameters(ctx, paramList);
}
/* =============================================================
@@ -1235,9 +1174,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
- struct r300_fragment_program *fp = (struct r300_fragment_program *)
- (char *)ctx->FragmentProgram._Current;
- struct r300_fragment_program_code *code = &fp->code;
+ struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300;
R300_STATECHANGE(r300, fpt);
@@ -1271,15 +1208,15 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
}
r300->hw.fpt.cmd[R300_FPT_CMD_0] =
- cmdpacket0(R300_US_TEX_INST_0, code->tex.length);
+ cmdpacket0(r300->radeon.radeonScreen,
+ R300_US_TEX_INST_0, code->tex.length);
}
static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
- struct r500_fragment_program *fp = (struct r500_fragment_program *)
- (char *)ctx->FragmentProgram._Current;
- struct r500_fragment_program_code *code = &fp->code;
+ struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500;
/* find all the texture instructions and relocate the texture units */
for (i = 0; i < code->inst_end + 1; i++) {
@@ -1319,16 +1256,15 @@ static GLuint translate_lod_bias(GLfloat bias)
return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
}
+
static void r300SetupTextures(GLcontext * ctx)
{
int i, mtu;
- struct r300_tex_obj *t;
+ struct radeon_tex_obj *t;
r300ContextPtr r300 = R300_CONTEXT(ctx);
int hw_tmu = 0;
int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */
int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, };
- struct r300_fragment_program *fp = (struct r300_fragment_program *)
- (char *)ctx->FragmentProgram._Current;
R300_STATECHANGE(r300, txe);
R300_STATECHANGE(r300, tex.filter);
@@ -1343,7 +1279,7 @@ static void r300SetupTextures(GLcontext * ctx)
r300->hw.txe.cmd[R300_TXE_ENABLE] = 0x0;
mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
- if (RADEON_DEBUG & DEBUG_STATE)
+ if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "mtu=%d\n", mtu);
if (mtu > R300_MAX_TEXTURE_UNITS) {
@@ -1356,24 +1292,19 @@ static void r300SetupTextures(GLcontext * ctx)
/* We cannot let disabled tmu offsets pass DRM */
for (i = 0; i < mtu; i++) {
if (ctx->Texture.Unit[i]._ReallyEnabled) {
-
-#if 0 /* Enables old behaviour */
- hw_tmu = i;
-#endif
tmu_mappings[i] = hw_tmu;
- t = (r300TexObjPtr) r300->state.texture.unit[i].texobj->DriverData;
- /* XXX questionable fix for bug 9170: */
+ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
if (!t)
continue;
- if ((t->format & 0xffffff00) == 0xffffff00) {
+ if ((t->pp_txformat & 0xffffff00) == 0xffffff00) {
WARN_ONCE
("unknown texture format (entry %x) encountered. Help me !\n",
- t->format & 0xff);
+ t->pp_txformat & 0xff);
}
- if (RADEON_DEBUG & DEBUG_STATE)
+ if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr,
"Activating texture unit %d\n", i);
@@ -1381,29 +1312,28 @@ static void r300SetupTextures(GLcontext * ctx)
r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 +
hw_tmu] =
- gen_fixed_filter(t->filter) | (hw_tmu << 28);
+ gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28);
/* Note: There is a LOD bias per texture unit and a LOD bias
* per texture object. We add them here to get the correct behaviour.
* (The per-texture object LOD bias was introduced in OpenGL 1.4
* and is not present in the EXT_texture_object extension).
*/
r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
- t->filter_1 |
- translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias);
+ t->pp_txfilter_1 |
+ translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
- t->size;
+ t->pp_txsize;
r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
- hw_tmu] = t->format;
+ hw_tmu] = t->pp_txformat;
r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
- t->pitch_reg;
- r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 +
- hw_tmu] = t->offset;
+ t->pp_txpitch;
+ r300->hw.textures[hw_tmu] = t;
- if (t->offset & R300_TXO_MACRO_TILE) {
+ if (t->tile_bits & R300_TXO_MACRO_TILE) {
WARN_ONCE("macro tiling enabled!\n");
}
- if (t->offset & R300_TXO_MICRO_TILE) {
+ if (t->tile_bits & R300_TXO_MICRO_TILE) {
WARN_ONCE("micro tiling enabled!\n");
}
@@ -1419,40 +1349,48 @@ static void r300SetupTextures(GLcontext * ctx)
}
}
+ /* R3xx and R4xx chips require that the texture unit corresponding to
+ * KIL instructions is really enabled.
+ *
+ * We do some fakery here and in the state atom emit logic to enable
+ * the texture without tripping up the CS checker in the kernel.
+ */
+ if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+ if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) {
+ last_hw_tmu++;
+
+ r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
+
+ r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.size.cmd[R300_TEX_VALUE_0] = 0; /* 1x1 texture */
+ r300->hw.tex.format.cmd[R300_TEX_VALUE_0] = 0; /* A8 format */
+ r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0] = 0;
+ }
+ }
+
r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_FILTER0_0, last_hw_tmu + 1);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1);
r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1);
r300->hw.tex.size.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1);
r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1);
r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_FORMAT2_0, last_hw_tmu + 1);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1);
r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1);
r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
- if (!fp) /* should only happenen once, just after context is created */
- return;
+ r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings);
- if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
- if (fp->mesa_program.UsesKill && last_hw_tmu < 0) {
- // The KILL operation requires the first texture unit
- // to be enabled.
- r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
- r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
- r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_FILTER0_0, 1);
- }
- r300SetupFragmentShaderTextures(ctx, tmu_mappings);
- } else
- r500SetupFragmentShaderTextures(ctx, tmu_mappings);
-
- if (RADEON_DEBUG & DEBUG_STATE)
+ if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n",
r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu);
}
@@ -1469,26 +1407,21 @@ union r300_outputs_written {
static void r300SetupRSUnit(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &tnl->vb;
union r300_outputs_written OutputsWritten;
GLuint InputsRead;
int fp_reg, high_rr;
int col_ip, tex_ip;
int rs_tex_count = 0;
- int i, count, col_fmt;
+ int i, col_fmt, hw_tcl_on;
+
+ hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
- OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+ OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten;
else
- RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset);
+ RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
- if (ctx->FragmentProgram._Current)
- InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
- else {
- fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
- return; /* This should only ever happen once.. */
- }
+ InputsRead = r300->selected_fp->InputsRead;
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
@@ -1507,15 +1440,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
if (InputsRead & FRAG_BIT_COL0) {
if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
- count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
- if (count == 4)
- col_fmt = R300_RS_COL_FMT_RGBA;
- else if (count == 3)
- col_fmt = R300_RS_COL_FMT_RGB1;
- else
- col_fmt = R300_RS_COL_FMT_0001;
-
- r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt);
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
InputsRead &= ~FRAG_BIT_COL0;
++col_ip;
@@ -1527,15 +1452,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
if (InputsRead & FRAG_BIT_COL1) {
if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
- count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
- if (count == 4)
- col_fmt = R300_RS_COL_FMT_RGBA;
- else if (count == 3)
- col_fmt = R300_RS_COL_FMT_RGB1;
- else
- col_fmt = R300_RS_COL_FMT_0001;
-
- r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt);
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
InputsRead &= ~FRAG_BIT_COL1;
++col_ip;
@@ -1545,6 +1462,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
}
}
+ /* We always route 4 texcoord components */
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
continue;
@@ -1554,64 +1472,27 @@ static void r300SetupRSUnit(GLcontext * ctx)
continue;
}
- int swiz;
-
- /* with TCL we always seem to route 4 components */
- if (hw_tcl_on)
- count = 4;
- else
- count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
-
- switch(count) {
- case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break;
- case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
- default:
- case 1:
- case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
- };
-
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count);
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
InputsRead &= ~(FRAG_BIT_TEX0 << i);
- rs_tex_count += count;
- ++tex_ip;
- ++fp_reg;
- }
-
- if (InputsRead & FRAG_BIT_FOGC) {
- if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_FOGC;
- rs_tex_count += 4;
- ++tex_ip;
- ++fp_reg;
- } else {
- WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
- }
- }
-
- if (InputsRead & FRAG_BIT_WPOS) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_WPOS;
rs_tex_count += 4;
++tex_ip;
++fp_reg;
}
- InputsRead &= ~FRAG_BIT_WPOS;
/* Setup default color if no color or tex was set */
if (rs_tex_count == 0 && col_ip == 0) {
- r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+ r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0);
+ r300->hw.ri.cmd[R300_RI_INTERP_0] = R300_RS_COL_PTR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
++col_ip;
}
high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
- r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+ r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
r300->hw.rc.cmd[2] |= high_rr - 1;
- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, high_rr);
if (InputsRead)
WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
@@ -1620,26 +1501,21 @@ static void r300SetupRSUnit(GLcontext * ctx)
static void r500SetupRSUnit(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &tnl->vb;
union r300_outputs_written OutputsWritten;
GLuint InputsRead;
int fp_reg, high_rr;
int col_ip, tex_ip;
int rs_tex_count = 0;
- int i, count, col_fmt;
+ int i, col_fmt, hw_tcl_on;
+
+ hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
- OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+ OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten;
else
- RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset);
+ RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
- if (ctx->FragmentProgram._Current)
- InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
- else {
- fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
- return; /* This should only ever happen once.. */
- }
+ InputsRead = r300->selected_fp->InputsRead;
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
@@ -1658,15 +1534,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
if (InputsRead & FRAG_BIT_COL0) {
if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
- count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
- if (count == 4)
- col_fmt = R300_RS_COL_FMT_RGBA;
- else if (count == 3)
- col_fmt = R300_RS_COL_FMT_RGB1;
- else
- col_fmt = R300_RS_COL_FMT_0001;
-
- r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt);
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
InputsRead &= ~FRAG_BIT_COL0;
++col_ip;
@@ -1678,15 +1546,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
if (InputsRead & FRAG_BIT_COL1) {
if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
- count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
- if (count == 4)
- col_fmt = R300_RS_COL_FMT_RGBA;
- else if (count == 3)
- col_fmt = R300_RS_COL_FMT_RGB1;
- else
- col_fmt = R300_RS_COL_FMT_0001;
-
- r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt);
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
InputsRead &= ~FRAG_BIT_COL1;
++col_ip;
@@ -1696,7 +1556,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
}
}
-
+ /* We always route 4 texcoord components */
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
continue;
@@ -1706,74 +1566,13 @@ static void r500SetupRSUnit(GLcontext * ctx)
continue;
}
- int swiz = 0;
-
- /* with TCL we always seem to route 4 components */
- if (hw_tcl_on)
- count = 4;
- else
- count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
-
- if (count == 4) {
- swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
- swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
- swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT;
- swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT;
- } else if (count == 3) {
- swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
- swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
- swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT;
- swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
- } else if (count == 2) {
- swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
- swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
- swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
- } else if (count == 1) {
- swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
- swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
- } else {
- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT;
- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
- swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
- }
-
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz;
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~(FRAG_BIT_TEX0 << i);
- rs_tex_count += count;
- ++tex_ip;
- ++fp_reg;
- }
-
- if (InputsRead & FRAG_BIT_FOGC) {
- if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
- ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
- ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
- ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
-
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_FOGC;
- rs_tex_count += 4;
- ++tex_ip;
- ++fp_reg;
- } else {
- WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
- }
- }
-
- if (InputsRead & FRAG_BIT_WPOS) {
r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
- ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
- ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
- ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
+ ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
+ ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
+ ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_WPOS;
+ InputsRead &= ~(FRAG_BIT_TEX0 << i);
rs_tex_count += 4;
++tex_ip;
++fp_reg;
@@ -1781,72 +1580,25 @@ static void r500SetupRSUnit(GLcontext * ctx)
/* Setup default color if no color or tex was set */
if (rs_tex_count == 0 && col_ip == 0) {
- r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+ r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0);
+ r300->hw.ri.cmd[R300_RI_INTERP_0] = R500_RS_COL_PTR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
++col_ip;
}
high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
- r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
- r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1);
+ r300->hw.rc.cmd[1] = (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+ r300->hw.rc.cmd[2] = 0xC0 | (high_rr - 1);
- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, high_rr);
if (InputsRead)
WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
}
-
-
-
-#define bump_vpu_count(ptr, new_count) do{\
- drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
- int _nc=(new_count)/4; \
- assert(_nc < 256); \
- if(_nc>_p->vpu.count)_p->vpu.count=_nc;\
- }while(0)
-
-static INLINE void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, struct r300_vertex_shader_fragment *vsf)
-{
- int i;
-
- if (vsf->length == 0)
- return;
-
- if (vsf->length & 0x3) {
- fprintf(stderr, "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n");
- _mesa_exit(-1);
- }
-
- switch ((dest >> 8) & 0xf) {
- case 0:
- R300_STATECHANGE(r300, vpi);
- for (i = 0; i < vsf->length; i++)
- r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]);
- bump_vpu_count(r300->hw.vpi.cmd, vsf->length + 4 * (dest & 0xff));
- break;
-
- case 2:
- R300_STATECHANGE(r300, vpp);
- for (i = 0; i < vsf->length; i++)
- r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]);
- bump_vpu_count(r300->hw.vpp.cmd, vsf->length + 4 * (dest & 0xff));
- break;
- case 4:
- R300_STATECHANGE(r300, vps);
- for (i = 0; i < vsf->length; i++)
- r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]);
- bump_vpu_count(r300->hw.vps.cmd, vsf->length + 4 * (dest & 0xff));
- break;
- default:
- fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
- _mesa_exit(-1);
- }
-}
-
#define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c))
-
-static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
+void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
GLuint output_count, GLuint temp_count)
{
int vtx_mem_size;
@@ -1870,7 +1622,7 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count);
R300_STATECHANGE(rmesa, vap_cntl);
- if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ if (rmesa->options.hw_tcl_enabled) {
rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] =
(pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) |
(pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) |
@@ -1900,114 +1652,6 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
}
-static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
-{
- struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader);
- GLuint o_reg = 0;
- GLuint i_reg = 0;
- int i;
- int inst_count = 0;
- int param_count = 0;
- int program_end = 0;
-
- for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) {
- if (rmesa->state.sw_tcl_inputs[i] != -1) {
- prog->program.body.i[program_end + 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, GL_FALSE, GL_FALSE, o_reg++, VSF_FLAG_ALL, PVS_DST_REG_OUT);
- prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
- prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
- prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
- program_end += 4;
- i_reg++;
- }
- }
-
- prog->program.length = program_end;
-
- r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START,
- &(prog->program));
- inst_count = (prog->program.length / 4) - 1;
-
- r300VapCntl(rmesa, i_reg, o_reg, 0);
-
- R300_STATECHANGE(rmesa, pvs);
- rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
- (0 << R300_PVS_FIRST_INST_SHIFT) |
- (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (inst_count << R300_PVS_LAST_INST_SHIFT);
- rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
- (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
- (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
- rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
- (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-}
-
-static int bit_count (int x)
-{
- x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U);
- x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U);
- x = (x >> 16) + (x & 0xffff);
- x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f);
- return (x >> 8) + (x & 0x00ff);
-}
-
-static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
-{
- GLcontext *ctx = rmesa->radeon.glCtx;
- struct r300_vertex_program *prog = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
- int inst_count = 0;
- int param_count = 0;
-
- /* FIXME: r300SetupVertexProgramFragment */
- R300_STATECHANGE(rmesa, vpp);
- param_count =
- r300VertexProgUpdateParams(ctx,
- (struct r300_vertex_program_cont *)
- ctx->VertexProgram._Current,
- (float *)&rmesa->hw.vpp.
- cmd[R300_VPP_PARAM_0]);
- bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
- param_count /= 4;
-
- r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program));
- inst_count = (prog->program.length / 4) - 1;
-
- r300VapCntl(rmesa, bit_count(prog->key.InputsRead),
- bit_count(prog->key.OutputsWritten), prog->num_temporaries);
-
- R300_STATECHANGE(rmesa, pvs);
- rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
- (0 << R300_PVS_FIRST_INST_SHIFT) |
- (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (inst_count << R300_PVS_LAST_INST_SHIFT);
- rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
- (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
- (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
- rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
- (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-}
-
-static void r300SetupVertexProgram(r300ContextPtr rmesa)
-{
- GLcontext *ctx = rmesa->radeon.glCtx;
-
- /* Reset state, in case we don't use something */
- ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
- ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
- ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
-
- /* Not sure why this doesnt work...
- 0x400 area might have something to do with pixel shaders as it appears right after pfs programming.
- 0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */
- //setup_vertex_shader_fragment(rmesa, 0x406, &unk4);
- if (hw_tcl_on && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->translated) {
- r300SetupRealVertexProgram(rmesa);
- } else {
- /* FIXME: This needs to be replaced by vertex shader generation code. */
- r300SetupDefaultVertexProgram(rmesa);
- }
-
-}
-
/**
* Enable/Disable states.
*
@@ -2015,20 +1659,13 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa)
*/
static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
{
- if (RADEON_DEBUG & DEBUG_STATE)
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(cap),
state ? "GL_TRUE" : "GL_FALSE");
switch (cap) {
- case GL_TEXTURE_1D:
- case GL_TEXTURE_2D:
- case GL_TEXTURE_3D:
- /* empty */
- break;
- case GL_FOG:
- /* empty */
- break;
case GL_ALPHA_TEST:
r300SetAlphaState(ctx);
break;
@@ -2046,22 +1683,46 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
case GL_CLIP_PLANE5:
r300SetClipPlaneState(ctx, cap, state);
break;
+ case GL_CULL_FACE:
+ r300UpdateCulling(ctx);
+ break;
case GL_DEPTH_TEST:
r300SetDepthState(ctx);
break;
- case GL_STENCIL_TEST:
- r300SetStencilState(ctx, state);
+ case GL_LINE_SMOOTH:
+ if (rmesa->options.conformance_mode)
+ r300SwitchFallback(ctx, R300_FALLBACK_LINE_SMOOTH, ctx->Line.SmoothFlag);
break;
- case GL_CULL_FACE:
- r300UpdateCulling(ctx);
+ case GL_LINE_STIPPLE:
+ if (rmesa->options.conformance_mode)
+ r300SwitchFallback(ctx, R300_FALLBACK_LINE_STIPPLE, ctx->Line.StippleFlag);
+ break;
+ case GL_POINT_SMOOTH:
+ if (rmesa->options.conformance_mode)
+ r300SwitchFallback(ctx, R300_FALLBACK_POINT_SMOOTH, ctx->Point.SmoothFlag);
+ break;
+ case GL_POLYGON_SMOOTH:
+ if (rmesa->options.conformance_mode)
+ r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_SMOOTH, ctx->Polygon.SmoothFlag);
+ break;
+ case GL_POLYGON_STIPPLE:
+ if (rmesa->options.conformance_mode)
+ r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_STIPPLE, ctx->Polygon.StippleFlag);
break;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
case GL_POLYGON_OFFSET_FILL:
r300SetPolygonOffsetState(ctx, state);
break;
+ case GL_SCISSOR_TEST:
+ radeon_firevertices(&rmesa->radeon);
+ rmesa->radeon.state.scissor.enabled = state;
+ radeonUpdateScissor( ctx );
+ break;
+ case GL_STENCIL_TEST:
+ r300SetStencilState(ctx, state);
+ break;
default:
- radeonEnable(ctx, cap, state);
break;
}
}
@@ -2072,15 +1733,14 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
static void r300ResetHwState(r300ContextPtr r300)
{
GLcontext *ctx = r300->radeon.glCtx;
- int has_tcl = 1;
+ int has_tcl;
- if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- has_tcl = 0;
+ has_tcl = r300->options.hw_tcl_enabled;
- if (RADEON_DEBUG & DEBUG_STATE)
+ if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s\n", __FUNCTION__);
- r300UpdateWindow(ctx);
+ radeon_firevertices(&r300->radeon);
r300ColorMask(ctx,
ctx->Color.ColorMask[RCOMP],
@@ -2102,8 +1762,6 @@ static void r300ResetHwState(r300ContextPtr r300)
r300UpdateCulling(ctx);
- r300UpdateTextureState(ctx);
-
r300SetBlendState(ctx);
r300SetLogicOpState(ctx);
@@ -2182,8 +1840,8 @@ static void r300ResetHwState(r300ContextPtr r300)
}
/* XXX: Enable anti-aliasing? */
- r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE;
- r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = 0;
+ r300->hw.gb_misc2.cmd[R300_GB_MISC2_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE;
+ r300->hw.gb_misc2.cmd[R300_GB_MISC2_SELECT] = 0;
r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0);
r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0);
@@ -2242,20 +1900,6 @@ static void r300ResetHwState(r300ContextPtr r300)
r300BlendColor(ctx, ctx->Color.BlendColor);
- /* Again, r300ClearBuffer uses this */
- r300->hw.cb.cmd[R300_CB_OFFSET] =
- r300->radeon.state.color.drawOffset +
- r300->radeon.radeonScreen->fbLocation;
- r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
-
- if (r300->radeon.radeonScreen->cpp == 4)
- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
- else
- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
-
- if (r300->radeon.sarea->tiling_enabled)
- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
-
r300->hw.rb3d_dither_ctl.cmd[1] = 0;
r300->hw.rb3d_dither_ctl.cmd[2] = 0;
r300->hw.rb3d_dither_ctl.cmd[3] = 0;
@@ -2268,44 +1912,18 @@ static void r300ResetHwState(r300ContextPtr r300)
r300->hw.rb3d_aaresolve_ctl.cmd[1] = 0;
- r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
- r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
-
- r300->hw.zb.cmd[R300_ZB_OFFSET] =
- r300->radeon.radeonScreen->depthOffset +
- r300->radeon.radeonScreen->fbLocation;
- r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
-
- if (r300->radeon.sarea->tiling_enabled) {
- /* XXX: Turn off when clearing buffers ? */
- r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE;
-
- if (ctx->Visual.depthBits == 24)
- r300->hw.zb.cmd[R300_ZB_PITCH] |=
- R300_DEPTHMICROTILE_TILED;
- }
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
r300->hw.zb_depthclearvalue.cmd[1] = 0;
- switch (ctx->Visual.depthBits) {
- case 16:
- r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_16BIT_INT_Z;
- break;
- case 24:
- r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
- break;
- default:
- fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits);
- _mesa_exit(-1);
- }
-
r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE;
r300->hw.zstencil_format.cmd[3] = 0x00000003;
r300->hw.zstencil_format.cmd[4] = 0x00000000;
r300SetEarlyZState(ctx);
- r300->hw.unk4F30.cmd[1] = 0;
- r300->hw.unk4F30.cmd[2] = 0;
+ r300->hw.zb_zmask.cmd[1] = 0;
+ r300->hw.zb_zmask.cmd[2] = 0;
r300->hw.zb_hiz_offset.cmd[1] = 0;
@@ -2319,135 +1937,151 @@ static void r300ResetHwState(r300ContextPtr r300)
r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
}
- r300->hw.all_dirty = GL_TRUE;
+ r300->radeon.hw.all_dirty = GL_TRUE;
}
void r300UpdateShaders(r300ContextPtr rmesa)
{
- GLcontext *ctx;
- struct r300_vertex_program *vp;
- int i;
+ GLcontext *ctx = rmesa->radeon.glCtx;
- ctx = rmesa->radeon.glCtx;
+ /* should only happenen once, just after context is created */
+ /* TODO: shouldn't we fallback to sw here? */
+ if (!ctx->FragmentProgram._Current) {
+ _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+ return;
+ }
- if (rmesa->NewGLState && hw_tcl_on) {
- rmesa->NewGLState = 0;
+ {
+ struct r300_fragment_program *fp;
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- rmesa->temp_attrib[i] =
- TNL_CONTEXT(ctx)->vb.AttribPtr[i];
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
- &rmesa->dummy_attrib[i];
- }
+ fp = r300SelectAndTranslateFragmentShader(ctx);
- _tnl_UpdateFixedFunctionProgram(ctx);
+ r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error);
+ }
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
- rmesa->temp_attrib[i];
- }
+ if (rmesa->options.hw_tcl_enabled) {
+ struct r300_vertex_program *vp;
- r300SelectVertexShader(rmesa);
- vp = (struct r300_vertex_program *)
- CURRENT_VERTEX_SHADER(ctx);
- /*if (vp->translated == GL_FALSE)
- r300TranslateVertexShader(vp); */
- if (vp->translated == GL_FALSE) {
- fprintf(stderr, "Failing back to sw-tcl\n");
- hw_tcl_on = future_hw_tcl_on = 0;
- r300ResetHwState(rmesa);
-
- r300UpdateStateParameters(ctx, _NEW_PROGRAM);
- return;
+ if (rmesa->radeon.NewGLState) {
+ int i;
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ rmesa->temp_attrib[i] =
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i];
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
+ &rmesa->dummy_attrib[i];
+ }
+
+ _tnl_UpdateFixedFunctionProgram(ctx);
+
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
+ rmesa->temp_attrib[i];
+ }
}
+
+ vp = r300SelectAndTranslateVertexShader(ctx);
+
+ r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error);
}
- r300UpdateStateParameters(ctx, _NEW_PROGRAM);
+
+ r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+ rmesa->radeon.NewGLState = 0;
}
-static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
- struct gl_program *program, struct prog_src_register srcreg)
+static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer)
{
static const GLfloat dummy[4] = { 0, 0, 0, 0 };
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct rc_constant * rcc = &rmesa->selected_fp->code.constants.Constants[index];
+
+ switch(rcc->Type) {
+ case RC_CONSTANT_EXTERNAL:
+ return ctx->FragmentProgram._Current->Base.Parameters->ParameterValues[rcc->u.External];
+ case RC_CONSTANT_IMMEDIATE:
+ return rcc->u.Immediate;
+ case RC_CONSTANT_STATE:
+ switch(rcc->u.State[0]) {
+ case RC_STATE_SHADOW_AMBIENT: {
+ const int unit = (int) rcc->u.State[1];
+ const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
+ if (texObj) {
+ buffer[0] =
+ buffer[1] =
+ buffer[2] =
+ buffer[3] = texObj->CompareFailValue;
+ }
+ return buffer;
+ }
- switch(srcreg.File) {
- case PROGRAM_LOCAL_PARAM:
- return program->LocalParams[srcreg.Index];
- case PROGRAM_ENV_PARAM:
- return ctx->FragmentProgram.Parameters[srcreg.Index];
- case PROGRAM_STATE_VAR:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_CONSTANT:
- return program->Parameters->ParameterValues[srcreg.Index];
- default:
- _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n");
- return dummy;
+ case RC_STATE_R300_WINDOW_DIMENSION: {
+ __DRIdrawablePrivate * drawable = radeon_get_drawable(&rmesa->radeon);
+ buffer[0] = drawable->w * 0.5f; /* width*0.5 */
+ buffer[1] = drawable->h * 0.5f; /* height*0.5 */
+ buffer[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */
+ buffer[3] = 1.0F; /* not used */
+ return buffer;
+ }
+
+ case RC_STATE_R300_TEXRECT_FACTOR: {
+ struct gl_texture_object *t =
+ ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX];
+
+ if (t && t->Image[0][t->BaseLevel]) {
+ struct gl_texture_image *image =
+ t->Image[0][t->BaseLevel];
+ buffer[0] = 1.0 / image->Width2;
+ buffer[1] = 1.0 / image->Height2;
+ } else {
+ buffer[0] = 1.0;
+ buffer[1] = 1.0;
+ }
+ buffer[2] = 1.0;
+ buffer[3] = 1.0;
+ return buffer;
+ }
+ }
}
+
+ return dummy;
}
-static void r300SetupPixelShader(r300ContextPtr rmesa)
+static void r300SetupPixelShader(GLcontext *ctx)
{
- GLcontext *ctx = rmesa->radeon.glCtx;
- struct r300_fragment_program *fp = (struct r300_fragment_program *)
- (char *)ctx->FragmentProgram._Current;
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct r300_fragment_program *fp = rmesa->selected_fp;
struct r300_fragment_program_code *code;
- int i, k;
-
- if (!fp) /* should only happenen once, just after context is created */
- return;
-
- r300TranslateFragmentShader(rmesa, fp);
- if (!fp->translated) {
- fprintf(stderr, "%s: No valid fragment shader, exiting\n",
- __FUNCTION__);
- return;
- }
- code = &fp->code;
+ int i;
- r300SetupTextures(ctx);
+ code = &fp->code.code.r300;
R300_STATECHANGE(rmesa, fpi[0]);
R300_STATECHANGE(rmesa, fpi[1]);
R300_STATECHANGE(rmesa, fpi[2]);
R300_STATECHANGE(rmesa, fpi[3]);
- rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu.length);
- rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu.length);
- rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu.length);
- rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+ rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length);
+ rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length);
+ rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length);
+ rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
for (i = 0; i < code->alu.length; i++) {
- rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0;
- rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1;
- rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2;
- rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3;
+ rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_inst;
+ rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_addr;
+ rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_inst;
+ rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_addr;
}
R300_STATECHANGE(rmesa, fp);
- rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3);
- rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx;
- rmesa->hw.fp.cmd[R300_FP_CNTL2] =
- (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
- ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
- (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
- ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
- /* I just want to say, the way these nodes are stored.. weird.. */
- for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) {
- if (i < (code->cur_node + 1)) {
- rmesa->hw.fp.cmd[R300_FP_NODE0 + k] =
- (code->node[i].alu_offset << R300_ALU_START_SHIFT) |
- (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) |
- (code->node[i].tex_offset << R300_TEX_START_SHIFT) |
- (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) |
- code->node[i].flags;
- } else {
- rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0;
- }
- }
+ rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->config;
+ rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->pixsize;
+ rmesa->hw.fp.cmd[R300_FP_CNTL2] = code->code_offset;
+ for (i = 0; i < 4; i++)
+ rmesa->hw.fp.cmd[R300_FP_NODE0 + i] = code->code_addr[i];
R300_STATECHANGE(rmesa, fpp);
- rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4);
- for (i = 0; i < code->const_nr; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx,
- &fp->mesa_program.Base, code->constant[i]);
+ rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4);
+ for (i = 0; i < fp->code.constants.Count; i++) {
+ GLfloat buffer[4];
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
@@ -2469,41 +2103,29 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\
} while(0)
-static void r500SetupPixelShader(r300ContextPtr rmesa)
+static void r500SetupPixelShader(GLcontext *ctx)
{
- GLcontext *ctx = rmesa->radeon.glCtx;
- struct r500_fragment_program *fp = (struct r500_fragment_program *)
- (char *)ctx->FragmentProgram._Current;
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct r300_fragment_program *fp = rmesa->selected_fp;
int i;
struct r500_fragment_program_code *code;
- if (!fp) /* should only happenen once, just after context is created */
- return;
-
((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0;
((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0;
- r500TranslateFragmentShader(rmesa, fp);
- if (!fp->translated) {
- fprintf(stderr, "%s: No valid fragment shader, exiting\n",
- __FUNCTION__);
- return;
- }
- code = &fp->code;
-
- r300SetupTextures(ctx);
+ code = &fp->code.code.r500;
R300_STATECHANGE(rmesa, fp);
rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx;
rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] =
- R500_US_CODE_START_ADDR(code->inst_offset) |
+ R500_US_CODE_START_ADDR(0) |
R500_US_CODE_END_ADDR(code->inst_end);
rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] =
- R500_US_CODE_RANGE_ADDR(code->inst_offset) |
+ R500_US_CODE_RANGE_ADDR(0) |
R500_US_CODE_RANGE_SIZE(code->inst_end);
rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] =
- R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */
+ R500_US_CODE_OFFSET_ADDR(0);
R300_STATECHANGE(rmesa, r500fp);
/* Emit our shader... */
@@ -2519,60 +2141,95 @@ static void r500SetupPixelShader(r300ContextPtr rmesa)
bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6);
R300_STATECHANGE(rmesa, r500fp_const);
- for (i = 0; i < code->const_nr; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx,
- &fp->mesa_program.Base, code->constant[i]);
+ for (i = 0; i < fp->code.constants.Count; i++) {
+ GLfloat buffer[4];
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]);
}
- bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4);
-
+ bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->code.constants.Count * 4);
}
-void r300UpdateShaderStates(r300ContextPtr rmesa)
+void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten)
{
- GLcontext *ctx;
- ctx = rmesa->radeon.glCtx;
+ r300ContextPtr rmesa = R300_CONTEXT( ctx );
+ struct vertex_attribute *attrs = rmesa->vbuf.attribs;
+ int i, j, reg_count;
+ uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1];
+ uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1];
- r300UpdateTextureState(ctx);
- r300SetEarlyZState(ctx);
+ for (i = 0; i < R300_VIR_CMDSIZE-1; ++i)
+ vir0[i] = vir1[i] = 0;
- /* w_fmt value is set to get best performance
- * see p.130 R5xx 3D acceleration guide v1.3 */
- GLuint w_fmt, fgdepthsrc;
- if (current_fragment_program_writes_depth(ctx)) {
- fgdepthsrc = R300_FG_DEPTH_SRC_SHADER;
- w_fmt = R300_W_FMT_W24 | R300_W_SRC_US;
- } else {
- fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
- w_fmt = R300_W_FMT_W0 | R300_W_SRC_US;
+ for (i = 0, j = 0; i < rmesa->vbuf.num_attribs; ++i) {
+ int tmp;
+
+ tmp = attrs[i].data_type | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT);
+ if (attrs[i]._signed)
+ tmp |= R300_SIGNED;
+ if (attrs[i].normalize)
+ tmp |= R300_NORMALIZE;
+
+ if (i % 2 == 0) {
+ vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT;
+ vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT);
+ } else {
+ vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT;
+ vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
+ ++j;
+ }
}
- if (w_fmt != rmesa->hw.us_out_fmt.cmd[5]) {
- R300_STATECHANGE(rmesa, us_out_fmt);
- rmesa->hw.us_out_fmt.cmd[5] = w_fmt;
+ reg_count = (rmesa->vbuf.num_attribs + 1) >> 1;
+ if (rmesa->vbuf.num_attribs % 2 != 0) {
+ vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
+ } else {
+ vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
}
- if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) {
- R300_STATECHANGE(rmesa, fg_depth_src);
- rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc;
+ R300_STATECHANGE(rmesa, vir[0]);
+ R300_STATECHANGE(rmesa, vir[1]);
+ R300_STATECHANGE(rmesa, vof);
+ R300_STATECHANGE(rmesa, vic);
+
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
+ rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
+ rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16;
+ rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16;
+ } else {
+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count;
}
- if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
- r500SetupPixelShader(rmesa);
- else
- r300SetupPixelShader(rmesa);
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten);
+}
- if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
- r500SetupRSUnit(ctx);
- else
- r300SetupRSUnit(ctx);
+void r300UpdateShaderStates(r300ContextPtr rmesa)
+{
+ GLcontext *ctx;
+ ctx = rmesa->radeon.glCtx;
- if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- r300SetupVertexProgram(rmesa);
+ /* should only happenen once, just after context is created */
+ if (!ctx->FragmentProgram._Current)
+ return;
+ r300SetEarlyZState(ctx);
+
+ r300SetupTextures(ctx);
+
+ rmesa->vtbl.SetupPixelShader(ctx);
+
+ rmesa->vtbl.SetupRSUnit(ctx);
+
+ if (rmesa->options.hw_tcl_enabled) {
+ r300SetupVertexProgram(rmesa);
+ }
}
/**
@@ -2586,15 +2243,17 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
_swsetup_InvalidateState(ctx, new_state);
_vbo_InvalidateState(ctx, new_state);
_tnl_InvalidateState(ctx, new_state);
- _ae_invalidate_state(ctx, new_state);
- if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
- r300UpdateDrawBuffer(ctx);
- }
+ if (new_state & _NEW_BUFFERS) {
+ _mesa_update_framebuffer(ctx);
+ /* this updates the DrawBuffer's Width/Height if it's a FBO */
+ _mesa_update_draw_buffer_bounds(ctx);
- r300UpdateStateParameters(ctx, new_state);
+ R300_STATECHANGE(r300, cb);
+ R300_STATECHANGE(r300, zb);
+ }
- r300->NewGLState |= new_state;
+ r300->radeon.NewGLState |= new_state;
}
/**
@@ -2604,58 +2263,12 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
*/
void r300InitState(r300ContextPtr r300)
{
- GLcontext *ctx = r300->radeon.glCtx;
- GLuint depth_fmt;
-
- radeonInitState(&r300->radeon);
-
- switch (ctx->Visual.depthBits) {
- case 16:
- r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
- depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z;
- break;
- case 24:
- r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff;
- depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
- break;
- default:
- fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
- ctx->Visual.depthBits);
- _mesa_exit(-1);
- }
-
- /* Only have hw stencil when depth buffer is 24 bits deep */
- r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 &&
- ctx->Visual.depthBits == 24);
-
- memset(&(r300->state.texture), 0, sizeof(r300->state.texture));
-
r300ResetHwState(r300);
}
static void r300RenderMode(GLcontext * ctx, GLenum mode)
{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- (void)rmesa;
- (void)mode;
-}
-
-void r300UpdateClipPlanes( GLcontext *ctx )
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- GLuint p;
-
- for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
- if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
- GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
-
- R300_STATECHANGE( rmesa, vpucp[p] );
- rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
- rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
- rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2];
- rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3];
- }
- }
+ r300SwitchFallback(ctx, R300_FALLBACK_RENDER_MODE, ctx->RenderMode != GL_RENDER);
}
/**
@@ -2663,7 +2276,6 @@ void r300UpdateClipPlanes( GLcontext *ctx )
*/
void r300InitStateFuncs(struct dd_function_table *functions)
{
- radeonInitStateFuncs(functions);
functions->UpdateState = r300InvalidateState;
functions->AlphaFunc = r300AlphaFunc;
@@ -2699,4 +2311,21 @@ void r300InitStateFuncs(struct dd_function_table *functions)
functions->RenderMode = r300RenderMode;
functions->ClipPlane = r300ClipPlane;
+ functions->Scissor = radeonScissor;
+
+ functions->DrawBuffer = radeonDrawBuffer;
+ functions->ReadBuffer = radeonReadBuffer;
+}
+
+void r300InitShaderFunctions(r300ContextPtr r300)
+{
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ r300->vtbl.SetupRSUnit = r500SetupRSUnit;
+ r300->vtbl.SetupPixelShader = r500SetupPixelShader;
+ r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures;
+ } else {
+ r300->vtbl.SetupRSUnit = r300SetupRSUnit;
+ r300->vtbl.SetupPixelShader = r300SetupPixelShader;
+ r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures;
+ }
}
diff --git a/r300/r300_state.h b/r300/r300_state.h
index 0589ab7..d46bf9f 100644
--- a/r300/r300_state.h
+++ b/r300/r300_state.h
@@ -39,42 +39,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_NEWPRIM( rmesa ) \
do { \
- if ( rmesa->dma.flush ) \
- rmesa->dma.flush( rmesa ); \
+ if ( rmesa->radeon.dma.flush ) \
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \
} while (0)
#define R300_STATECHANGE(r300, atom) \
do { \
R300_NEWPRIM(r300); \
r300->hw.atom.dirty = GL_TRUE; \
- r300->hw.is_dirty = GL_TRUE; \
+ r300->radeon.hw.is_dirty = GL_TRUE; \
} while(0)
-#define R300_PRINT_STATE(r300, atom) \
- r300PrintStateAtom(r300, &r300->hw.atom)
-
-/* Fire the buffered vertices no matter what.
- TODO: This has not been implemented yet
- */
-#define R300_FIREVERTICES( r300 ) \
-do { \
- \
- if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) { \
- r300Flush( (r300)->radeon.glCtx ); \
- } \
- \
-} while (0)
-
-// r300_state.c
-extern int future_hw_tcl_on;
-void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx);
void r300UpdateViewportOffset (GLcontext * ctx);
void r300UpdateDrawBuffer (GLcontext * ctx);
-void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state);
void r300UpdateShaders (r300ContextPtr rmesa);
void r300UpdateShaderStates (r300ContextPtr rmesa);
void r300InitState (r300ContextPtr r300);
-void r300UpdateClipPlanes (GLcontext * ctx);
void r300InitStateFuncs (struct dd_function_table *functions);
+void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count);
+void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten);
#endif /* __R300_STATE_H__ */
diff --git a/r300/r300_swtcl.c b/r300/r300_swtcl.c
index ba3621b..ee2c71e 100644
--- a/r300/r300_swtcl.c
+++ b/r300/r300_swtcl.c
@@ -28,362 +28,264 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/*
* Authors:
* Dave Airlie <airlied@linux.ie>
+ * Maciej Cencora <m.cencora@gmail.com>
*/
-/* derived from r200 swtcl path */
-
-
-
-#include "main/glheader.h"
-#include "main/mtypes.h"
-#include "main/colormac.h"
-#include "main/enums.h"
-#include "main/image.h"
-#include "main/imports.h"
-#include "main/light.h"
-#include "main/macros.h"
-
-#include "swrast/s_context.h"
-#include "swrast/s_fog.h"
-#include "swrast_setup/swrast_setup.h"
-#include "math/m_translate.h"
#include "tnl/tnl.h"
-#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
-#include "r300_context.h"
-#include "r300_swtcl.h"
#include "r300_state.h"
-#include "r300_ioctl.h"
+#include "r300_swtcl.h"
#include "r300_emit.h"
-#include "r300_mem.h"
-
-static void flush_last_swtcl_prim( r300ContextPtr rmesa );
-
+#include "r300_tex.h"
+#include "r300_render.h"
+#include "main/simple_list.h"
-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
-void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
#define EMIT_ATTR( ATTR, STYLE ) \
do { \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \
- rmesa->swtcl.vertex_attr_count++; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
+ rmesa->radeon.swtcl.vertex_attr_count++; \
} while (0)
#define EMIT_PAD( N ) \
do { \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \
- rmesa->swtcl.vertex_attr_count++; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
+ rmesa->radeon.swtcl.vertex_attr_count++; \
} while (0)
-static void r300SetVertexFormat( GLcontext *ctx )
+#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask, _normalize) \
+do { \
+ attrs[num_attrs].element = (_attr); \
+ attrs[num_attrs].data_type = (_format); \
+ attrs[num_attrs].dst_loc = (_dst_loc); \
+ attrs[num_attrs].swizzle = (_swizzle); \
+ attrs[num_attrs].write_mask = (_write_mask); \
+ attrs[num_attrs]._signed = 0; \
+ attrs[num_attrs].normalize = (_normalize); \
+ ++num_attrs; \
+} while (0)
+
+void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_OutputsWritten)
{
r300ContextPtr rmesa = R300_CONTEXT( ctx );
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
- DECLARE_RENDERINPUTS(index_bitset);
- GLuint InputsRead = 0, OutputsWritten = 0;
- int vap_fmt_1 = 0;
- int offset = 0;
- int vte = 0;
- int fog_id;
- GLint inputs[VERT_ATTRIB_MAX];
- GLint tab[VERT_ATTRIB_MAX];
- int swizzle[VERT_ATTRIB_MAX][4];
- GLuint i, nr;
- GLuint sz;
-
- DECLARE_RENDERINPUTS(render_inputs_bitset);
- RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
- RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
- RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
-
- vte = rmesa->hw.vte.cmd[1];
- vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT);
- /* Important:
- */
- if ( VB->NdcPtr != NULL ) {
- VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
- vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT;
- }
- else {
- VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
- vte |= R300_VTX_W0_FMT;
- }
-
- assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
- rmesa->swtcl.vertex_attr_count = 0;
-
- /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
- * build up a hardware vertex.
- */
- if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) {
- sz = VB->AttribPtr[VERT_ATTRIB_POS]->size;
- InputsRead |= 1 << VERT_ATTRIB_POS;
- OutputsWritten |= 1 << VERT_RESULT_HPOS;
- EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 );
- offset = sz;
- } else {
- offset = 4;
- EMIT_PAD(4 * sizeof(float));
- }
-/*
- if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
- EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
- offset += 1;
- }
-*/
- if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) {
- sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size;
- rmesa->swtcl.coloroffset = offset;
+ int first_free_tex = 0;
+ GLuint InputsRead = 0;
+ GLuint OutputsWritten = 0;
+ int num_attrs = 0;
+ GLuint fp_reads = rmesa->selected_fp->InputsRead;
+ struct vertex_attribute *attrs = rmesa->vbuf.attribs;
+
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
+ rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
+ rmesa->radeon.swtcl.vertex_attr_count = 0;
+
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s\n", __func__);
+
+ /* We always want non Ndc coords format */
+ VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
+
+ /* Always write position vector */
+ InputsRead |= 1 << VERT_ATTRIB_POS;
+ OutputsWritten |= 1 << VERT_RESULT_HPOS;
+ EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
+ ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW, 0);
+ rmesa->swtcl.coloroffset = 4;
+
+ if (fp_reads & FRAG_BIT_COL0) {
InputsRead |= 1 << VERT_ATTRIB_COLOR0;
OutputsWritten |= 1 << VERT_RESULT_COL0;
- EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 );
- offset += sz;
+#if MESA_LITTLE_ENDIAN
+ EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA );
+ ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1);
+#else
+ EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR );
+ ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1);
+#endif
}
- rmesa->swtcl.specoffset = 0;
- if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
- sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size;
- rmesa->swtcl.specoffset = offset;
- EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 );
+ if (fp_reads & FRAG_BIT_COL1) {
+ GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
InputsRead |= 1 << VERT_ATTRIB_COLOR1;
OutputsWritten |= 1 << VERT_RESULT_COL1;
+#if MESA_LITTLE_ENDIAN
+ EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA );
+ ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1);
+#else
+ EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR );
+ ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1);
+#endif
+ rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1;
}
- fog_id = -1;
- if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) {
- /* find first free tex coord slot */
- if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
- int i;
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
- fog_id = i;
- break;
- }
- }
- } else {
- fog_id = 0;
+ if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+ VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->ColorPtr[1];
+ OutputsWritten |= 1 << VERT_RESULT_BFC0;
+#if MESA_LITTLE_ENDIAN
+ EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA );
+ ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1);
+#else
+ EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR );
+ ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1);
+#endif
+ if (fp_reads & FRAG_BIT_COL1) {
+ VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->SecondaryColorPtr[1];
+ GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+ OutputsWritten |= 1 << VERT_RESULT_BFC1;
+#if MESA_LITTLE_ENDIAN
+ EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA );
+ ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1);
+#else
+ EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR );
+ ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1);
+#endif
}
+ }
- if (fog_id == -1) {
- fprintf(stderr, "\tout of free texcoords to do fog\n");
- _mesa_exit(-1);
- }
+ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) {
+ GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
+ InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE;
+ OutputsWritten |= 1 << VERT_RESULT_PSIZ;
+ EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
+ ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
+ }
- sz = VB->AttribPtr[VERT_ATTRIB_FOG]->size;
- EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F + sz - 1);
- InputsRead |= 1 << VERT_ATTRIB_FOG;
- OutputsWritten |= 1 << VERT_RESULT_FOGC;
- vap_fmt_1 |= sz << (3 * fog_id);
+ if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
+
+ VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+ VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+ RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
}
- if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
- int i;
+ if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
- sz = VB->TexCoordPtr[i]->size;
- InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
- OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
- EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 );
- vap_fmt_1 |= sz << (3 * i);
- }
- }
+ VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+ VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+ RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
}
- /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */
- if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) {
- int first_free_tex = -1;
- if (fog_id >= 0) {
- first_free_tex = fog_id+1;
- } else {
- if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
- int i;
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
- first_free_tex = i;
+ /**
+ * Sending only one texcoord component may lead to lock up,
+ * so for all textures always output 4 texcoord components to RS.
+ */
+ {
+ int i;
+ GLuint swiz, format, hw_format;
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (fp_reads & FRAG_BIT_TEX(i)) {
+ switch (VB->TexCoordPtr[i]->size) {
+ case 1:
+ format = EMIT_1F;
+ hw_format = R300_DATA_TYPE_FLOAT_1;
+ swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+ break;
+ case 2:
+ format = EMIT_2F;
+ hw_format = R300_DATA_TYPE_FLOAT_2;
+ swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
+ break;
+ case 3:
+ format = EMIT_3F;
+ hw_format = R300_DATA_TYPE_FLOAT_3;
+ swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+ break;
+ case 4:
+ format = EMIT_4F;
+ hw_format = R300_DATA_TYPE_FLOAT_4;
+ swiz = SWIZZLE_XYZW;
break;
- }
+ default:
+ continue;
}
- } else {
- first_free_tex = 0;
+ InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
+ OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
+ EMIT_ATTR(_TNL_ATTRIB_TEX(i), format);
+ ADD_ATTR(VERT_ATTRIB_TEX0 + i, hw_format, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0);
+ ++first_free_tex;
}
}
-
- if (first_free_tex == -1) {
- fprintf(stderr, "\tout of free texcoords to write w pos\n");
- _mesa_exit(-1);
- }
-
- sz = VB->AttribPtr[VERT_ATTRIB_POS]->size;
- InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex);
- OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex);
- EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 );
- vap_fmt_1 |= sz << (3 * first_free_tex);
- }
-
- for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
- if (InputsRead & (1 << i)) {
- inputs[i] = nr++;
- } else {
- inputs[i] = -1;
- }
- }
-
- /* Fixed, apply to vir0 only */
- if (InputsRead & (1 << VERT_ATTRIB_POS))
- inputs[VERT_ATTRIB_POS] = 0;
- if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
- inputs[VERT_ATTRIB_COLOR0] = 2;
- if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
- inputs[VERT_ATTRIB_COLOR1] = 3;
- if (InputsRead & (1 << VERT_ATTRIB_FOG))
- inputs[VERT_ATTRIB_FOG] = 6 + fog_id;
- for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
- if (InputsRead & (1 << i))
- inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
-
- for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
- if (InputsRead & (1 << i)) {
- tab[nr++] = i;
- }
}
- for (i = 0; i < nr; i++) {
- int ci;
-
- swizzle[i][0] = SWIZZLE_ZERO;
- swizzle[i][1] = SWIZZLE_ZERO;
- swizzle[i][2] = SWIZZLE_ZERO;
- swizzle[i][3] = SWIZZLE_ONE;
-
- for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) {
- swizzle[i][ci] = ci;
- }
+ if (first_free_tex >= ctx->Const.MaxTextureUnits) {
+ fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
+ _mesa_exit(-1);
}
R300_NEWPRIM(rmesa);
- R300_STATECHANGE(rmesa, vir[0]);
- ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
- r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
- VB->AttribPtr, inputs, tab, nr);
- R300_STATECHANGE(rmesa, vir[1]);
- ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
- r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
- nr);
-
- R300_STATECHANGE(rmesa, vic);
- rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
- rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
-
- R300_STATECHANGE(rmesa, vof);
- rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
- rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
-
- rmesa->swtcl.vertex_size =
- _tnl_install_attrs( ctx,
- rmesa->swtcl.vertex_attrs,
- rmesa->swtcl.vertex_attr_count,
- NULL, 0 );
+ rmesa->vbuf.num_attribs = num_attrs;
+ *_InputsRead = InputsRead;
+ *_OutputsWritten = OutputsWritten;
- rmesa->swtcl.vertex_size /= 4;
-
- RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-
-
- R300_STATECHANGE(rmesa, vte);
- rmesa->hw.vte.cmd[1] = vte;
- rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
+ RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset);
}
-
-/* Flush vertices in the current dma region.
- */
-static void flush_last_swtcl_prim( r300ContextPtr rmesa )
+static void r300PrepareVertices(GLcontext *ctx)
{
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- rmesa->dma.flush = NULL;
-
- if (rmesa->dma.current.buf) {
- struct r300_dma_region *current = &rmesa->dma.current;
- GLuint current_offset = GET_START(current);
-
- assert (current->start +
- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
- current->ptr);
-
- if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
-
- r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
-
- r300EmitState(rmesa);
-
- r300EmitVertexAOS( rmesa,
- rmesa->swtcl.vertex_size,
- current_offset);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLuint InputsRead, OutputsWritten;
+ radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
- r300EmitVbufPrim( rmesa,
- rmesa->swtcl.hw_primitive,
- rmesa->swtcl.numverts);
+ r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten);
+ r300SetupVAP(ctx, InputsRead, OutputsWritten);
- r300EmitCacheFlush(rmesa);
- }
+ rmesa->radeon.swtcl.vertex_size =
+ _tnl_install_attrs( ctx,
+ rmesa->radeon.swtcl.vertex_attrs,
+ rmesa->radeon.swtcl.vertex_attr_count,
+ NULL, 0 );
- rmesa->swtcl.numverts = 0;
- current->start = current->ptr;
- }
+ rmesa->radeon.swtcl.vertex_size /= 4;
}
-/* Alloc space in the current dma region.
- */
-static void *
-r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
+static void r300_predict_emit_size( r300ContextPtr rmesa )
{
- GLuint bytes = vsize * nverts;
-
- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
- r300RefillCurrentDmaRegion( rmesa, bytes);
-
- if (!rmesa->dma.flush) {
- rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
- rmesa->dma.flush = flush_last_swtcl_prim;
- }
-
- ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
- ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
- ASSERT( rmesa->dma.current.start +
- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
- rmesa->dma.current.ptr );
-
- {
- GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
- rmesa->dma.current.ptr += bytes;
- rmesa->swtcl.numverts += nverts;
- return head;
+ if (!rmesa->radeon.swtcl.emit_prediction) {
+ const int vertex_size = 7;
+ const int prim_size = 3;
+ const int cache_flush_size = 4;
+ const int pre_emit_state = 4;
+ const int scissor_size = 3;
+ const int state_size = radeonCountStateEmitSize(&rmesa->radeon);
+
+ if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
+ state_size + pre_emit_state + scissor_size
+ + vertex_size + prim_size + cache_flush_size * 2,
+ __FUNCTION__))
+ rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon);
+ else
+ rmesa->radeon.swtcl.emit_prediction = state_size;
+
+ rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw
+ + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state;
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE,
+ "%s, size %d\n",
+ __func__, rmesa->radeon.cmdbuf.cs->cdw
+ + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state);
}
}
+
static GLuint reduced_prim[] = {
- GL_POINTS,
- GL_LINES,
- GL_LINES,
- GL_LINES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
+ GL_POINTS,
+ GL_LINES,
+ GL_LINES,
+ GL_LINES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
};
static void r300RasterPrimitive( GLcontext *ctx, GLuint prim );
-static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
-//static void r300ResetLineStipple( GLcontext *ctx );
/***********************************************************************
* Emit primitives as inline vertices *
@@ -402,18 +304,26 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
#define HAVE_POLYGONS 1
#define HAVE_ELTS 1
+static void* r300_alloc_verts(r300ContextPtr rmesa, GLuint n, GLuint size)
+{
+ void *rv;
+ do {
+ r300_predict_emit_size( rmesa );
+ rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 );
+ } while (!rv);
+ return rv;
+}
+
#undef LOCAL_VARS
#undef ALLOC_VERTS
#define CTX_ARG r300ContextPtr rmesa
-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
-#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 )
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) r300_alloc_verts(rmesa, n, size);
#define LOCAL_VARS \
r300ContextPtr rmesa = R300_CONTEXT(ctx); \
- const char *r300verts = (char *)rmesa->swtcl.verts;
+ const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
#define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
#define VERTEX r300Vertex
-#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
-#define PRINT_VERTEX(x)
#undef TAG
#define TAG(x) r300_##x
#include "tnl_dd/t_dd_triemit.h"
@@ -433,9 +343,8 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
* Build render functions from dd templates *
***********************************************************************/
-#define R300_TWOSIDE_BIT 0x01
-#define R300_UNFILLED_BIT 0x02
-#define R300_MAX_TRIFUNC 0x04
+#define R300_UNFILLED_BIT 0x01
+#define R300_MAX_TRIFUNC 0x02
static struct {
tnl_points_func points;
@@ -446,9 +355,9 @@ static struct {
#define DO_FALLBACK 0
#define DO_UNFILLED (IND & R300_UNFILLED_BIT)
-#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT)
+#define DO_TWOSIDE 0
#define DO_FLAT 0
-#define DO_OFFSET 0
+#define DO_OFFSET 0
#define DO_TRI 1
#define DO_QUAD 1
#define DO_LINE 1
@@ -468,33 +377,39 @@ static struct {
#define VERT_Y(_v) _v->v.y
#define VERT_Z(_v) _v->v.z
#define AREA_IS_CCW( a ) (a < 0)
-#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
-
-/* Only used to pull back colors into vertices (ie, we know color is
- * floating point).
- */
-#define R300_COLOR( dst, src ) \
-do { \
- UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \
- UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \
- UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \
- UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]); \
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
+
+#define VERT_SET_RGBA( v, c ) \
+do { \
+ r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \
} while (0)
-#define VERT_SET_RGBA( v, c ) if (coloroffset) R300_COLOR( v->ub4[coloroffset], c )
-#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset]
-#define VERT_SAVE_RGBA( idx ) if (coloroffset) color[idx] = v[idx]->ui[coloroffset]
-#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+
+#define VERT_SET_SPEC( v0, c ) \
+do { \
+ if (specoffset) { \
+ UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \
+ } \
+} while (0)
-#define R300_SPEC( dst, src ) \
-do { \
- UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \
- UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \
- UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \
+#define VERT_COPY_SPEC( v0, v1 ) \
+do { \
+ if (specoffset) { \
+ v0->v.specular.red = v1->v.specular.red; \
+ v0->v.specular.green = v1->v.specular.green; \
+ v0->v.specular.blue = v1->v.specular.blue; \
+ } \
} while (0)
-#define VERT_SET_SPEC( v, c ) if (specoffset) R300_SPEC( v->ub4[specoffset], c )
-#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset])
+#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset]
#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
@@ -514,7 +429,7 @@ do { \
***********************************************************************/
#define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] )
-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
#undef TAG
#define TAG(x) x
#include "tnl_dd/t_dd_unfilled.h"
@@ -530,26 +445,15 @@ do { \
#define TAG(x) x
#include "tnl_dd/t_dd_tritmp.h"
-#define IND (R300_TWOSIDE_BIT)
-#define TAG(x) x##_twoside
-#include "tnl_dd/t_dd_tritmp.h"
-
#define IND (R300_UNFILLED_BIT)
#define TAG(x) x##_unfilled
#include "tnl_dd/t_dd_tritmp.h"
-#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT)
-#define TAG(x) x##_twoside_unfilled
-#include "tnl_dd/t_dd_tritmp.h"
-
-
static void init_rast_tab( void )
{
init();
- init_twoside();
init_unfilled();
- init_twoside_unfilled();
}
/**********************************************************************/
@@ -571,8 +475,8 @@ static void init_rast_tab( void )
#undef LOCAL_VARS
#define LOCAL_VARS \
r300ContextPtr rmesa = R300_CONTEXT(ctx); \
- const GLuint vertsize = rmesa->swtcl.vertex_size; \
- const char *r300verts = (char *)rmesa->swtcl.verts; \
+ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
+ const char *r300verts = (char *)rmesa->radeon.swtcl.verts; \
const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
const GLboolean stipple = ctx->Line.StippleFlag; \
(void) elt; (void) stipple;
@@ -600,11 +504,11 @@ static void r300ChooseRenderState( GLcontext *ctx )
r300ContextPtr rmesa = R300_CONTEXT(ctx);
GLuint index = 0;
GLuint flags = ctx->_TriangleCaps;
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
- if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT;
- if (index != rmesa->swtcl.RenderIndex) {
+ if (index != rmesa->radeon.swtcl.RenderIndex) {
tnl->Driver.Render.Points = rast_tab[index].points;
tnl->Driver.Render.Line = rast_tab[index].line;
tnl->Driver.Render.ClippedLine = rast_tab[index].line;
@@ -621,59 +525,64 @@ static void r300ChooseRenderState( GLcontext *ctx )
tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
}
- rmesa->swtcl.RenderIndex = index;
+ rmesa->radeon.swtcl.RenderIndex = index;
}
}
-
-static void r300RenderStart(GLcontext *ctx)
+void r300RenderStart(GLcontext *ctx)
{
- r300ContextPtr rmesa = R300_CONTEXT( ctx );
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
+ r300ContextPtr rmesa = R300_CONTEXT( ctx );
r300ChooseRenderState(ctx);
- r300SetVertexFormat(ctx);
r300UpdateShaders(rmesa);
- r300UpdateShaderStates(rmesa);
- r300EmitCacheFlush(rmesa);
+ r300PrepareVertices(ctx);
+
+ r300ValidateBuffers(ctx);
+
+ r300UpdateShaderStates(rmesa);
- if (rmesa->dma.flush != 0 &&
- rmesa->dma.flush != flush_last_swtcl_prim)
- rmesa->dma.flush( rmesa );
+ /* investigate if we can put back flush optimisation if needed */
+ if (rmesa->radeon.dma.flush != NULL) {
+ rmesa->radeon.dma.flush(ctx);
+ }
}
-static void r300RenderFinish(GLcontext *ctx)
+void r300RenderFinish(GLcontext *ctx)
{
}
static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
- if (rmesa->swtcl.hw_primitive != hwprim) {
- R300_NEWPRIM( rmesa );
- rmesa->swtcl.hw_primitive = hwprim;
+ if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
+ R300_NEWPRIM( rmesa );
+ rmesa->radeon.swtcl.hw_primitive = hwprim;
}
}
-static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
+void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- rmesa->swtcl.render_primitive = prim;
+ rmesa->radeon.swtcl.render_primitive = prim;
+ radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
- return;
+ return;
r300RasterPrimitive( ctx, reduced_prim[prim] );
}
-static void r300ResetLineStipple(GLcontext *ctx)
+void r300ResetLineStipple(GLcontext *ctx)
{
-
-
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s\n", __func__);
}
void r300InitSwtcl(GLcontext *ctx)
@@ -681,11 +590,13 @@ void r300InitSwtcl(GLcontext *ctx)
TNLcontext *tnl = TNL_CONTEXT(ctx);
r300ContextPtr rmesa = R300_CONTEXT(ctx);
static int firsttime = 1;
+ radeon_print(RADEON_SWRENDER, RADEON_NORMAL, "%s\n", __func__);
if (firsttime) {
init_rast_tab();
firsttime = 0;
}
+ rmesa->radeon.swtcl.emit_prediction = 0;
tnl->Driver.Render.Start = r300RenderStart;
tnl->Driver.Render.Finish = r300RenderFinish;
@@ -699,50 +610,75 @@ void r300InitSwtcl(GLcontext *ctx)
_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
48 * sizeof(GLfloat) );
- rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
- rmesa->swtcl.RenderIndex = ~0;
- rmesa->swtcl.render_primitive = GL_TRIANGLES;
- rmesa->swtcl.hw_primitive = 0;
+ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+ rmesa->radeon.swtcl.hw_primitive = 0;
_tnl_invalidate_vertex_state( ctx, ~0 );
_tnl_invalidate_vertices( ctx, ~0 );
- RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
_tnl_need_projected_coords( ctx, GL_FALSE );
- r300ChooseRenderState(ctx);
}
void r300DestroySwtcl(GLcontext *ctx)
{
}
-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
+static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
{
- int cmd_reserved = 0;
- int cmd_written = 0;
-
- drm_radeon_cmd_header_t *cmd = NULL;
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n",
- __FUNCTION__, vertex_size, offset);
-
- start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
- e32(1);
- e32(vertex_size | (vertex_size << 8));
- e32(offset);
+ BATCH_LOCALS(&rmesa->radeon);
+
+ radeon_print(RADEON_SWRENDER, RADEON_TRACE,
+ "%s: vertex_size %d, offset 0x%x \n",
+ __FUNCTION__, vertex_size, offset);
+
+ BEGIN_BATCH(7);
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
+ OUT_BATCH(1);
+ OUT_BATCH(vertex_size | (vertex_size << 8));
+ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
}
-void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
+static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
{
-
- int cmd_reserved = 0;
- int cmd_written = 0;
+ BATCH_LOCALS(&rmesa->radeon);
int type, num_verts;
- drm_radeon_cmd_header_t *cmd = NULL;
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s\n", __func__);
type = r300PrimitiveType(rmesa, primitive);
num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
+ BEGIN_BATCH(3);
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
+ END_BATCH();
+}
+
+void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
+{
+ radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ r300EmitCacheFlush(rmesa);
+
+ radeonEmitState(&rmesa->radeon);
+ r300_emit_scissor(ctx);
+ r300EmitVertexAOS(rmesa,
+ rmesa->radeon.swtcl.vertex_size,
+ first_elem(&rmesa->radeon.dma.reserved)->bo,
+ current_offset);
+
+ r300EmitVbufPrim(rmesa,
+ rmesa->radeon.swtcl.hw_primitive,
+ rmesa->radeon.swtcl.numverts);
+ r300EmitCacheFlush(rmesa);
+ if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n",
+ rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
+ rmesa->radeon.swtcl.emit_prediction = 0;
+ COMMIT_BATCH();
}
diff --git a/r300/r300_swtcl.h b/r300/r300_swtcl.h
index 55df53c..c271d26 100644
--- a/r300/r300_swtcl.h
+++ b/r300/r300_swtcl.h
@@ -39,7 +39,27 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "swrast/swrast.h"
#include "r300_context.h"
+/*
+ * Here are definitions of OVM locations of vertex attributes for non TCL hw
+ */
+#define SWTCL_OVM_POS 0
+#define SWTCL_OVM_COLOR0 2
+#define SWTCL_OVM_COLOR1 3
+#define SWTCL_OVM_COLOR2 4
+#define SWTCL_OVM_COLOR3 5
+#define SWTCL_OVM_TEX(n) ((n) + 6)
+#define SWTCL_OVM_POINT_SIZE 15
+
+extern void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *InputsRead, GLuint *OutputsWritten);
+
extern void r300InitSwtcl( GLcontext *ctx );
extern void r300DestroySwtcl( GLcontext *ctx );
+extern void r300RenderStart(GLcontext *ctx);
+extern void r300RenderFinish(GLcontext *ctx);
+extern void r300RenderPrimitive(GLcontext *ctx, GLenum prim);
+extern void r300ResetLineStipple(GLcontext *ctx);
+
+extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+
#endif
diff --git a/r300/r300_tex.c b/r300/r300_tex.c
index 7c699ec..433e5a8 100644
--- a/r300/r300_tex.c
+++ b/r300/r300_tex.c
@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
+#include "main/mipmap.h"
#include "main/simple_list.h"
#include "main/texformat.h"
#include "main/texstore.h"
@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_context.h"
#include "r300_state.h"
#include "r300_ioctl.h"
+#include "radeon_mipmap_tree.h"
#include "r300_tex.h"
#include "xmlpool.h"
@@ -77,20 +79,20 @@ static unsigned int translate_wrap_mode(GLenum wrapmode)
*
* \param t Texture object whose wrap modes are to be set
*/
-static void r300UpdateTexWrap(r300TexObjPtr t)
+static void r300UpdateTexWrap(radeonTexObjPtr t)
{
- struct gl_texture_object *tObj = t->base.tObj;
+ struct gl_texture_object *tObj = &t->base;
- t->filter &=
+ t->pp_txfilter &=
~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
- t->filter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
+ t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
if (tObj->Target != GL_TEXTURE_1D) {
- t->filter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
+ t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
if (tObj->Target == GL_TEXTURE_3D)
- t->filter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
+ t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
}
}
@@ -117,10 +119,13 @@ static GLuint aniso_filter(GLfloat anisotropy)
* \param magf Texture magnification mode
* \param anisotropy Maximum anisotropy level
*/
-static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
+static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
{
- t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
- t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
+ /* Force revalidation to account for switches from/to mipmapping. */
+ t->validated = GL_FALSE;
+
+ t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
+ t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
/* Note that EXT_texture_filter_anisotropic is extremely vague about
* how anisotropic filtering interacts with the "normal" filter modes.
@@ -128,33 +133,33 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
* filter settings completely. This includes driconf's settings.
*/
if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
- t->filter |= R300_TX_MAG_FILTER_ANISO
+ t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
| R300_TX_MIN_FILTER_ANISO
| R300_TX_MIN_FILTER_MIP_LINEAR
| aniso_filter(anisotropy);
- if (RADEON_DEBUG & DEBUG_TEXTURE)
+ if (RADEON_DEBUG & RADEON_TEXTURE)
fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy);
return;
}
switch (minf) {
case GL_NEAREST:
- t->filter |= R300_TX_MIN_FILTER_NEAREST;
+ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST;
break;
case GL_LINEAR:
- t->filter |= R300_TX_MIN_FILTER_LINEAR;
+ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR;
break;
case GL_NEAREST_MIPMAP_NEAREST:
- t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
+ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
- t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
+ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
break;
case GL_LINEAR_MIPMAP_NEAREST:
- t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
+ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
break;
case GL_LINEAR_MIPMAP_LINEAR:
- t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
+ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
break;
}
@@ -163,15 +168,15 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
*/
switch (magf) {
case GL_NEAREST:
- t->filter |= R300_TX_MAG_FILTER_NEAREST;
+ t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST;
break;
case GL_LINEAR:
- t->filter |= R300_TX_MAG_FILTER_LINEAR;
+ t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR;
break;
}
}
-static void r300SetTexBorderColor(r300TexObjPtr t, const GLfloat color[4])
+static void r300SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
{
GLubyte c[4];
CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
@@ -182,729 +187,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, const GLfloat color[4])
}
/**
- * Allocate space for and load the mesa images into the texture memory block.
- * This will happen before drawing with a new texture, or drawing with a
- * texture after it was swapped out or teximaged again.
- */
-
-static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
-{
- r300TexObjPtr t;
-
- t = CALLOC_STRUCT(r300_tex_obj);
- texObj->DriverData = t;
- if (t != NULL) {
- if (RADEON_DEBUG & DEBUG_TEXTURE) {
- fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
- (void *)texObj, (void *)t);
- }
-
- /* Initialize non-image-dependent parts of the state:
- */
- t->base.tObj = texObj;
- t->border_fallback = GL_FALSE;
-
- make_empty_list(&t->base);
-
- r300UpdateTexWrap(t);
- r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
- r300SetTexBorderColor(t, texObj->BorderColor);
- }
-
- return t;
-}
-
-/* try to find a format which will only need a memcopy */
-static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
- GLenum srcType)
-{
- const GLuint ui = 1;
- const GLubyte littleEndian = *((const GLubyte *)&ui);
-
- if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
- return &_mesa_texformat_rgba8888;
- } else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
- return &_mesa_texformat_rgba8888_rev;
- } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
- srcType == GL_UNSIGNED_INT_8_8_8_8)) {
- return &_mesa_texformat_argb8888_rev;
- } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
- srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
- return &_mesa_texformat_argb8888;
- } else
- return _dri_texformat_argb8888;
-}
-
-static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
- GLint
- internalFormat,
- GLenum format,
- GLenum type)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- const GLboolean do32bpt =
- (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
- const GLboolean force16bpt =
- (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
- (void)format;
-
-#if 0
- fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n",
- _mesa_lookup_enum_by_nr(internalFormat), internalFormat,
- _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
- fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt);
-#endif
-
- switch (internalFormat) {
- case 4:
- case GL_RGBA:
- case GL_COMPRESSED_RGBA:
- switch (type) {
- case GL_UNSIGNED_INT_10_10_10_2:
- case GL_UNSIGNED_INT_2_10_10_10_REV:
- return do32bpt ? _dri_texformat_argb8888 :
- _dri_texformat_argb1555;
- case GL_UNSIGNED_SHORT_4_4_4_4:
- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
- return _dri_texformat_argb4444;
- case GL_UNSIGNED_SHORT_5_5_5_1:
- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
- return _dri_texformat_argb1555;
- default:
- return do32bpt ? r300Choose8888TexFormat(format, type) :
- _dri_texformat_argb4444;
- }
-
- case 3:
- case GL_RGB:
- case GL_COMPRESSED_RGB:
- switch (type) {
- case GL_UNSIGNED_SHORT_4_4_4_4:
- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
- return _dri_texformat_argb4444;
- case GL_UNSIGNED_SHORT_5_5_5_1:
- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
- return _dri_texformat_argb1555;
- case GL_UNSIGNED_SHORT_5_6_5:
- case GL_UNSIGNED_SHORT_5_6_5_REV:
- return _dri_texformat_rgb565;
- default:
- return do32bpt ? _dri_texformat_argb8888 :
- _dri_texformat_rgb565;
- }
-
- case GL_RGBA8:
- case GL_RGB10_A2:
- case GL_RGBA12:
- case GL_RGBA16:
- return !force16bpt ?
- r300Choose8888TexFormat(format,
- type) : _dri_texformat_argb4444;
-
- case GL_RGBA4:
- case GL_RGBA2:
- return _dri_texformat_argb4444;
-
- case GL_RGB5_A1:
- return _dri_texformat_argb1555;
-
- case GL_RGB8:
- case GL_RGB10:
- case GL_RGB12:
- case GL_RGB16:
- return !force16bpt ? _dri_texformat_argb8888 :
- _dri_texformat_rgb565;
-
- case GL_RGB5:
- case GL_RGB4:
- case GL_R3_G3_B2:
- return _dri_texformat_rgb565;
-
- case GL_ALPHA:
- case GL_ALPHA4:
- case GL_ALPHA8:
- case GL_ALPHA12:
- case GL_ALPHA16:
- case GL_COMPRESSED_ALPHA:
- return _dri_texformat_a8;
-
- case 1:
- case GL_LUMINANCE:
- case GL_LUMINANCE4:
- case GL_LUMINANCE8:
- case GL_LUMINANCE12:
- case GL_LUMINANCE16:
- case GL_COMPRESSED_LUMINANCE:
- return _dri_texformat_l8;
-
- case 2:
- case GL_LUMINANCE_ALPHA:
- case GL_LUMINANCE4_ALPHA4:
- case GL_LUMINANCE6_ALPHA2:
- case GL_LUMINANCE8_ALPHA8:
- case GL_LUMINANCE12_ALPHA4:
- case GL_LUMINANCE12_ALPHA12:
- case GL_LUMINANCE16_ALPHA16:
- case GL_COMPRESSED_LUMINANCE_ALPHA:
- return _dri_texformat_al88;
-
- case GL_INTENSITY:
- case GL_INTENSITY4:
- case GL_INTENSITY8:
- case GL_INTENSITY12:
- case GL_INTENSITY16:
- case GL_COMPRESSED_INTENSITY:
- return _dri_texformat_i8;
-
- case GL_YCBCR_MESA:
- if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
- type == GL_UNSIGNED_BYTE)
- return &_mesa_texformat_ycbcr;
- else
- return &_mesa_texformat_ycbcr_rev;
-
- case GL_RGB_S3TC:
- case GL_RGB4_S3TC:
- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
- return &_mesa_texformat_rgb_dxt1;
-
- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
- return &_mesa_texformat_rgba_dxt1;
-
- case GL_RGBA_S3TC:
- case GL_RGBA4_S3TC:
- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
- return &_mesa_texformat_rgba_dxt3;
-
- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
- return &_mesa_texformat_rgba_dxt5;
-
- case GL_ALPHA16F_ARB:
- return &_mesa_texformat_alpha_float16;
- case GL_ALPHA32F_ARB:
- return &_mesa_texformat_alpha_float32;
- case GL_LUMINANCE16F_ARB:
- return &_mesa_texformat_luminance_float16;
- case GL_LUMINANCE32F_ARB:
- return &_mesa_texformat_luminance_float32;
- case GL_LUMINANCE_ALPHA16F_ARB:
- return &_mesa_texformat_luminance_alpha_float16;
- case GL_LUMINANCE_ALPHA32F_ARB:
- return &_mesa_texformat_luminance_alpha_float32;
- case GL_INTENSITY16F_ARB:
- return &_mesa_texformat_intensity_float16;
- case GL_INTENSITY32F_ARB:
- return &_mesa_texformat_intensity_float32;
- case GL_RGB16F_ARB:
- return &_mesa_texformat_rgba_float16;
- case GL_RGB32F_ARB:
- return &_mesa_texformat_rgba_float32;
- case GL_RGBA16F_ARB:
- return &_mesa_texformat_rgba_float16;
- case GL_RGBA32F_ARB:
- return &_mesa_texformat_rgba_float32;
-
- case GL_DEPTH_COMPONENT:
- case GL_DEPTH_COMPONENT16:
- case GL_DEPTH_COMPONENT24:
- case GL_DEPTH_COMPONENT32:
-#if 0
- switch (type) {
- case GL_UNSIGNED_BYTE:
- case GL_UNSIGNED_SHORT:
- return &_mesa_texformat_z16;
- case GL_UNSIGNED_INT:
- return &_mesa_texformat_z32;
- case GL_UNSIGNED_INT_24_8_EXT:
- default:
- return &_mesa_texformat_z24_s8;
- }
-#else
- return &_mesa_texformat_z16;
-#endif
-
- default:
- _mesa_problem(ctx,
- "unexpected internalFormat 0x%x in r300ChooseTextureFormat",
- (int)internalFormat);
- return NULL;
- }
-
- return NULL; /* never get here */
-}
-
-static GLboolean
-r300ValidateClientStorage(GLcontext * ctx, GLenum target,
- GLint internalFormat,
- GLint srcWidth, GLint srcHeight,
- GLenum format, GLenum type, const void *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
-
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "intformat %s format %s type %s\n",
- _mesa_lookup_enum_by_nr(internalFormat),
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type));
-
- if (!ctx->Unpack.ClientStorage)
- return 0;
-
- if (ctx->_ImageTransferState ||
- texImage->IsCompressed || texObj->GenerateMipmap)
- return 0;
-
- /* This list is incomplete, may be different on ppc???
- */
- switch (internalFormat) {
- case GL_RGBA:
- if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) {
- texImage->TexFormat = _dri_texformat_argb8888;
- } else
- return 0;
- break;
-
- case GL_RGB:
- if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
- texImage->TexFormat = _dri_texformat_rgb565;
- } else
- return 0;
- break;
-
- case GL_YCBCR_MESA:
- if (format == GL_YCBCR_MESA &&
- type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
- texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
- } else if (format == GL_YCBCR_MESA &&
- (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
- type == GL_UNSIGNED_BYTE)) {
- texImage->TexFormat = &_mesa_texformat_ycbcr;
- } else
- return 0;
- break;
-
- default:
- return 0;
- }
-
- /* Could deal with these packing issues, but currently don't:
- */
- if (packing->SkipPixels ||
- packing->SkipRows || packing->SwapBytes || packing->LsbFirst) {
- return 0;
- }
-
- GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
- format, type);
-
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: srcRowStride %d/%x\n",
- __FUNCTION__, srcRowStride, srcRowStride);
-
- /* Could check this later in upload, pitch restrictions could be
- * relaxed, but would need to store the image pitch somewhere,
- * as packing details might change before image is uploaded:
- */
- if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
- || (srcRowStride & 63))
- return 0;
-
- /* Have validated that _mesa_transfer_teximage would be a straight
- * memcpy at this point. NOTE: future calls to TexSubImage will
- * overwrite the client data. This is explicitly mentioned in the
- * extension spec.
- */
- texImage->Data = (void *)pixels;
- texImage->IsClientData = GL_TRUE;
- texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
-
- return 1;
-}
-
-static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint border,
- GLenum format, GLenum type, const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
-{
- driTextureObject *t = (driTextureObject *) texObj->DriverData;
-
- if (t) {
- driSwapOutTextureObject(t);
- } else {
- t = (driTextureObject *) r300AllocTexObj(texObj);
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
- return;
- }
- }
-
- /* Note, this will call ChooseTextureFormat */
- _mesa_store_teximage1d(ctx, target, level, internalFormat,
- width, border, format, type, pixels,
- &ctx->Unpack, texObj, texImage);
-
- t->dirty_images[0] |= (1 << level);
-}
-
-static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
- GLint xoffset,
- GLsizei width,
- GLenum format, GLenum type,
- const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
-{
- driTextureObject *t = (driTextureObject *) texObj->DriverData;
-
- assert(t); /* this _should_ be true */
- if (t) {
- driSwapOutTextureObject(t);
- } else {
- t = (driTextureObject *) r300AllocTexObj(texObj);
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
- return;
- }
- }
-
- _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
- format, type, pixels, packing, texObj,
- texImage);
-
- t->dirty_images[0] |= (1 << level);
-}
-
-static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint border,
- GLenum format, GLenum type, const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
-{
- driTextureObject *t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face =
- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- if (t != NULL) {
- driSwapOutTextureObject(t);
- } else {
- t = (driTextureObject *) r300AllocTexObj(texObj);
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
- return;
- }
- }
-
- texImage->IsClientData = GL_FALSE;
-
- if (r300ValidateClientStorage(ctx, target,
- internalFormat,
- width, height,
- format, type, pixels,
- packing, texObj, texImage)) {
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using client storage\n",
- __FUNCTION__);
- } else {
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using normal storage\n",
- __FUNCTION__);
-
- /* Normal path: copy (to cached memory) and eventually upload
- * via another copy to GART memory and then a blit... Could
- * eliminate one copy by going straight to (permanent) GART.
- *
- * Note, this will call r300ChooseTextureFormat.
- */
- _mesa_store_teximage2d(ctx, target, level, internalFormat,
- width, height, border, format, type,
- pixels, &ctx->Unpack, texObj, texImage);
-
- t->dirty_images[face] |= (1 << level);
- }
-}
-
-static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
- GLint xoffset, GLint yoffset,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
-{
- driTextureObject *t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face =
- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- assert(t); /* this _should_ be true */
- if (t) {
- driSwapOutTextureObject(t);
- } else {
- t = (driTextureObject *) r300AllocTexObj(texObj);
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
- return;
- }
- }
-
- _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
- height, format, type, pixels, packing, texObj,
- texImage);
-
- t->dirty_images[face] |= (1 << level);
-}
-
-static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
- GLint level, GLint internalFormat,
- GLint width, GLint height, GLint border,
- GLsizei imageSize, const GLvoid * data,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
-{
- driTextureObject *t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face =
- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- if (t != NULL) {
- driSwapOutTextureObject(t);
- } else {
- t = (driTextureObject *) r300AllocTexObj(texObj);
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY,
- "glCompressedTexImage2D");
- return;
- }
- }
-
- texImage->IsClientData = GL_FALSE;
-
- /* can't call this, different parameters. Would never evaluate to true anyway currently */
-#if 0
- if (r300ValidateClientStorage(ctx, target,
- internalFormat,
- width, height,
- format, type, pixels,
- packing, texObj, texImage)) {
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using client storage\n",
- __FUNCTION__);
- } else
-#endif
- {
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using normal storage\n",
- __FUNCTION__);
-
- /* Normal path: copy (to cached memory) and eventually upload
- * via another copy to GART memory and then a blit... Could
- * eliminate one copy by going straight to (permanent) GART.
- *
- * Note, this will call r300ChooseTextureFormat.
- */
- _mesa_store_compressed_teximage2d(ctx, target, level,
- internalFormat, width, height,
- border, imageSize, data,
- texObj, texImage);
-
- t->dirty_images[face] |= (1 << level);
- }
-}
-
-static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
- GLint level, GLint xoffset,
- GLint yoffset, GLsizei width,
- GLsizei height, GLenum format,
- GLsizei imageSize, const GLvoid * data,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
-{
- driTextureObject *t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face =
- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- assert(t); /* this _should_ be true */
- if (t) {
- driSwapOutTextureObject(t);
- } else {
- t = (driTextureObject *) r300AllocTexObj(texObj);
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY,
- "glCompressedTexSubImage3D");
- return;
- }
- }
-
- _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset,
- yoffset, width, height, format,
- imageSize, data, texObj, texImage);
-
- t->dirty_images[face] |= (1 << level);
-}
-
-static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint depth,
- GLint border,
- GLenum format, GLenum type, const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
-{
- driTextureObject *t = (driTextureObject *) texObj->DriverData;
-
- if (t) {
- driSwapOutTextureObject(t);
- } else {
- t = (driTextureObject *) r300AllocTexObj(texObj);
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
- return;
- }
- }
-
- texImage->IsClientData = GL_FALSE;
-
-#if 0
- if (r300ValidateClientStorage(ctx, target,
- internalFormat,
- width, height,
- format, type, pixels,
- packing, texObj, texImage)) {
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using client storage\n",
- __FUNCTION__);
- } else
-#endif
- {
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: Using normal storage\n",
- __FUNCTION__);
-
- /* Normal path: copy (to cached memory) and eventually upload
- * via another copy to GART memory and then a blit... Could
- * eliminate one copy by going straight to (permanent) GART.
- *
- * Note, this will call r300ChooseTextureFormat.
- */
- _mesa_store_teximage3d(ctx, target, level, internalFormat,
- width, height, depth, border,
- format, type, pixels,
- &ctx->Unpack, texObj, texImage);
-
- t->dirty_images[0] |= (1 << level);
- }
-}
-
-static void
-r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLsizei height, GLsizei depth,
- GLenum format, GLenum type,
- const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
-{
- driTextureObject *t = (driTextureObject *) texObj->DriverData;
-
-/* fprintf(stderr, "%s\n", __FUNCTION__); */
-
- assert(t); /* this _should_ be true */
- if (t) {
- driSwapOutTextureObject(t);
- } else {
- t = (driTextureObject *) r300AllocTexObj(texObj);
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
- return;
- }
- texObj->DriverData = t;
- }
-
- _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
- width, height, depth,
- format, type, pixels, packing, texObj,
- texImage);
-
- t->dirty_images[0] |= (1 << level);
-}
-
-/**
* Changes variables and flags for a state update, which will happen at the
* next UpdateTextureState
*/
@@ -913,9 +195,9 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
struct gl_texture_object *texObj,
GLenum pname, const GLfloat * params)
{
- r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData;
+ radeonTexObj* t = radeon_tex_obj(texObj);
- if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
fprintf(stderr, "%s( %s )\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(pname));
}
@@ -946,7 +228,11 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
* we just have to rely on loading the right subset of mipmap levels
* to simulate a clamped LOD.
*/
- driSwapOutTextureObject((driTextureObject *) t);
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = 0;
+ t->validated = GL_FALSE;
+ }
break;
case GL_DEPTH_TEXTURE_MODE:
@@ -969,42 +255,35 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
}
}
-static void r300BindTexture(GLcontext * ctx, GLenum target,
- struct gl_texture_object *texObj)
-{
- if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
- fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__,
- (void *)texObj, ctx->Texture.CurrentUnit);
- }
-
- if ((target == GL_TEXTURE_1D)
- || (target == GL_TEXTURE_2D)
- || (target == GL_TEXTURE_3D)
- || (target == GL_TEXTURE_CUBE_MAP)
- || (target == GL_TEXTURE_RECTANGLE_NV)) {
- assert(texObj->DriverData != NULL);
- }
-}
-
static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- driTextureObject *t = (driTextureObject *) texObj->DriverData;
+ radeonTexObj* t = radeon_tex_obj(texObj);
- if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
(void *)texObj,
_mesa_lookup_enum_by_nr(texObj->Target));
}
- if (t != NULL) {
- if (rmesa) {
- R300_FIREVERTICES(rmesa);
- }
+ if (rmesa) {
+ int i;
+ radeon_firevertices(&rmesa->radeon);
+
+ for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
+ if (rmesa->hw.textures[i] == t)
+ rmesa->hw.textures[i] = 0;
+ }
- driDestroyTextureObject(t);
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = 0;
}
- /* Free mipmap images and the texture object itself */
_mesa_delete_texture_object(ctx, texObj);
}
@@ -1013,8 +292,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
* Called via ctx->Driver.NewTextureObject.
* Note: this function will be called during context creation to
* allocate the default texture objects.
- * Note: we could use containment here to 'derive' the driver-specific
- * texture object from the core mesa gl_texture_object. Not done at this time.
* Fixup MaxAnisotropy according to user preference.
*/
static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
@@ -1022,14 +299,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
GLenum target)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct gl_texture_object *obj;
- obj = _mesa_new_texture_object(ctx, name, target);
- if (!obj)
- return NULL;
- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
+ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
+
- r300AllocTexObj(obj);
- return obj;
+ if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
+ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+ t, _mesa_lookup_enum_by_nr(target));
+ }
+
+ _mesa_initialize_texture_object(&t->base, name, target);
+ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+
+ /* Initialize hardware state */
+ r300UpdateTexWrap(t);
+ r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
+ r300SetTexBorderColor(t, t->base.BorderColor);
+
+ return &t->base;
}
void r300InitTextureFuncs(struct dd_function_table *functions)
@@ -1037,22 +323,30 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
/* Note: we only plug in the functions we implement in the driver
* since _mesa_init_driver_functions() was already called.
*/
- functions->ChooseTextureFormat = r300ChooseTextureFormat;
- functions->TexImage1D = r300TexImage1D;
- functions->TexImage2D = r300TexImage2D;
- functions->TexImage3D = r300TexImage3D;
- functions->TexSubImage1D = r300TexSubImage1D;
- functions->TexSubImage2D = r300TexSubImage2D;
- functions->TexSubImage3D = r300TexSubImage3D;
+ functions->NewTextureImage = radeonNewTextureImage;
+ functions->FreeTexImageData = radeonFreeTexImageData;
+ functions->MapTexture = radeonMapTexture;
+ functions->UnmapTexture = radeonUnmapTexture;
+
+ functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
+ functions->TexImage1D = radeonTexImage1D;
+ functions->TexImage2D = radeonTexImage2D;
+ functions->TexImage3D = radeonTexImage3D;
+ functions->TexSubImage1D = radeonTexSubImage1D;
+ functions->TexSubImage2D = radeonTexSubImage2D;
+ functions->TexSubImage3D = radeonTexSubImage3D;
+ functions->GetTexImage = radeonGetTexImage;
+ functions->GetCompressedTexImage = radeonGetCompressedTexImage;
functions->NewTextureObject = r300NewTextureObject;
- functions->BindTexture = r300BindTexture;
functions->DeleteTexture = r300DeleteTexture;
functions->IsTextureResident = driIsTextureResident;
functions->TexParameter = r300TexParameter;
- functions->CompressedTexImage2D = r300CompressedTexImage2D;
- functions->CompressedTexSubImage2D = r300CompressedTexSubImage2D;
+ functions->CompressedTexImage2D = radeonCompressedTexImage2D;
+ functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+
+ functions->GenerateMipmap = radeonGenerateMipmap;
driInitTextureFormats();
}
diff --git a/r300/r300_tex.h b/r300/r300_tex.h
index b86d45b..8a653ea 100644
--- a/r300/r300_tex.h
+++ b/r300/r300_tex.h
@@ -37,16 +37,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extern void r300SetDepthTexMode(struct gl_texture_object *tObj);
+extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
+ __DRIdrawable *dPriv);
+
+extern void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
+ GLint format, __DRIdrawable *dPriv);
+
extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
unsigned long long offset, GLint depth,
GLuint pitch);
-extern void r300UpdateTextureState(GLcontext * ctx);
-
-extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t,
- GLuint face);
-
-extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t);
+extern GLboolean r300ValidateBuffers(GLcontext * ctx);
extern void r300InitTextureFuncs(struct dd_function_table *functions);
diff --git a/r300/r300_texmem.c b/r300/r300_texmem.c
deleted file mode 100644
index a89ab83..0000000
--- a/r300/r300_texmem.c
+++ /dev/null
@@ -1,568 +0,0 @@
-/**************************************************************************
-
-Copyright (C) Tungsten Graphics 2002. All Rights Reserved.
-The Weather Channel, Inc. funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86
-license. This notice must be preserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation on the rights to use, copy, modify, merge, publish,
-distribute, sub license, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-**************************************************************************/
-
-/**
- * \file
- *
- * \author Gareth Hughes <gareth@valinux.com>
- *
- * \author Kevin E. Martin <martin@valinux.com>
- */
-
-#include <errno.h>
-
-#include "main/glheader.h"
-#include "main/imports.h"
-#include "main/context.h"
-#include "main/colormac.h"
-#include "main/macros.h"
-#include "main/simple_list.h"
-#include "main/texobj.h"
-#include "radeon_reg.h" /* gets definition for usleep */
-#include "r300_context.h"
-#include "r300_state.h"
-#include "r300_cmdbuf.h"
-#include "radeon_ioctl.h"
-#include "r300_tex.h"
-#include "r300_ioctl.h"
-#include <unistd.h> /* for usleep() */
-
-#ifdef USER_BUFFERS
-#include "r300_mem.h"
-#endif
-
-/**
- * Destroy any device-dependent state associated with the texture. This may
- * include NULLing out hardware state that points to the texture.
- */
-void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
-{
- int i;
-
- if (RADEON_DEBUG & DEBUG_TEXTURE) {
- fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
- (void *)t, (void *)t->base.tObj);
- }
-
- for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
- if (rmesa->state.texture.unit[i].texobj == t->base.tObj) {
- _mesa_reference_texobj(&rmesa->state.texture.unit[i].texobj, NULL);
- }
- }
-}
-
-/* ------------------------------------------------------------
- * Texture image conversions
- */
-
-static void r300UploadGARTClientSubImage(r300ContextPtr rmesa,
- r300TexObjPtr t,
- struct gl_texture_image *texImage,
- GLint hwlevel,
- GLint x, GLint y,
- GLint width, GLint height)
-{
- const struct gl_texture_format *texFormat = texImage->TexFormat;
- GLuint srcPitch, dstPitch;
- int blit_format;
- int srcOffset;
-
- /*
- * XXX it appears that we always upload the full image, not a subimage.
- * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever
- * changed, the src pitch will have to change.
- */
- switch (texFormat->TexelBytes) {
- case 1:
- blit_format = R300_CP_COLOR_FORMAT_CI8;
- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
- break;
- case 2:
- blit_format = R300_CP_COLOR_FORMAT_RGB565;
- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
- break;
- case 4:
- blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
- break;
- case 8:
- case 16:
- blit_format = R300_CP_COLOR_FORMAT_CI8;
- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
- break;
- default:
- return;
- }
-
- t->image[0][hwlevel].data = texImage->Data;
- srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
-
- assert(srcOffset != ~0);
-
- /* Don't currently need to cope with small pitches?
- */
- width = texImage->Width;
- height = texImage->Height;
-
- if (texFormat->TexelBytes > 4) {
- width *= texFormat->TexelBytes;
- }
-
- r300EmitWait(rmesa, R300_WAIT_3D);
-
- r300EmitBlit(rmesa, blit_format,
- srcPitch,
- srcOffset,
- dstPitch,
- t->bufAddr,
- x,
- y,
- t->image[0][hwlevel].x + x,
- t->image[0][hwlevel].y + y, width, height);
-
- r300EmitWait(rmesa, R300_WAIT_2D);
-}
-
-static void r300UploadRectSubImage(r300ContextPtr rmesa,
- r300TexObjPtr t,
- struct gl_texture_image *texImage,
- GLint x, GLint y, GLint width, GLint height)
-{
- const struct gl_texture_format *texFormat = texImage->TexFormat;
- int blit_format, dstPitch, done;
-
- switch (texFormat->TexelBytes) {
- case 1:
- blit_format = R300_CP_COLOR_FORMAT_CI8;
- break;
- case 2:
- blit_format = R300_CP_COLOR_FORMAT_RGB565;
- break;
- case 4:
- blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
- break;
- case 8:
- case 16:
- blit_format = R300_CP_COLOR_FORMAT_CI8;
- break;
- default:
- return;
- }
-
- t->image[0][0].data = texImage->Data;
-
- /* Currently don't need to cope with small pitches.
- */
- width = texImage->Width;
- height = texImage->Height;
- dstPitch = t->pitch;
-
- if (texFormat->TexelBytes > 4) {
- width *= texFormat->TexelBytes;
- }
-
- if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
- /* In this case, could also use GART texturing. This is
- * currently disabled, but has been tested & works.
- */
- t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
- t->pitch = texImage->RowStride * texFormat->TexelBytes - 32;
-
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr,
- "Using GART texturing for rectangular client texture\n");
-
- /* Release FB memory allocated for this image:
- */
- /* FIXME This may not be correct as driSwapOutTextureObject sets
- * FIXME dirty_images. It may be fine, though.
- */
- if (t->base.memBlock) {
- driSwapOutTextureObject((driTextureObject *) t);
- }
- } else if (texImage->IsClientData) {
- /* Data already in GART memory, with usable pitch.
- */
- GLuint srcPitch;
- srcPitch = texImage->RowStride * texFormat->TexelBytes;
- r300EmitBlit(rmesa,
- blit_format,
- srcPitch,
- r300GartOffsetFromVirtual(rmesa, texImage->Data),
- dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);
- } else {
- /* Data not in GART memory, or bad pitch.
- */
- for (done = 0; done < height;) {
- struct r300_dma_region region;
- int lines =
- MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch);
- int src_pitch;
- char *tex;
-
- src_pitch = texImage->RowStride * texFormat->TexelBytes;
-
- tex = (char *)texImage->Data + done * src_pitch;
-
- memset(&region, 0, sizeof(region));
- r300AllocDmaRegion(rmesa, &region, lines * dstPitch,
- 1024);
-
- /* Copy texdata to dma:
- */
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr,
- "%s: src_pitch %d dst_pitch %d\n",
- __FUNCTION__, src_pitch, dstPitch);
-
- if (src_pitch == dstPitch) {
- memcpy(region.address + region.start, tex,
- lines * src_pitch);
- } else {
- char *buf = region.address + region.start;
- int i;
- for (i = 0; i < lines; i++) {
- memcpy(buf, tex, src_pitch);
- buf += dstPitch;
- tex += src_pitch;
- }
- }
-
- r300EmitWait(rmesa, R300_WAIT_3D);
-
- /* Blit to framebuffer
- */
- r300EmitBlit(rmesa,
- blit_format,
- dstPitch, GET_START(&region),
- dstPitch | (t->tile_bits >> 16),
- t->bufAddr, 0, 0, 0, done, width, lines);
-
- r300EmitWait(rmesa, R300_WAIT_2D);
-#ifdef USER_BUFFERS
- r300_mem_use(rmesa, region.buf->id);
-#endif
-
- r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__);
- done += lines;
- }
- }
-}
-
-/**
- * Upload the texture image associated with texture \a t at the specified
- * level at the address relative to \a start.
- */
-static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
- GLint hwlevel,
- GLint x, GLint y, GLint width, GLint height,
- GLuint face)
-{
- struct gl_texture_image *texImage = NULL;
- GLuint offset;
- GLint imageWidth, imageHeight;
- GLint ret;
- drm_radeon_texture_t tex;
- drm_radeon_tex_image_t tmp;
- const int level = hwlevel + t->base.firstLevel;
-
- if (RADEON_DEBUG & DEBUG_TEXTURE) {
- fprintf(stderr,
- "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
- __FUNCTION__, (void *)t, (void *)t->base.tObj, level,
- width, height, face);
- }
-
- ASSERT(face < 6);
-
- /* Ensure we have a valid texture to upload */
- if ((hwlevel < 0) || (hwlevel >= R300_MAX_TEXTURE_LEVELS)) {
- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
- return;
- }
-
- texImage = t->base.tObj->Image[face][level];
-
- if (!texImage) {
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: texImage %d is NULL!\n",
- __FUNCTION__, level);
- return;
- }
- if (!texImage->Data) {
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: image data is NULL!\n",
- __FUNCTION__);
- return;
- }
-
- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- assert(level == 0);
- assert(hwlevel == 0);
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: image data is rectangular\n",
- __FUNCTION__);
- r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
- return;
- } else if (texImage->IsClientData) {
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr,
- "%s: image data is in GART client storage\n",
- __FUNCTION__);
- r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
- width, height);
- return;
- } else if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: image data is in normal memory\n",
- __FUNCTION__);
-
- imageWidth = texImage->Width;
- imageHeight = texImage->Height;
-
- offset = t->bufAddr;
-
- if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
- GLint imageX = 0;
- GLint imageY = 0;
- GLint blitX = t->image[face][hwlevel].x;
- GLint blitY = t->image[face][hwlevel].y;
- GLint blitWidth = t->image[face][hwlevel].width;
- GLint blitHeight = t->image[face][hwlevel].height;
- fprintf(stderr, " upload image: %d,%d at %d,%d\n",
- imageWidth, imageHeight, imageX, imageY);
- fprintf(stderr, " upload blit: %d,%d at %d,%d\n",
- blitWidth, blitHeight, blitX, blitY);
- fprintf(stderr, " blit ofs: 0x%07x level: %d/%d\n",
- (GLuint) offset, hwlevel, level);
- }
-
- t->image[face][hwlevel].data = texImage->Data;
-
- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
- * NOTE: we're always use a 1KB-wide blit and I8 texture format.
- * We used to use 1, 2 and 4-byte texels and used to use the texture
- * width to dictate the blit width - but that won't work for compressed
- * textures. (Brian)
- * NOTE: can't do that with texture tiling. (sroland)
- */
- tex.offset = offset;
- tex.image = &tmp;
- /* copy (x,y,width,height,data) */
- memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp));
-
- if (texImage->TexFormat->TexelBytes > 4) {
- const int log2TexelBytes =
- (3 + (texImage->TexFormat->TexelBytes >> 4));
- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
- tex.pitch =
- MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
- 64, 1);
- tex.height = imageHeight;
- tex.width = imageWidth << log2TexelBytes;
- tex.offset += (tmp.x << log2TexelBytes) & ~1023;
- tmp.x = tmp.x % (1024 >> log2TexelBytes);
- tmp.width = tmp.width << log2TexelBytes;
- } else if (texImage->TexFormat->TexelBytes) {
- /* use multi-byte upload scheme */
- tex.height = imageHeight;
- tex.width = imageWidth;
- switch (texImage->TexFormat->TexelBytes) {
- case 1:
- tex.format = RADEON_TXFORMAT_I8;
- break;
- case 2:
- tex.format = RADEON_TXFORMAT_AI88;
- break;
- case 4:
- tex.format = RADEON_TXFORMAT_ARGB8888;
- break;
- }
- tex.pitch =
- MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
- 64, 1);
- tex.offset += tmp.x & ~1023;
- tmp.x = tmp.x % 1024;
-
- if (t->tile_bits & R300_TXO_MICRO_TILE) {
- /* need something like "tiled coordinates" ? */
- tmp.y = tmp.x / (tex.pitch * 128) * 2;
- tmp.x =
- tmp.x % (tex.pitch * 128) / 2 /
- texImage->TexFormat->TexelBytes;
- tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
- } else {
- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
- }
-#if 1
- if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
- (texImage->Width * texImage->TexFormat->TexelBytes >= 256)
- && ((!(t->tile_bits & R300_TXO_MICRO_TILE)
- && (texImage->Height >= 8))
- || (texImage->Height >= 16))) {
- /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
- OR if height is smaller than 8 automatically, but if micro tiling is active
- the limit is height 16 instead ? */
- tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
- }
-#endif
- } else {
- /* In case of for instance 8x8 texture (2x2 dxt blocks),
- padding after the first two blocks is needed (only
- with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block)
- has 4 real pixels. Needed so the kernel module reads
- the right amount of data. */
- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
- tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
- tex.height = (imageHeight + 3) / 4;
- tex.width = (imageWidth + 3) / 4;
- if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
- tex.width *= 8;
- } else {
- tex.width *= 16;
- }
- }
-
- LOCK_HARDWARE(&rmesa->radeon);
- do {
- ret =
- drmCommandWriteRead(rmesa->radeon.dri.fd,
- DRM_RADEON_TEXTURE, &tex,
- sizeof(drm_radeon_texture_t));
- if (ret) {
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr,
- "DRM_RADEON_TEXTURE: again!\n");
- usleep(1);
- }
- } while (ret == -EAGAIN);
-
- UNLOCK_HARDWARE(&rmesa->radeon);
-
- if (ret) {
- fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret);
- fprintf(stderr, " offset=0x%08x\n", offset);
- fprintf(stderr, " image width=%d height=%d\n",
- imageWidth, imageHeight);
- fprintf(stderr, " blit width=%d height=%d data=%p\n",
- t->image[face][hwlevel].width,
- t->image[face][hwlevel].height,
- t->image[face][hwlevel].data);
- _mesa_exit(-1);
- }
-}
-
-/**
- * Upload the texture images associated with texture \a t. This might
- * require the allocation of texture memory.
- *
- * \param rmesa Context pointer
- * \param t Texture to be uploaded
- * \param face Cube map face to be uploaded. Zero for non-cube maps.
- */
-
-int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
-{
- const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
- if (t->image_override)
- return 0;
-
- if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
- fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
- (void *)rmesa->radeon.glCtx, (void *)t->base.tObj,
- t->base.totalSize, t->base.firstLevel,
- t->base.lastLevel);
- }
-
- if (t->base.totalSize == 0)
- return 0;
-
- if (RADEON_DEBUG & DEBUG_SYNC) {
- fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
- radeonFinish(rmesa->radeon.glCtx);
- }
-
- LOCK_HARDWARE(&rmesa->radeon);
-
- if (t->base.memBlock == NULL) {
- int heap;
-
- heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps,
- (driTextureObject *) t);
- if (heap == -1) {
- UNLOCK_HARDWARE(&rmesa->radeon);
- return -1;
- }
-
- /* Set the base offset of the texture image */
- t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap]
- + t->base.memBlock->ofs;
- t->offset = t->bufAddr;
-
- if (!(t->base.tObj->Image[0][0]->IsClientData)) {
- /* hope it's safe to add that here... */
- t->offset |= t->tile_bits;
- }
- }
-
- /* Let the world know we've used this memory recently.
- */
- driUpdateTextureLRU((driTextureObject *) t);
- UNLOCK_HARDWARE(&rmesa->radeon);
-
- /* Upload any images that are new */
- if (t->base.dirty_images[face]) {
- int i;
- for (i = 0; i < numLevels; i++) {
- if ((t->base.
- dirty_images[face] & (1 <<
- (i + t->base.firstLevel))) !=
- 0) {
- r300UploadSubImage(rmesa, t, i, 0, 0,
- t->image[face][i].width,
- t->image[face][i].height,
- face);
- }
- }
- t->base.dirty_images[face] = 0;
- }
-
- if (RADEON_DEBUG & DEBUG_SYNC) {
- fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
- radeonFinish(rmesa->radeon.glCtx);
- }
-
- return 0;
-}
diff --git a/r300/r300_texstate.c b/r300/r300_texstate.c
index f6ae4b6..f030451 100644
--- a/r300/r300_texstate.c
+++ b/r300/r300_texstate.c
@@ -43,11 +43,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
+#include "main/simple_list.h"
#include "r300_context.h"
#include "r300_state.h"
#include "r300_ioctl.h"
-#include "radeon_ioctl.h"
+#include "radeon_mipmap_tree.h"
#include "r300_tex.h"
#include "r300_reg.h"
@@ -117,7 +118,12 @@ static const struct tx_table {
_ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)),
_ASSIGN(Z16, R300_EASY_TX_FORMAT(X, X, X, X, X16)),
_ASSIGN(Z24_S8, R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8)),
+ _ASSIGN(S8_Z24, R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8)),
_ASSIGN(Z32, R300_EASY_TX_FORMAT(X, X, X, X, X32)),
+ /* EXT_texture_sRGB */
+ _ASSIGN(SRGBA8, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA),
+ _ASSIGN(SLA8, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA),
+ _ASSIGN(SL8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | R300_TX_FORMAT_GAMMA),
/* *INDENT-ON* */
};
@@ -143,13 +149,12 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
},
};
const GLuint *format;
- r300TexObjPtr t;
+ radeonTexObjPtr t;
if (!tObj)
return;
- t = (r300TexObjPtr) tObj->DriverData;
-
+ t = radeon_tex_obj(tObj);
switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
case MESA_FORMAT_Z16:
@@ -171,13 +176,13 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
switch (tObj->DepthMode) {
case GL_LUMINANCE:
- t->format = format[0];
+ t->pp_txformat = format[0];
break;
case GL_INTENSITY:
- t->format = format[1];
+ t->pp_txformat = format[1];
break;
case GL_ALPHA:
- t->format = format[2];
+ t->pp_txformat = format[2];
break;
default:
/* Error...which should have already been caught by higher
@@ -190,486 +195,307 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
/**
- * Compute sizes and fill in offset and blit information for the given
- * image (determined by \p face and \p level).
- *
- * \param curOffset points to the offset at which the image is to be stored
- * and is updated by this function according to the size of the image.
- */
-static void compute_tex_image_offset(
- struct gl_texture_object *tObj,
- GLuint face,
- GLint level,
- GLint* curOffset)
-{
- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
- const struct gl_texture_image* texImage;
- GLuint blitWidth = R300_BLIT_WIDTH_BYTES;
- GLuint texelBytes;
- GLuint size;
-
- texImage = tObj->Image[0][level + t->base.firstLevel];
- if (!texImage)
- return;
-
- texelBytes = texImage->TexFormat->TexelBytes;
-
- /* find image size in bytes */
- if (texImage->IsCompressed) {
- if ((t->format & R300_TX_FORMAT_DXT1) ==
- R300_TX_FORMAT_DXT1) {
- // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
- if ((texImage->Width + 3) < 8) /* width one block */
- size = texImage->CompressedSize * 4;
- else if ((texImage->Width + 3) < 16)
- size = texImage->CompressedSize * 2;
- else
- size = texImage->CompressedSize;
- } else {
- /* DXT3/5, 16 bytes per block */
- WARN_ONCE
- ("DXT 3/5 suffers from multitexturing problems!\n");
- // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
- if ((texImage->Width + 3) < 8)
- size = texImage->CompressedSize * 2;
- else
- size = texImage->CompressedSize;
- }
- } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- size =
- ((texImage->Width * texelBytes +
- 63) & ~63) * texImage->Height;
- blitWidth = 64 / texelBytes;
- } else if (t->tile_bits & R300_TXO_MICRO_TILE) {
- /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
- though the actual offset may be different (if texture is less than
- 32 bytes width) to the untiled case */
- int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
- size =
- (w * ((texImage->Height + 1) / 2)) *
- texImage->Depth;
- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
- } else {
- int w = (texImage->Width * texelBytes + 31) & ~31;
- size = w * texImage->Height * texImage->Depth;
- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
- }
- assert(size > 0);
-
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
- texImage->Width, texImage->Height,
- texImage->Depth,
- texImage->TexFormat->TexelBytes,
- texImage->InternalFormat);
-
- /* All images are aligned to a 32-byte offset */
- *curOffset = (*curOffset + 0x1f) & ~0x1f;
-
- if (texelBytes) {
- /* fix x and y coords up later together with offset */
- t->image[face][level].x = *curOffset;
- t->image[face][level].y = 0;
- t->image[face][level].width =
- MIN2(size / texelBytes, blitWidth);
- t->image[face][level].height =
- (size / texelBytes) / t->image[face][level].width;
- } else {
- t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES;
- t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES;
- t->image[face][level].width =
- MIN2(size, R300_BLIT_WIDTH_BYTES);
- t->image[face][level].height = size / t->image[face][level].width;
- }
-
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr,
- "level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
- level, face, texImage->Width, texImage->Height,
- t->image[face][level].x, t->image[face][level].y,
- t->image[face][level].width, t->image[face][level].height,
- size, *curOffset);
-
- *curOffset += size;
-}
-
-
-
-/**
- * This function computes the number of bytes of storage needed for
- * the given texture object (all mipmap levels, all cube faces).
- * The \c image[face][level].x/y/width/height parameters for upload/blitting
- * are computed here. \c filter, \c format, etc. will be set here
- * too.
+ * Compute the cached hardware register values for the given texture object.
*
* \param rmesa Context pointer
- * \param tObj GL texture object whose images are to be posted to
- * hardware state.
+ * \param t the r300 texture object
*/
-static void r300SetTexImages(r300ContextPtr rmesa,
- struct gl_texture_object *tObj)
+static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
{
- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
- const struct gl_texture_image *baseImage =
- tObj->Image[0][tObj->BaseLevel];
- GLint curOffset;
- GLint i, texelBytes;
- GLint numLevels;
- GLint log2Width, log2Height, log2Depth;
-
- /* Set the hardware texture format
- */
+ const struct gl_texture_image *firstImage;
+ int firstlevel = t->mt ? t->mt->firstLevel : 0;
+
+ firstImage = t->base.Image[0][firstlevel];
+
if (!t->image_override
- && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) {
- if (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
- r300SetDepthTexMode(tObj);
+ && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
+ if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
+ r300SetDepthTexMode(&t->base);
} else {
- t->format = tx_table[baseImage->TexFormat->MesaFormat].format;
+ t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format;
}
- t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter;
+ t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter;
} else if (!t->image_override) {
_mesa_problem(NULL, "unexpected texture format in %s",
__FUNCTION__);
return;
}
- texelBytes = baseImage->TexFormat->TexelBytes;
-
- /* Compute which mipmap levels we really want to send to the hardware.
- */
- driCalculateTextureFirstLastLevel((driTextureObject *) t);
- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
-
- numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+ if (t->image_override && t->bo)
+ return;
- assert(numLevels <= R300_MAX_TEXTURE_LEVELS);
+ t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
+ | ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)
+ | ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)
+ | ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT));
- /* Calculate mipmap offsets and dimensions for blitting (uploading)
- * The idea is that we lay out the mipmap levels within a block of
- * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
- */
t->tile_bits = 0;
- /* figure out if this texture is suitable for tiling. */
-#if 0 /* Disabled for now */
- if (texelBytes) {
- if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
- /* texrect might be able to use micro tiling too in theory? */
- (baseImage->Height > 1)) {
-
- /* allow 32 (bytes) x 1 mip (which will use two times the space
- the non-tiled version would use) max if base texture is large enough */
- if ((numLevels == 1) ||
- (((baseImage->Width * texelBytes /
- baseImage->Height) <= 32)
- && (baseImage->Width * texelBytes > 64))
- ||
- ((baseImage->Width * texelBytes /
- baseImage->Height) <= 16)) {
- t->tile_bits |= R300_TXO_MICRO_TILE;
- }
- }
-
- if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
- /* we can set macro tiling even for small textures, they will be untiled anyway */
- t->tile_bits |= R300_TXO_MACRO_TILE;
- }
- }
-#endif
-
- curOffset = 0;
+ if (t->base.Target == GL_TEXTURE_CUBE_MAP)
+ t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP;
+ if (t->base.Target == GL_TEXTURE_3D)
+ t->pp_txformat |= R300_TX_FORMAT_3D;
- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
- ASSERT(log2Width == log2Height);
- t->format |= R300_TX_FORMAT_CUBIC_MAP;
- for(i = 0; i < numLevels; i++) {
- GLuint face;
- for(face = 0; face < 6; face++)
- compute_tex_image_offset(tObj, face, i, &curOffset);
- }
- } else {
- if (tObj->Target == GL_TEXTURE_3D)
- t->format |= R300_TX_FORMAT_3D;
-
- for (i = 0; i < numLevels; i++)
- compute_tex_image_offset(tObj, 0, i, &curOffset);
- }
-
- /* Align the total size of texture memory block.
- */
- t->base.totalSize =
- (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
-
- t->size =
- (((tObj->Image[0][t->base.firstLevel]->Width -
- 1) << R300_TX_WIDTHMASK_SHIFT)
- | ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
- R300_TX_HEIGHTMASK_SHIFT)
- | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) <<
- R300_TX_DEPTHMASK_SHIFT))
- | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
-
- t->pitch = 0;
-
- /* Only need to round to nearest 32 for textures, but the blitter
- * requires 64-byte aligned pitches, and we may/may not need the
- * blitter. NPOT only!
- */
- if (baseImage->IsCompressed) {
- t->pitch |=
- (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
- } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- unsigned int align = (64 / texelBytes) - 1;
- t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width *
- texelBytes) + 63) & ~(63);
- t->size |= R300_TX_SIZE_TXPITCH_EN;
+ if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+ unsigned int align = (64 / t->mt->bpp) - 1;
+ t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
if (!t->image_override)
- t->pitch_reg =
- (((tObj->Image[0][t->base.firstLevel]->Width) +
- align) & ~align) - 1;
- } else {
- t->pitch |=
- ((tObj->Image[0][t->base.firstLevel]->Width *
- texelBytes) + 63) & ~(63);
+ t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1;
}
if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- if (tObj->Image[0][t->base.firstLevel]->Width > 2048)
- t->pitch_reg |= R500_TXWIDTH_BIT11;
- if (tObj->Image[0][t->base.firstLevel]->Height > 2048)
- t->pitch_reg |= R500_TXHEIGHT_BIT11;
+ if (firstImage->Width > 2048)
+ t->pp_txpitch |= R500_TXWIDTH_BIT11;
+ if (firstImage->Height > 2048)
+ t->pp_txpitch |= R500_TXHEIGHT_BIT11;
}
}
-/* ================================================================
- * Texture unit state management
+/**
+ * Ensure the given texture is ready for rendering.
+ *
+ * Mostly this means populating the texture object's mipmap tree.
*/
-
-static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit)
+static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+ radeonTexObj *t = radeon_tex_obj(texObj);
- ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
-
- if (t->base.dirty_images[0]) {
- R300_FIREVERTICES(rmesa);
+ if (!radeon_validate_texture_miptree(ctx, texObj))
+ return GL_FALSE;
- r300SetTexImages(rmesa, tObj);
- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
- if (!t->base.memBlock && !t->image_override)
- return GL_FALSE;
- }
+ /* Configure the hardware registers (more precisely, the cached version
+ * of the hardware registers). */
+ setup_hardware_state(rmesa, t);
+ t->validated = GL_TRUE;
return GL_TRUE;
}
-static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
+/**
+ * Ensure all enabled and complete textures are uploaded along with any buffers being used.
+ */
+GLboolean r300ValidateBuffers(GLcontext * ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+ struct radeon_renderbuffer *rrb;
+ int i;
+ int ret;
- ASSERT(tObj->Target == GL_TEXTURE_3D);
+ radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
- /* r300 does not support mipmaps for 3D textures. */
- if ((tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) {
- return GL_FALSE;
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ /* color buffer */
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rrb->bo, 0,
+ RADEON_GEM_DOMAIN_VRAM);
}
- if (t->base.dirty_images[0]) {
- R300_FIREVERTICES(rmesa);
- r300SetTexImages(rmesa, tObj);
- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
- if (!t->base.memBlock)
- return GL_FALSE;
+ /* depth buffer */
+ rrb = radeon_get_depthbuffer(&rmesa->radeon);
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rrb->bo, 0,
+ RADEON_GEM_DOMAIN_VRAM);
}
+
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+ radeonTexObj *t;
- return GL_TRUE;
-}
-
-static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
- GLuint face;
-
- ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
-
- if (t->base.dirty_images[0] || t->base.dirty_images[1] ||
- t->base.dirty_images[2] || t->base.dirty_images[3] ||
- t->base.dirty_images[4] || t->base.dirty_images[5]) {
- /* flush */
- R300_FIREVERTICES(rmesa);
- /* layout memory space, once for all faces */
- r300SetTexImages(rmesa, tObj);
- }
+ if (!ctx->Texture.Unit[i]._ReallyEnabled)
+ continue;
- /* upload (per face) */
- for (face = 0; face < 6; face++) {
- if (t->base.dirty_images[face]) {
- r300UploadTexImages(rmesa,
- (r300TexObjPtr) tObj->DriverData,
- face);
+ if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) {
+ _mesa_warning(ctx,
+ "failed to validate texture for unit %d.\n",
+ i);
}
+ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+ if (t->image_override && t->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ t->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+ else if (t->mt->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ t->mt->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
- if (!t->base.memBlock) {
- /* texmem alloc failed, use s/w fallback */
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
return GL_FALSE;
- }
-
return GL_TRUE;
}
-static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-
- ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
-
- if (t->base.dirty_images[0]) {
- R300_FIREVERTICES(rmesa);
-
- r300SetTexImages(rmesa, tObj);
- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
- if (!t->base.memBlock && !t->image_override &&
- !rmesa->prefer_gart_client_texturing)
- return GL_FALSE;
- }
-
- return GL_TRUE;
-}
-
-static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_ReallyEnabled ?
- texUnit->_Current : NULL;
- r300TexObjPtr t = tObj ? (r300TexObjPtr) tObj->DriverData : NULL;
-
- /* Fallback if there's a texture border */
- if (tObj && tObj->Image[0][tObj->BaseLevel]->Border > 0) {
- tObj = NULL;
- t = NULL;
- }
-
- /* Update state if this is a different texture object to last
- * time.
- */
- if (rmesa->state.texture.unit[unit].texobj != tObj) {
- if (rmesa->state.texture.unit[unit].texobj != NULL) {
- r300TexObjPtr t_old = (r300TexObjPtr) rmesa->state.texture.unit[unit].texobj->DriverData;
-
- /* The old texture is no longer bound to this texture unit.
- * Mark it as such.
- */
-
- t_old->base.bound &= ~(1 << unit);
- }
-
- _mesa_reference_texobj(&rmesa->state.texture.unit[unit].texobj, tObj);
-
- if (t) {
- t->base.bound |= (1 << unit);
- driUpdateTextureLRU(&t->base); /* XXX: should be locked! */
- }
- }
-
- return !t || !t->border_fallback;
-}
-
void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
unsigned long long offset, GLint depth, GLuint pitch)
{
r300ContextPtr rmesa = pDRICtx->driverPrivate;
struct gl_texture_object *tObj =
_mesa_lookup_texture(rmesa->radeon.glCtx, texname);
- r300TexObjPtr t;
+ radeonTexObjPtr t = radeon_tex_obj(tObj);
uint32_t pitch_val;
if (!tObj)
return;
- t = (r300TexObjPtr) tObj->DriverData;
-
t->image_override = GL_TRUE;
if (!offset)
return;
- t->offset = offset;
- t->pitch_reg &= (1 << 13) -1;
+ t->bo = NULL;
+ t->override_offset = offset;
+ t->pp_txpitch &= (1 << 13) -1;
pitch_val = pitch;
switch (depth) {
case 32:
- t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
- t->filter |= tx_table[2].filter;
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+ t->pp_txfilter |= tx_table[2].filter;
pitch_val /= 4;
break;
case 24:
default:
- t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
- t->filter |= tx_table[4].filter;
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+ t->pp_txfilter |= tx_table[4].filter;
pitch_val /= 4;
break;
case 16:
- t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
- t->filter |= tx_table[5].filter;
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+ t->pp_txfilter |= tx_table[5].filter;
pitch_val /= 2;
break;
}
pitch_val--;
- t->pitch_reg |= pitch_val;
+ t->pp_txpitch |= pitch_val;
}
-static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit)
+void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv)
{
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-
- if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) {
- return (r300EnableTextureRect(ctx, unit) &&
- r300UpdateTexture(ctx, unit));
- } else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) {
- return (r300EnableTexture2D(ctx, unit) &&
- r300UpdateTexture(ctx, unit));
- } else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) {
- return (r300EnableTexture3D(ctx, unit) &&
- r300UpdateTexture(ctx, unit));
- } else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) {
- return (r300EnableTextureCube(ctx, unit) &&
- r300UpdateTexture(ctx, unit));
- } else if (texUnit->_ReallyEnabled) {
- return GL_FALSE;
- } else {
- return r300UpdateTexture(ctx, unit);
+ struct gl_texture_unit *texUnit;
+ struct gl_texture_object *texObj;
+ struct gl_texture_image *texImage;
+ struct radeon_renderbuffer *rb;
+ radeon_texture_image *rImage;
+ radeonContextPtr radeon;
+ r300ContextPtr rmesa;
+ struct radeon_framebuffer *rfb;
+ radeonTexObjPtr t;
+ uint32_t pitch_val;
+ uint32_t internalFormat, type, format;
+
+ type = GL_BGRA;
+ format = GL_UNSIGNED_BYTE;
+ internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4);
+
+ radeon = pDRICtx->driverPrivate;
+ rmesa = pDRICtx->driverPrivate;
+
+ rfb = dPriv->driverPrivate;
+ texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+ texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+ texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+
+ rImage = get_radeon_texture_image(texImage);
+ t = radeon_tex_obj(texObj);
+ if (t == NULL) {
+ return;
+ }
+
+ radeon_update_renderbuffers(pDRICtx, dPriv);
+ /* back & depth buffer are useless free them right away */
+ rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
+ }
+ rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
}
+ rb = rfb->color_rb[0];
+ if (rb->bo == NULL) {
+ /* Failed to BO for the buffer */
+ return;
+ }
+
+ _mesa_lock_texture(radeon->glCtx, texObj);
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+ if (rImage->bo) {
+ radeon_bo_unref(rImage->bo);
+ rImage->bo = NULL;
+ }
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = NULL;
+ }
+ if (rImage->mt) {
+ radeon_miptree_unreference(rImage->mt);
+ rImage->mt = NULL;
+ }
+ _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+ rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+ texImage->RowStride = rb->pitch / rb->cpp;
+ texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
+ internalFormat,
+ type, format, 0);
+ rImage->bo = rb->bo;
+ radeon_bo_ref(rImage->bo);
+ t->bo = rb->bo;
+ radeon_bo_ref(t->bo);
+ t->tile_bits = 0;
+ t->image_override = GL_TRUE;
+ t->override_offset = 0;
+ t->pp_txpitch &= (1 << 13) -1;
+ pitch_val = rb->pitch;
+ switch (rb->cpp) {
+ case 4:
+ if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT)
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+ else
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+ t->pp_txfilter |= tx_table[2].filter;
+ pitch_val /= 4;
+ break;
+ case 3:
+ default:
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+ t->pp_txfilter |= tx_table[4].filter;
+ pitch_val /= 4;
+ break;
+ case 2:
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+ t->pp_txfilter |= tx_table[5].filter;
+ pitch_val /= 2;
+ break;
+ }
+ pitch_val--;
+ t->pp_txsize = ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT) |
+ ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT);
+ t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
+ t->pp_txpitch |= pitch_val;
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ if (rb->base.Width > 2048)
+ t->pp_txpitch |= R500_TXWIDTH_BIT11;
+ if (rb->base.Height > 2048)
+ t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+ }
+ t->validated = GL_TRUE;
+ _mesa_unlock_texture(radeon->glCtx, texObj);
+ return;
}
-void r300UpdateTextureState(GLcontext * ctx)
+void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
{
- int i;
-
- for (i = 0; i < 8; i++) {
- if (!r300UpdateTextureUnit(ctx, i)) {
- _mesa_warning(ctx,
- "failed to update texture state for unit %d.\n",
- i);
- }
- }
+ r300SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
}
diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c
index 146daa3..2f7b67c 100644
--- a/r300/r300_vertprog.c
+++ b/r300/r300_vertprog.c
@@ -32,1437 +32,373 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
+#include "shader/programopt.h"
#include "shader/prog_instruction.h"
+#include "shader/prog_optimize.h"
#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
#include "shader/prog_statevars.h"
#include "tnl/tnl.h"
+#include "compiler/radeon_compiler.h"
+#include "compiler/radeon_nqssadce.h"
#include "r300_context.h"
+#include "r300_fragprog_common.h"
+#include "r300_state.h"
-/* TODO: Get rid of t_src_class call */
-#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
- ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \
- t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \
- (t_src_class(a.File) == PVS_SRC_REG_INPUT && \
- t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \
-
-/*
- * Take an already-setup and valid source then swizzle it appropriately to
- * obtain a constant ZERO or ONE source.
+/**
+ * Write parameter array for the given vertex program into dst.
+ * Return the total number of components written.
*/
-#define __CONST(x, y) \
- (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
- t_swizzle(y), \
- t_swizzle(y), \
- t_swizzle(y), \
- t_swizzle(y), \
- t_src_class(src[x].File), \
- VSF_FLAG_NONE) | (src[x].RelAddr << 4))
-
-#define FREE_TEMPS() \
- do { \
- int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
- if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
- WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
- vp->native = GL_FALSE; \
- } \
- u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
- } while (0)
-
-int r300VertexProgUpdateParams(GLcontext * ctx,
- struct r300_vertex_program_cont *vp, float *dst)
+static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
{
- int pi;
- struct gl_vertex_program *mesa_vp = &vp->mesa_program;
- float *dst_o = dst;
- struct gl_program_parameter_list *paramList;
+ int i;
- if (mesa_vp->IsNVProgram) {
+ if (vp->Base->IsNVProgram) {
_mesa_load_tracked_matrices(ctx);
-
- for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
- *dst++ = ctx->VertexProgram.Parameters[pi][0];
- *dst++ = ctx->VertexProgram.Parameters[pi][1];
- *dst++ = ctx->VertexProgram.Parameters[pi][2];
- *dst++ = ctx->VertexProgram.Parameters[pi][3];
+ } else {
+ if (vp->Base->Base.Parameters) {
+ _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
}
- return dst - dst_o;
}
- assert(mesa_vp->Base.Parameters);
- _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
-
- if (mesa_vp->Base.Parameters->NumParameters * 4 >
- VSF_MAX_FRAGMENT_LENGTH) {
+ if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) {
+ /* Should have checked this earlier... */
fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
_mesa_exit(-1);
}
- paramList = mesa_vp->Base.Parameters;
- for (pi = 0; pi < paramList->NumParameters; pi++) {
- switch (paramList->Parameters[pi].Type) {
- case PROGRAM_STATE_VAR:
- case PROGRAM_NAMED_PARAM:
- //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
- case PROGRAM_CONSTANT:
- *dst++ = paramList->ParameterValues[pi][0];
- *dst++ = paramList->ParameterValues[pi][1];
- *dst++ = paramList->ParameterValues[pi][2];
- *dst++ = paramList->ParameterValues[pi][3];
- break;
- default:
- _mesa_problem(NULL, "Bad param type in %s",
- __FUNCTION__);
- }
-
- }
-
- return dst - dst_o;
-}
-
-static unsigned long t_dst_mask(GLuint mask)
-{
- /* WRITEMASK_* is equivalent to VSF_FLAG_* */
- return mask & VSF_FLAG_ALL;
-}
-
-static unsigned long t_dst_class(gl_register_file file)
-{
-
- switch (file) {
- case PROGRAM_TEMPORARY:
- return PVS_DST_REG_TEMPORARY;
- case PROGRAM_OUTPUT:
- return PVS_DST_REG_OUT;
- case PROGRAM_ADDRESS:
- return PVS_DST_REG_A0;
- /*
- case PROGRAM_INPUT:
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_STATE_VAR:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
- }
-}
-
-static unsigned long t_dst_index(struct r300_vertex_program *vp,
- struct prog_dst_register *dst)
-{
- if (dst->File == PROGRAM_OUTPUT)
- return vp->outputs[dst->Index];
-
- return dst->Index;
-}
-
-static unsigned long t_src_class(gl_register_file file)
-{
- switch (file) {
- case PROGRAM_TEMPORARY:
- return PVS_SRC_REG_TEMPORARY;
- case PROGRAM_INPUT:
- return PVS_SRC_REG_INPUT;
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_CONSTANT:
- case PROGRAM_STATE_VAR:
- return PVS_SRC_REG_CONSTANT;
- /*
- case PROGRAM_OUTPUT:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
- }
-}
+ for(i = 0; i < vp->code.constants.Count; ++i) {
+ const float * src = 0;
+ const struct rc_constant * constant = &vp->code.constants.Constants[i];
-static INLINE unsigned long t_swizzle(GLubyte swizzle)
-{
-/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
- return swizzle;
-}
+ switch(constant->Type) {
+ case RC_CONSTANT_EXTERNAL:
+ if (vp->Base->IsNVProgram) {
+ src = ctx->VertexProgram.Parameters[constant->u.External];
+ } else {
+ src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
+ }
+ break;
-#if 0
-static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
-{
- int i;
+ case RC_CONSTANT_IMMEDIATE:
+ src = constant->u.Immediate;
+ break;
+ }
- if (vp == NULL) {
- fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
- caller);
- return;
+ dst[4*i] = src[0];
+ dst[4*i + 1] = src[1];
+ dst[4*i + 2] = src[2];
+ dst[4*i + 3] = src[3];
}
- fprintf(stderr, "%s:<", caller);
- for (i = 0; i < VERT_ATTRIB_MAX; i++)
- fprintf(stderr, "%d ", vp->inputs[i]);
- fprintf(stderr, ">\n");
-
+ return 4 * vp->code.constants.Count;
}
-#endif
-static unsigned long t_src_index(struct r300_vertex_program *vp,
- struct prog_src_register *src)
+static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
{
+ GLbitfield outputs = 0;
int i;
- int max_reg = -1;
-
- if (src->File == PROGRAM_INPUT) {
- if (vp->inputs[src->Index] != -1)
- return vp->inputs[src->Index];
-
- for (i = 0; i < VERT_ATTRIB_MAX; i++)
- if (vp->inputs[i] > max_reg)
- max_reg = vp->inputs[i];
- vp->inputs[src->Index] = max_reg + 1;
-
- //vp_dump_inputs(vp, __FUNCTION__);
-
- return vp->inputs[src->Index];
- } else {
- if (src->Index < 0) {
- fprintf(stderr,
- "negative offsets for indirect addressing do not work.\n");
- return 0;
- }
- return src->Index;
- }
-}
-
-/* these two functions should probably be merged... */
-
-static unsigned long t_src(struct r300_vertex_program *vp,
- struct prog_src_register *src)
-{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
- * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
- */
- return PVS_SRC_OPERAND(t_src_index(vp, src),
- t_swizzle(GET_SWZ(src->Swizzle, 0)),
- t_swizzle(GET_SWZ(src->Swizzle, 1)),
- t_swizzle(GET_SWZ(src->Swizzle, 2)),
- t_swizzle(GET_SWZ(src->Swizzle, 3)),
- t_src_class(src->File),
- src->Negate) | (src->RelAddr << 4);
-}
+#define ADD_OUTPUT(fp_attr, vp_result) \
+ do { \
+ if (fpreads & (1 << (fp_attr))) \
+ outputs |= (1 << (vp_result)); \
+ } while (0)
-static unsigned long t_src_scalar(struct r300_vertex_program *vp,
- struct prog_src_register *src)
-{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
- * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
- */
- return PVS_SRC_OPERAND(t_src_index(vp, src),
- t_swizzle(GET_SWZ(src->Swizzle, 0)),
- t_swizzle(GET_SWZ(src->Swizzle, 0)),
- t_swizzle(GET_SWZ(src->Swizzle, 0)),
- t_swizzle(GET_SWZ(src->Swizzle, 0)),
- t_src_class(src->File),
- src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src->RelAddr << 4);
-}
+ ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
+ ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
-static GLboolean valid_dst(struct r300_vertex_program *vp,
- struct prog_dst_register *dst)
-{
- if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
- return GL_FALSE;
- } else if (dst->File == PROGRAM_ADDRESS) {
- assert(dst->Index == 0);
+ for (i = 0; i <= 7; ++i) {
+ ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
}
- return GL_TRUE;
-}
-
-static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-
- inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
- t_src_class(src[0].File),
- (!src[0].
- Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[3] = 0;
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
-
- inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
- SWIZZLE_ZERO,
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[2] =
- PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
- t_src_class(src[1].File),
- src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
- inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
- PVS_SRC_SELECT_FORCE_1,
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3],
- int *u_temp_i)
-{
- /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
- ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
-
- inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
- GL_FALSE,
- GL_FALSE,
- *u_temp_i,
- t_dst_mask(vpi->DstReg.WriteMask),
- PVS_DST_REG_TEMPORARY);
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- inst += 4;
-
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = PVS_SRC_OPERAND(*u_temp_i,
- PVS_SRC_SELECT_X,
- PVS_SRC_SELECT_Y,
- PVS_SRC_SELECT_Z,
- PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
- /* Not 100% sure about this */
- (!src[0].
- Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE
- /*VSF_FLAG_ALL */ );
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- (*u_temp_i)--;
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
-
- inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
-
- inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- /* NOTE: Users swizzling might not work. */
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- PVS_SRC_SELECT_FORCE_0, // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- PVS_SRC_SELECT_FORCE_0, // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- PVS_SRC_SELECT_FORCE_0, // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
- GL_FALSE,
- GL_TRUE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = t_src(vp, &src[2]);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
-
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = t_src_scalar(vp, &src[1]);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
+#undef ADD_OUTPUT
-static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
+ if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
+ (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
+ outputs |= 1 << VERT_RESULT_BFC0;
+ if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
+ (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
+ outputs |= 1 << VERT_RESULT_BFC1;
-static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-
-#if 0
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
- t_src_class(src[1].File),
- (!src[1].
- Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
- inst[3] = 0;
-#else
- inst[0] =
- PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ONE);
- inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
- t_src_class(src[1].File),
- (!src[1].
- Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
-#endif
-
- return inst;
-}
+ outputs |= 1 << VERT_RESULT_HPOS;
+ if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ outputs |= 1 << VERT_RESULT_PSIZ;
-static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
-
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
+ return outputs;
}
-static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3],
- int *u_temp_i)
-{
- /* mul r0, r1.yzxw, r2.zxyw
- mad r0, -r2.yzxw, r1.zxyw, r0
- */
- inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
- *u_temp_i,
- t_dst_mask(vpi->DstReg.WriteMask),
- PVS_DST_REG_TEMPORARY);
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
- t_src_class(src[1].File),
- src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
- inst += 4;
-
- inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
- t_src_class(src[1].File),
- (!src[1].
- Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[3] =
- PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
- PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
- PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
-
- (*u_temp_i)--;
-
- return inst;
-}
-
-static void t_inputs_outputs(struct r300_vertex_program *vp)
+static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
{
int i;
- int cur_reg = 0;
+ int cur_reg;
+ GLuint OutputsWritten, InputsRead;
- for (i = 0; i < VERT_ATTRIB_MAX; i++)
- vp->inputs[i] = -1;
+ OutputsWritten = c->Base.Program.OutputsWritten;
+ InputsRead = c->Base.Program.InputsRead;
+
+ cur_reg = -1;
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (InputsRead & (1 << i))
+ c->code->inputs[i] = ++cur_reg;
+ else
+ c->code->inputs[i] = -1;
+ }
+ cur_reg = 0;
for (i = 0; i < VERT_RESULT_MAX; i++)
- vp->outputs[i] = -1;
+ c->code->outputs[i] = -1;
- assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
+ assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
- vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
+ if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
+ c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
- vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+ if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+ c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
- vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+ /* If we're writing back facing colors we need to send
+ * four colors to make front/back face colors selection work.
+ * If the vertex program doesn't write all 4 colors, lets
+ * pretend it does by skipping output index reg so the colors
+ * get written into appropriate output vectors.
+ */
+ if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
+ c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
- vp->outputs[VERT_RESULT_COL1] =
- vp->outputs[VERT_RESULT_COL0] + 1;
- cur_reg = vp->outputs[VERT_RESULT_COL1] + 1;
+ if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
+ c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
- vp->outputs[VERT_RESULT_BFC0] =
- vp->outputs[VERT_RESULT_COL0] + 2;
- cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2;
+ if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- vp->outputs[VERT_RESULT_BFC1] =
- vp->outputs[VERT_RESULT_COL0] + 3;
- cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
+ if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ cur_reg++;
}
for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
- if (vp->key.OutputsWritten & (1 << i)) {
- vp->outputs[i] = cur_reg++;
+ if (OutputsWritten & (1 << i)) {
+ c->code->outputs[i] = cur_reg++;
}
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
- vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+ if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+ c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
}
}
-static void r300TranslateVertexShader(struct r300_vertex_program *vp,
- struct prog_instruction *vpi)
-{
- int i;
- GLuint *inst;
- unsigned long num_operands;
- /* Initial value should be last tmp reg that hw supports.
- Strangely enough r300 doesnt mind even though these would be out of range.
- Smart enough to realize that it doesnt need it? */
- int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
- struct prog_src_register src[3];
-
- vp->pos_end = 0; /* Not supported yet */
- vp->program.length = 0;
- /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
- vp->translated = GL_TRUE;
- vp->native = GL_TRUE;
-
- t_inputs_outputs(vp);
-
- for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END;
- vpi++, inst += 4) {
-
- FREE_TEMPS();
-
- if (!valid_dst(vp, &vpi->DstReg)) {
- /* redirect result to unused temp */
- vpi->DstReg.File = PROGRAM_TEMPORARY;
- vpi->DstReg.Index = u_temp_i;
- }
-
- num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
-
- /* copy the sources (src) from mesa into a local variable... is this needed? */
- for (i = 0; i < num_operands; i++) {
- src[i] = vpi->SrcReg[i];
- }
-
- if (num_operands == 3) { /* TODO: scalars */
- if (CMP_SRCS(src[1], src[2])
- || CMP_SRCS(src[0], src[2])) {
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- u_temp_i,
- VSF_FLAG_ALL,
- PVS_DST_REG_TEMPORARY);
- inst[1] =
- PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
- SWIZZLE_X,
- SWIZZLE_Y,
- SWIZZLE_Z,
- SWIZZLE_W,
- t_src_class(src[2].File),
- VSF_FLAG_NONE) | (src[2].
- RelAddr <<
- 4);
- inst[2] = __CONST(2, SWIZZLE_ZERO);
- inst[3] = __CONST(2, SWIZZLE_ZERO);
- inst += 4;
-
- src[2].File = PROGRAM_TEMPORARY;
- src[2].Index = u_temp_i;
- src[2].RelAddr = 0;
- u_temp_i--;
- }
- }
-
- if (num_operands >= 2) {
- if (CMP_SRCS(src[1], src[0])) {
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- u_temp_i,
- VSF_FLAG_ALL,
- PVS_DST_REG_TEMPORARY);
- inst[1] =
- PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- SWIZZLE_X,
- SWIZZLE_Y,
- SWIZZLE_Z,
- SWIZZLE_W,
- t_src_class(src[0].File),
- VSF_FLAG_NONE) | (src[0].
- RelAddr <<
- 4);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- inst += 4;
-
- src[0].File = PROGRAM_TEMPORARY;
- src[0].Index = u_temp_i;
- src[0].RelAddr = 0;
- u_temp_i--;
- }
- }
-
- switch (vpi->Opcode) {
- case OPCODE_ABS:
- inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
- break;
- case OPCODE_ADD:
- inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
- break;
- case OPCODE_ARL:
- inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
- break;
- case OPCODE_DP3:
- inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
- break;
- case OPCODE_DP4:
- inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
- break;
- case OPCODE_DPH:
- inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
- break;
- case OPCODE_DST:
- inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
- break;
- case OPCODE_EX2:
- inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
- break;
- case OPCODE_EXP:
- inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
- break;
- case OPCODE_FLR:
- inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */
- &u_temp_i);
- break;
- case OPCODE_FRC:
- inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
- break;
- case OPCODE_LG2:
- inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
- break;
- case OPCODE_LIT:
- inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
- break;
- case OPCODE_LOG:
- inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
- break;
- case OPCODE_MAD:
- inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
- break;
- case OPCODE_MAX:
- inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
- break;
- case OPCODE_MIN:
- inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
- break;
- case OPCODE_MOV:
- inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
- break;
- case OPCODE_MUL:
- inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
- break;
- case OPCODE_POW:
- inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
- break;
- case OPCODE_RCP:
- inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
- break;
- case OPCODE_RSQ:
- inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
- break;
- case OPCODE_SGE:
- inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
- break;
- case OPCODE_SLT:
- inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
- break;
- case OPCODE_SUB:
- inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
- break;
- case OPCODE_SWZ:
- inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
- break;
- case OPCODE_XPD:
- inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */
- &u_temp_i);
- break;
- default:
- assert(0);
- break;
- }
- }
-
- /* Some outputs may be artificially added, to match the inputs
- of the fragment program. Blank the outputs here. */
- for (i = 0; i < VERT_RESULT_MAX; i++) {
- if (vp->key.OutputsAdded & (1 << i)) {
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- vp->outputs[i],
- VSF_FLAG_ALL,
- PVS_DST_REG_OUT);
- inst[1] = __CONST(0, SWIZZLE_ZERO);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- inst += 4;
- }
+/**
+ * The NV_vertex_program spec mandates that all registers be
+ * initialized to zero. We do this here unconditionally.
+ *
+ * \note We rely on dead-code elimination in the compiler.
+ */
+static void initialize_NV_registers(struct radeon_compiler * compiler)
+{
+ unsigned int reg;
+ struct rc_instruction * inst;
+
+ for(reg = 0; reg < 12; ++reg) {
+ inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = reg;
+ inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_0000;
}
- vp->program.length = (inst - vp->program.body.i);
- if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) {
- vp->program.length = 0;
- vp->native = GL_FALSE;
- }
-#if 0
- fprintf(stderr, "hw program:\n");
- for (i = 0; i < vp->program.length; i++)
- fprintf(stderr, "%08x\n", vp->program.body.d[i]);
-#endif
+ inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
+ inst->I.Opcode = OPCODE_ARL;
+ inst->I.DstReg.File = PROGRAM_ADDRESS;
+ inst->I.DstReg.Index = 0;
+ inst->I.DstReg.WriteMask = WRITEMASK_X;
+ inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_0000;
}
-/* DP4 version seems to trigger some hw peculiarity */
-//#define PREFER_DP4
-
-static void position_invariant(struct gl_program *prog)
+static struct r300_vertex_program *build_program(GLcontext *ctx,
+ struct r300_vertex_program_key *wanted_key,
+ const struct gl_vertex_program *mesa_vp)
{
- struct prog_instruction *vpi;
- struct gl_program_parameter_list *paramList;
- int i;
-
- gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
-
- /* tokens[4] = matrix modifier */
-#ifdef PREFER_DP4
- tokens[4] = 0; /* not transposed or inverted */
-#else
- tokens[4] = STATE_MATRIX_TRANSPOSE;
-#endif
- paramList = prog->Parameters;
-
- vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
- _mesa_init_instructions(vpi, prog->NumInstructions + 4);
-
- for (i = 0; i < 4; i++) {
- GLint idx;
- tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
- idx = _mesa_add_state_reference(paramList, tokens);
-#ifdef PREFER_DP4
- vpi[i].Opcode = OPCODE_DP4;
- vpi[i].StringPos = 0;
- vpi[i].Data = 0;
-
- vpi[i].DstReg.File = PROGRAM_OUTPUT;
- vpi[i].DstReg.Index = VERT_RESULT_HPOS;
- vpi[i].DstReg.WriteMask = 1 << i;
- vpi[i].DstReg.CondMask = COND_TR;
-
- vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
- vpi[i].SrcReg[0].Index = idx;
- vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- vpi[i].SrcReg[1].File = PROGRAM_INPUT;
- vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
- vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
-#else
- if (i == 0)
- vpi[i].Opcode = OPCODE_MUL;
- else
- vpi[i].Opcode = OPCODE_MAD;
-
- vpi[i].Data = 0;
-
- if (i == 3)
- vpi[i].DstReg.File = PROGRAM_OUTPUT;
- else
- vpi[i].DstReg.File = PROGRAM_TEMPORARY;
- vpi[i].DstReg.Index = 0;
- vpi[i].DstReg.WriteMask = 0xf;
- vpi[i].DstReg.CondMask = COND_TR;
-
- vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
- vpi[i].SrcReg[0].Index = idx;
- vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- vpi[i].SrcReg[1].File = PROGRAM_INPUT;
- vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
- vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
-
- if (i > 0) {
- vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
- vpi[i].SrcReg[2].Index = 0;
- vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
- }
-#endif
- }
-
- _mesa_copy_instructions(&vpi[i], prog->Instructions,
- prog->NumInstructions);
-
- free(prog->Instructions);
-
- prog->Instructions = vpi;
-
- prog->NumInstructions += 4;
- vpi = &prog->Instructions[prog->NumInstructions - 1];
+ struct r300_vertex_program *vp;
+ struct r300_vertex_program_compiler compiler;
- assert(vpi->Opcode == OPCODE_END);
-}
+ vp = _mesa_calloc(sizeof(*vp));
+ vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
+ _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
-static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
- GLuint temp_index)
-{
- struct prog_instruction *vpi;
- struct prog_instruction *vpi_insert;
- int i = 0;
+ rc_init(&compiler.Base);
+ compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;
- vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
- _mesa_init_instructions(vpi, prog->NumInstructions + 2);
- /* all but END */
- _mesa_copy_instructions(vpi, prog->Instructions,
- prog->NumInstructions - 1);
- /* END */
- _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
- &prog->Instructions[prog->NumInstructions - 1],
- 1);
- vpi_insert = &vpi[prog->NumInstructions - 1];
+ compiler.code = &vp->code;
+ compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
+ compiler.SetHwInputOutput = &t_inputs_outputs;
- vpi_insert[i].Opcode = OPCODE_MOV;
+ if (compiler.Base.Debug) {
+ fprintf(stderr, "Initial vertex program:\n");
+ _mesa_print_program(&vp->Base->Base);
+ fflush(stderr);
+ }
- vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
- vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
- vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
- vpi_insert[i].DstReg.CondMask = COND_TR;
+ if (mesa_vp->IsPositionInvariant) {
+ _mesa_insert_mvp_code(ctx, vp->Base);
+ }
- vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi_insert[i].SrcReg[0].Index = temp_index;
- vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
- i++;
+ rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
- vpi_insert[i].Opcode = OPCODE_MOV;
+ if (mesa_vp->IsNVProgram)
+ initialize_NV_registers(&compiler.Base);
- vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
- vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
- vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
- vpi_insert[i].DstReg.CondMask = COND_TR;
+ rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
- vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi_insert[i].SrcReg[0].Index = temp_index;
- vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
- i++;
+ if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
+ rc_copy_output(&compiler.Base,
+ VERT_RESULT_HPOS,
+ vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
+ }
- free(prog->Instructions);
+ if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
+ rc_move_output(&compiler.Base,
+ VERT_RESULT_FOGC,
+ vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
+ }
- prog->Instructions = vpi;
+ r3xx_compile_vertex_program(&compiler);
+ vp->error = compiler.Base.Error;
- prog->NumInstructions += i;
- vpi = &prog->Instructions[prog->NumInstructions - 1];
+ vp->Base->Base.InputsRead = vp->code.InputsRead;
+ vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
- assert(vpi->Opcode == OPCODE_END);
-}
+ rc_destroy(&compiler.Base);
-static void pos_as_texcoord(struct r300_vertex_program *vp,
- struct gl_program *prog)
-{
- struct prog_instruction *vpi;
- GLuint tempregi = prog->NumTemporaries;
- /* should do something else if no temps left... */
- prog->NumTemporaries++;
-
- for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
- if (vpi->DstReg.File == PROGRAM_OUTPUT
- && vpi->DstReg.Index == VERT_RESULT_HPOS) {
- vpi->DstReg.File = PROGRAM_TEMPORARY;
- vpi->DstReg.Index = tempregi;
- }
- }
- insert_wpos(vp, prog, tempregi);
+ return vp;
}
-static struct r300_vertex_program *build_program(struct r300_vertex_program_key
- *wanted_key, struct gl_vertex_program
- *mesa_vp, GLint wpos_idx)
+struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_program_key wanted_key = { 0 };
+ struct r300_vertex_program_cont *vpc;
struct r300_vertex_program *vp;
- vp = _mesa_calloc(sizeof(*vp));
- _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
- vp->wpos_idx = wpos_idx;
+ vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
- if (mesa_vp->IsPositionInvariant) {
- position_invariant(&mesa_vp->Base);
+ if (!r300->selected_fp) {
+ /* This can happen when GetProgramiv is called to check
+ * whether the program runs natively.
+ *
+ * To be honest, this is not a very good solution,
+ * but solving the problem of reporting good values
+ * for those queries is tough anyway considering that
+ * we recompile vertex programs based on the precise
+ * fragment program that is in use.
+ */
+ r300SelectAndTranslateFragmentShader(ctx);
}
- if (wpos_idx > -1) {
- pos_as_texcoord(vp, &mesa_vp->Base);
+ wanted_key.FpReads = r300->selected_fp->InputsRead;
+ wanted_key.FogAttr = r300->selected_fp->fog_attr;
+ wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
+
+ for (vp = vpc->progs; vp; vp = vp->next) {
+ if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
+ == 0) {
+ return r300->selected_vp = vp;
+ }
}
- assert(mesa_vp->Base.NumInstructions);
- vp->num_temporaries = mesa_vp->Base.NumTemporaries;
- r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
+ vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
+ vp->next = vpc->progs;
+ vpc->progs = vp;
- return vp;
+ return r300->selected_vp = vp;
}
-static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
+#define bump_vpu_count(ptr, new_count) do { \
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
+ int _nc=(new_count)/4; \
+ assert(_nc < 256); \
+ if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
+ } while(0)
+
+static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
{
- if (key->OutputsWritten & (1 << vert))
- return;
+ int i;
- key->OutputsWritten |= 1 << vert;
- key->OutputsAdded |= 1 << vert;
-}
+ assert((code->length > 0) && (code->length % 4 == 0));
-void r300SelectVertexShader(r300ContextPtr r300)
-{
- GLcontext *ctx = ctx = r300->radeon.glCtx;
- GLuint InputsRead;
- struct r300_vertex_program_key wanted_key = { 0 };
- GLint i;
- struct r300_vertex_program_cont *vpc;
- struct r300_vertex_program *vp;
- GLint wpos_idx;
+ R300_STATECHANGE( r300, vap_flush );
- vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
- wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
- wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
- InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
-
- wpos_idx = -1;
- if (InputsRead & FRAG_BIT_WPOS) {
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
- if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
- break;
-
- if (i == ctx->Const.MaxTextureUnits) {
- fprintf(stderr, "\tno free texcoord found\n");
+ switch ((dest >> 8) & 0xf) {
+ case 0:
+ R300_STATECHANGE(r300, vpi);
+ for (i = 0; i < code->length; i++)
+ r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+ bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
+ break;
+ case 2:
+ R300_STATECHANGE(r300, vpp);
+ for (i = 0; i < code->length; i++)
+ r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+ bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
+ break;
+ case 4:
+ R300_STATECHANGE(r300, vps);
+ for (i = 0; i < code->length; i++)
+ r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+ bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
+ break;
+ default:
+ fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
_mesa_exit(-1);
- }
-
- wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
- wpos_idx = i;
}
+}
- add_outputs(&wanted_key, VERT_RESULT_HPOS);
+void r300SetupVertexProgram(r300ContextPtr rmesa)
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+ struct r300_vertex_program *prog = rmesa->selected_vp;
+ int inst_count = 0;
+ int param_count = 0;
- if (InputsRead & FRAG_BIT_COL0) {
- add_outputs(&wanted_key, VERT_RESULT_COL0);
- }
+ /* Reset state, in case we don't use something */
+ ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
- if (InputsRead & FRAG_BIT_COL1) {
- add_outputs(&wanted_key, VERT_RESULT_COL1);
- }
+ R300_STATECHANGE(rmesa, vap_flush);
+ R300_STATECHANGE(rmesa, vpp);
+ param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+ bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
+ param_count /= 4;
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (InputsRead & (FRAG_BIT_TEX0 << i)) {
- add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
- }
- }
+ r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
+ inst_count = (prog->code.length / 4) - 1;
- if (vpc->mesa_program.IsPositionInvariant) {
- /* we wan't position don't we ? */
- wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
- }
+ r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
+ _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
- for (vp = vpc->progs; vp; vp = vp->next)
- if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
- == 0) {
- r300->selected_vp = vp;
- return;
- }
- //_mesa_print_program(&vpc->mesa_program.Base);
+ R300_STATECHANGE(rmesa, pvs);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (inst_count << R300_PVS_LAST_INST_SHIFT);
- vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
- vp->next = vpc->progs;
- vpc->progs = vp;
- r300->selected_vp = vp;
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
}
diff --git a/r300/r300_vertprog.h b/r300/r300_vertprog.h
index 2f35f02..ccec896 100644
--- a/r300/r300_vertprog.h
+++ b/r300/r300_vertprog.h
@@ -3,33 +3,9 @@
#include "r300_reg.h"
-#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \
- (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \
- | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \
- | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \
- | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \
- | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \
- | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
-#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \
- (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \
- | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \
- | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \
- | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \
- | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \
- | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \
- | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
+void r300SetupVertexProgram(r300ContextPtr rmesa);
-#if 1
-
-#define VSF_FLAG_X 1
-#define VSF_FLAG_Y 2
-#define VSF_FLAG_Z 4
-#define VSF_FLAG_W 8
-#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z)
-#define VSF_FLAG_ALL 0xf
-#define VSF_FLAG_NONE 0
-
-#endif
+struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx);
#endif
diff --git a/r300/r500_fragprog.c b/r300/r500_fragprog.c
deleted file mode 100644
index 292573d..0000000
--- a/r300/r500_fragprog.c
+++ /dev/null
@@ -1,718 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "r500_fragprog.h"
-
-#include "radeon_nqssadce.h"
-#include "radeon_program_alu.h"
-
-
-static void reset_srcreg(struct prog_src_register* reg)
-{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
-}
-
-static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
-{
- gl_state_index fail_value_tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
- };
- struct prog_src_register reg = { 0, };
-
- fail_value_tokens[2] = tmu;
- reg.File = PROGRAM_STATE_VAR;
- reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
- reg.Swizzle = SWIZZLE_WWWW;
- return reg;
-}
-
-/**
- * Transform TEX, TXP, TXB, and KIL instructions in the following way:
- * - premultiply texture coordinates for RECT
- * - extract operand swizzles
- * - introduce a temporary register when write masks are needed
- *
- */
-static GLboolean transform_TEX(
- struct radeon_transform_context *t,
- struct prog_instruction* orig_inst, void* data)
-{
- struct r500_fragment_program_compiler *compiler =
- (struct r500_fragment_program_compiler*)data;
- struct prog_instruction inst = *orig_inst;
- struct prog_instruction* tgt;
- GLboolean destredirect = GL_FALSE;
-
- if (inst.Opcode != OPCODE_TEX &&
- inst.Opcode != OPCODE_TXB &&
- inst.Opcode != OPCODE_TXP &&
- inst.Opcode != OPCODE_KIL)
- return GL_FALSE;
-
- /* ARB_shadow & EXT_shadow_funcs */
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
-
- if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = inst.DstReg;
- if (comparefunc == GL_ALWAYS) {
- tgt->SrcReg[0].File = PROGRAM_BUILTIN;
- tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
- } else {
- tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
- }
- return GL_TRUE;
- }
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = radeonFindFreeTemporary(t);
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) {
- int tempreg = radeonFindFreeTemporary(t);
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = tempreg;
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- destredirect = GL_TRUE;
- }
-
- if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
- int tmpreg = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(t->Program, 1);
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tmpreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tmpreg;
- }
-
- tgt = radeonAppendInstructions(t->Program, 1);
- _mesa_copy_instructions(tgt, &inst, 1);
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
- int rcptemp = radeonFindFreeTemporary(t);
- int pass, fail;
-
- tgt = radeonAppendInstructions(t->Program, 3);
-
- tgt[0].Opcode = OPCODE_RCP;
- tgt[0].DstReg.File = PROGRAM_TEMPORARY;
- tgt[0].DstReg.Index = rcptemp;
- tgt[0].DstReg.WriteMask = WRITEMASK_W;
- tgt[0].SrcReg[0] = inst.SrcReg[0];
- tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- tgt[1].Opcode = OPCODE_MAD;
- tgt[1].DstReg = inst.DstReg;
- tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
- tgt[1].SrcReg[0] = inst.SrcReg[0];
- tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[1].Index = rcptemp;
- tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[2].Index = inst.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
-
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
- else
- tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
-
- tgt[2].Opcode = OPCODE_CMP;
- tgt[2].DstReg = orig_inst->DstReg;
- tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
-
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
- pass = 1;
- fail = 2;
- } else {
- pass = 2;
- fail = 1;
- }
-
- tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
- tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
- tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
- } else if (destredirect) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = orig_inst->DstReg;
- tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt->SrcReg[0].Index = inst.DstReg.Index;
- }
-
- return GL_TRUE;
-}
-
-
-static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp)
-{
- struct gl_fragment_program *mp = &fp->mesa_program;
-
- /* Ask Mesa nicely to fill in ParameterValues for us */
- if (mp->Base.Parameters)
- _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters);
-}
-
-
-/**
- * Transform the program to support fragment.position.
- *
- * Introduce a small fragment at the start of the program that will be
- * the only code that directly reads the FRAG_ATTRIB_WPOS input.
- * All other code pieces that reference that input will be rewritten
- * to read from a newly allocated temporary.
- *
- * \todo if/when r5xx supports the radeon_program architecture, this is a
- * likely candidate for code sharing.
- */
-static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
-{
- GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead;
-
- if (!(InputsRead & FRAG_BIT_WPOS))
- return;
-
- static gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
- };
- struct prog_instruction *fpi;
- GLuint window_index;
- int i = 0;
- GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
-
- _mesa_insert_instructions(compiler->program, 0, 3);
- fpi = compiler->program->Instructions;
-
- /* perspective divide */
- fpi[i].Opcode = OPCODE_RCP;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_W;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
- fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
- i++;
-
- fpi[i].Opcode = OPCODE_MUL;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
- fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[1].Index = tempregi;
- fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- i++;
-
- /* viewport transformation */
- window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
-
- fpi[i].Opcode = OPCODE_MAD;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[0].Index = tempregi;
- fpi[i].SrcReg[0].Swizzle =
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
- fpi[i].SrcReg[1].Index = window_index;
- fpi[i].SrcReg[1].Swizzle =
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
- fpi[i].SrcReg[2].Index = window_index;
- fpi[i].SrcReg[2].Swizzle =
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
- i++;
-
- for (; i < compiler->program->NumInstructions; ++i) {
- int reg;
- for (reg = 0; reg < 3; reg++) {
- if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
- fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
- fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[reg].Index = tempregi;
- }
- }
- }
-}
-
-
-static void nqssadce_init(struct nqssadce_state* s)
-{
- s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
- s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
-}
-
-static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
-{
- GLuint relevant;
- int i;
-
- if (opcode == OPCODE_TEX ||
- opcode == OPCODE_TXB ||
- opcode == OPCODE_TXP ||
- opcode == OPCODE_KIL) {
- if (reg.Abs)
- return GL_FALSE;
-
- if (reg.Negate)
- reg.Negate ^= NEGATE_XYZW;
-
- if (opcode == OPCODE_KIL) {
- if (reg.Swizzle != SWIZZLE_NOOP)
- return GL_FALSE;
- } else {
- for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(reg.Swizzle, i);
- if (swz == SWIZZLE_NIL) {
- reg.Negate &= ~(1 << i);
- continue;
- }
- if (swz >= 4)
- return GL_FALSE;
- }
- }
-
- if (reg.Negate)
- return GL_FALSE;
-
- return GL_TRUE;
- } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
- /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
- * if it doesn't fit perfectly into a .xyzw case... */
- if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate)
- return GL_TRUE;
-
- return GL_FALSE;
- } else {
- /* ALU instructions support almost everything */
- if (reg.Abs)
- return GL_TRUE;
-
- relevant = 0;
- for(i = 0; i < 3; ++i) {
- GLuint swz = GET_SWZ(reg.Swizzle, i);
- if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
- relevant |= 1 << i;
- }
- if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
- return GL_FALSE;
-
- return GL_TRUE;
- }
-}
-
-/**
- * Implement a MOV with a potentially non-native swizzle.
- *
- * The only thing we *cannot* do in an ALU instruction is per-component
- * negation. Therefore, we split the MOV into two instructions when necessary.
- */
-static void nqssadce_build_swizzle(struct nqssadce_state *s,
- struct prog_dst_register dst, struct prog_src_register src)
-{
- struct prog_instruction *inst;
- GLuint negatebase[2] = { 0, 0 };
- int i;
-
- for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(src.Swizzle, i);
- if (swz == SWIZZLE_NIL)
- continue;
- negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
- }
-
- _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
- inst = s->Program->Instructions + s->IP;
-
- for(i = 0; i <= 1; ++i) {
- if (!negatebase[i])
- continue;
-
- inst->Opcode = OPCODE_MOV;
- inst->DstReg = dst;
- inst->DstReg.WriteMask = negatebase[i];
- inst->SrcReg[0] = src;
- inst++;
- s->IP++;
- }
-}
-
-static GLuint build_dtm(GLuint depthmode)
-{
- switch(depthmode) {
- default:
- case GL_LUMINANCE: return 0;
- case GL_INTENSITY: return 1;
- case GL_ALPHA: return 2;
- }
-}
-
-static GLuint build_func(GLuint comparefunc)
-{
- return comparefunc - GL_NEVER;
-}
-
-
-/**
- * Collect all external state that is relevant for compiling the given
- * fragment program.
- */
-static void build_state(
- r300ContextPtr r300,
- struct r500_fragment_program *fp,
- struct r500_fragment_program_external_state *state)
-{
- int unit;
-
- _mesa_bzero(state, sizeof(*state));
-
- for(unit = 0; unit < 16; ++unit) {
- if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) {
- struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
-
- state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
- state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
- }
- }
-}
-
-static void dump_program(struct r500_fragment_program_code *code);
-
-void r500TranslateFragmentShader(r300ContextPtr r300,
- struct r500_fragment_program *fp)
-{
- struct r500_fragment_program_external_state state;
-
- build_state(r300, fp, &state);
- if (_mesa_memcmp(&fp->state, &state, sizeof(state))) {
- /* TODO: cache compiled programs */
- fp->translated = GL_FALSE;
- _mesa_memcpy(&fp->state, &state, sizeof(state));
- }
-
- if (!fp->translated) {
- struct r500_fragment_program_compiler compiler;
-
- compiler.r300 = r300;
- compiler.fp = fp;
- compiler.code = &fp->code;
- compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);
-
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Compiler: Initial program:\n");
- _mesa_print_program(compiler.program);
- }
-
- insert_WPOS_trailer(&compiler);
-
- struct radeon_program_transformation transformations[] = {
- { &transform_TEX, &compiler },
- { &radeonTransformALU, 0 },
- { &radeonTransformDeriv, 0 },
- { &radeonTransformTrigScale, 0 }
- };
- radeonLocalTransform(r300->radeon.glCtx, compiler.program,
- 4, transformations);
-
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Compiler: after native rewrite:\n");
- _mesa_print_program(compiler.program);
- }
-
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &is_native_swizzle,
- .BuildSwizzle = &nqssadce_build_swizzle,
- .RewriteDepthOut = GL_TRUE
- };
- radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
-
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Compiler: after NqSSA-DCE:\n");
- _mesa_print_program(compiler.program);
- }
-
- fp->translated = r500FragmentProgramEmit(&compiler);
-
- /* Subtle: Rescue any parameters that have been added during transformations */
- _mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
- fp->mesa_program.Base.Parameters = compiler.program->Parameters;
- compiler.program->Parameters = 0;
-
- _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0);
-
- r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
-
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- if (fp->translated) {
- _mesa_printf("Machine-readable code:\n");
- dump_program(&fp->code);
- }
- }
-
- }
-
- update_params(r300, fp);
-
-}
-
-static char *toswiz(int swiz_val) {
- switch(swiz_val) {
- case 0: return "R";
- case 1: return "G";
- case 2: return "B";
- case 3: return "A";
- case 4: return "0";
- case 5: return "1/2";
- case 6: return "1";
- case 7: return "U";
- }
- return NULL;
-}
-
-static char *toop(int op_val)
-{
- char *str = NULL;
- switch (op_val) {
- case 0: str = "MAD"; break;
- case 1: str = "DP3"; break;
- case 2: str = "DP4"; break;
- case 3: str = "D2A"; break;
- case 4: str = "MIN"; break;
- case 5: str = "MAX"; break;
- case 6: str = "Reserved"; break;
- case 7: str = "CND"; break;
- case 8: str = "CMP"; break;
- case 9: str = "FRC"; break;
- case 10: str = "SOP"; break;
- case 11: str = "MDH"; break;
- case 12: str = "MDV"; break;
- }
- return str;
-}
-
-static char *to_alpha_op(int op_val)
-{
- char *str = NULL;
- switch (op_val) {
- case 0: str = "MAD"; break;
- case 1: str = "DP"; break;
- case 2: str = "MIN"; break;
- case 3: str = "MAX"; break;
- case 4: str = "Reserved"; break;
- case 5: str = "CND"; break;
- case 6: str = "CMP"; break;
- case 7: str = "FRC"; break;
- case 8: str = "EX2"; break;
- case 9: str = "LN2"; break;
- case 10: str = "RCP"; break;
- case 11: str = "RSQ"; break;
- case 12: str = "SIN"; break;
- case 13: str = "COS"; break;
- case 14: str = "MDH"; break;
- case 15: str = "MDV"; break;
- }
- return str;
-}
-
-static char *to_mask(int val)
-{
- char *str = NULL;
- switch(val) {
- case 0: str = "NONE"; break;
- case 1: str = "R"; break;
- case 2: str = "G"; break;
- case 3: str = "RG"; break;
- case 4: str = "B"; break;
- case 5: str = "RB"; break;
- case 6: str = "GB"; break;
- case 7: str = "RGB"; break;
- case 8: str = "A"; break;
- case 9: str = "AR"; break;
- case 10: str = "AG"; break;
- case 11: str = "ARG"; break;
- case 12: str = "AB"; break;
- case 13: str = "ARB"; break;
- case 14: str = "AGB"; break;
- case 15: str = "ARGB"; break;
- }
- return str;
-}
-
-static char *to_texop(int val)
-{
- switch(val) {
- case 0: return "NOP";
- case 1: return "LD";
- case 2: return "TEXKILL";
- case 3: return "PROJ";
- case 4: return "LODBIAS";
- case 5: return "LOD";
- case 6: return "DXDY";
- }
- return NULL;
-}
-
-static void dump_program(struct r500_fragment_program_code *code)
-{
-
- fprintf(stderr, "R500 Fragment Program:\n--------\n");
-
- int n;
- uint32_t inst;
- uint32_t inst0;
- char *str = NULL;
-
- if (code->const_nr) {
- fprintf(stderr, "--------\nConstants:\n");
- for (n = 0; n < code->const_nr; n++) {
- fprintf(stderr, "Constant %d: %i[%i]\n", n,
- code->constant[n].File, code->constant[n].Index);
- }
- fprintf(stderr, "--------\n");
- }
-
- for (n = 0; n < code->inst_end+1; n++) {
- inst0 = inst = code->inst[n].inst0;
- fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
- switch(inst & 0x3) {
- case R500_INST_TYPE_ALU: str = "ALU"; break;
- case R500_INST_TYPE_OUT: str = "OUT"; break;
- case R500_INST_TYPE_FC: str = "FC"; break;
- case R500_INST_TYPE_TEX: str = "TEX"; break;
- };
- fprintf(stderr,"%s %s %s %s %s ", str,
- inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
- inst & R500_INST_LAST ? "LAST" : "",
- inst & R500_INST_NOP ? "NOP" : "",
- inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
- fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
- to_mask((inst >> 15) & 0xf));
-
- switch(inst0 & 0x3) {
- case 0:
- case 1:
- fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
- inst = code->inst[n].inst1;
-
- fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
- inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
- (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
- (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
- (inst >> 30));
-
- fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
- inst = code->inst[n].inst2;
- fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
- inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
- (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
- (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
- (inst >> 30));
- fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
- inst = code->inst[n].inst3;
- fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
- (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
- (inst >> 11) & 0x3,
- (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
- (inst >> 24) & 0x3);
-
-
- fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
- inst = code->inst[n].inst4;
- fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
- (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
- (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
- (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
- (inst >> 31) & 0x1);
-
- fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
- inst = code->inst[n].inst5;
- fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
- (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
- (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
- (inst >> 23) & 0x3,
- (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
- break;
- case 2:
- break;
- case 3:
- inst = code->inst[n].inst1;
- fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
- to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
- (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
- inst = code->inst[n].inst2;
- fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
- inst & 127, inst & (1<<7) ? "(rel)" : "",
- toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
- toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
- (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
- toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
- toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
-
- fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
- break;
- }
- fprintf(stderr,"\n");
- }
-
-}
diff --git a/r300/radeon_context.c b/r300/radeon_context.c
deleted file mode 100644
index 5267fe9..0000000
--- a/r300/radeon_context.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
-
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/**
- * \file radeon_context.c
- * Common context initialization.
- *
- * \author Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include <dlfcn.h>
-
-#include "main/glheader.h"
-#include "main/imports.h"
-#include "main/context.h"
-#include "main/state.h"
-#include "main/matrix.h"
-#include "main/framebuffer.h"
-
-#include "drivers/common/driverfuncs.h"
-#include "swrast/swrast.h"
-
-#include "radeon_screen.h"
-#include "radeon_ioctl.h"
-#include "radeon_macros.h"
-#include "radeon_reg.h"
-
-#include "radeon_state.h"
-#include "r300_state.h"
-
-#include "utils.h"
-#include "vblank.h"
-#include "xmlpool.h" /* for symbolic values of enum-type options */
-
-#define DRIVER_DATE "20060815"
-
-
-/* Return various strings for glGetString().
- */
-static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
-{
- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
- static char buffer[128];
-
- switch (name) {
- case GL_VENDOR:
- if (IS_R300_CLASS(radeon->radeonScreen))
- return (GLubyte *) "DRI R300 Project";
- else
- return (GLubyte *) "Tungsten Graphics, Inc.";
-
- case GL_RENDERER:
- {
- unsigned offset;
- GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
- radeon->radeonScreen->AGPMode;
- const char* chipname;
-
- if (IS_R300_CLASS(radeon->radeonScreen))
- chipname = "R300";
- else
- chipname = "R200";
-
- offset = driGetRendererString(buffer, chipname, DRIVER_DATE,
- agp_mode);
-
- if (IS_R300_CLASS(radeon->radeonScreen)) {
- sprintf(&buffer[offset], " %sTCL",
- (radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
- ? "" : "NO-");
- } else {
- sprintf(&buffer[offset], " %sTCL",
- !(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
- ? "" : "NO-");
- }
-
- return (GLubyte *) buffer;
- }
-
- default:
- return NULL;
- }
-}
-
-/* Initialize the driver's misc functions.
- */
-static void radeonInitDriverFuncs(struct dd_function_table *functions)
-{
- functions->GetString = radeonGetString;
-}
-
-
-/**
- * Create and initialize all common fields of the context,
- * including the Mesa context itself.
- */
-GLboolean radeonInitContext(radeonContextPtr radeon,
- struct dd_function_table* functions,
- const __GLcontextModes * glVisual,
- __DRIcontextPrivate * driContextPriv,
- void *sharedContextPrivate)
-{
- __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
- radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
- GLcontext* ctx;
- GLcontext* shareCtx;
- int fthrottle_mode;
-
- /* Fill in additional standard functions. */
- radeonInitDriverFuncs(functions);
-
- radeon->radeonScreen = screen;
- /* Allocate and initialize the Mesa context */
- if (sharedContextPrivate)
- shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
- else
- shareCtx = NULL;
- radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
- functions, (void *)radeon);
- if (!radeon->glCtx)
- return GL_FALSE;
-
- ctx = radeon->glCtx;
- driContextPriv->driverPrivate = radeon;
-
- /* DRI fields */
- radeon->dri.context = driContextPriv;
- radeon->dri.screen = sPriv;
- radeon->dri.drawable = NULL;
- radeon->dri.readable = NULL;
- radeon->dri.hwContext = driContextPriv->hHWContext;
- radeon->dri.hwLock = &sPriv->pSAREA->lock;
- radeon->dri.fd = sPriv->fd;
- radeon->dri.drmMinor = sPriv->drm_version.minor;
-
- radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
- screen->sarea_priv_offset);
-
- /* Setup IRQs */
- fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
- radeon->iw.irq_seq = -1;
- radeon->irqsEmitted = 0;
- radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
- radeon->radeonScreen->irq);
-
- radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
-
- if (!radeon->do_irqs)
- fprintf(stderr,
- "IRQ's not enabled, falling back to %s: %d %d\n",
- radeon->do_usleeps ? "usleeps" : "busy waits",
- fthrottle_mode, radeon->radeonScreen->irq);
-
- (*sPriv->systemTime->getUST) (&radeon->swap_ust);
-
- return GL_TRUE;
-}
-
-
-/**
- * Cleanup common context fields.
- * Called by r200DestroyContext/r300DestroyContext
- */
-void radeonCleanupContext(radeonContextPtr radeon)
-{
- /* _mesa_destroy_context() might result in calls to functions that
- * depend on the DriverCtx, so don't set it to NULL before.
- *
- * radeon->glCtx->DriverCtx = NULL;
- */
-
- /* free the Mesa context */
- _mesa_destroy_context(radeon->glCtx);
-
- if (radeon->state.scissor.pClipRects) {
- FREE(radeon->state.scissor.pClipRects);
- radeon->state.scissor.pClipRects = 0;
- }
-}
-
-
-/**
- * Swap front and back buffer.
- */
-void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
-{
- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
- radeonContextPtr radeon;
- GLcontext *ctx;
-
- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
- ctx = radeon->glCtx;
-
- if (ctx->Visual.doubleBufferMode) {
- _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */
- if (radeon->doPageFlip) {
- radeonPageFlip(dPriv);
- } else {
- radeonCopyBuffer(dPriv, NULL);
- }
- }
- } else {
- /* XXX this shouldn't be an error but we can't handle it for now */
- _mesa_problem(NULL, "%s: drawable has no context!",
- __FUNCTION__);
- }
-}
-
-void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
- int x, int y, int w, int h )
-{
- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
- radeonContextPtr radeon;
- GLcontext *ctx;
-
- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
- ctx = radeon->glCtx;
-
- if (ctx->Visual.doubleBufferMode) {
- drm_clip_rect_t rect;
- rect.x1 = x + dPriv->x;
- rect.y1 = (dPriv->h - y - h) + dPriv->y;
- rect.x2 = rect.x1 + w;
- rect.y2 = rect.y1 + h;
- _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */
- radeonCopyBuffer(dPriv, &rect);
- }
- } else {
- /* XXX this shouldn't be an error but we can't handle it for now */
- _mesa_problem(NULL, "%s: drawable has no context!",
- __FUNCTION__);
- }
-}
-
-/* Force the context `c' to be the current context and associate with it
- * buffer `b'.
- */
-GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
- __DRIdrawablePrivate * driDrawPriv,
- __DRIdrawablePrivate * driReadPriv)
-{
- if (driContextPriv) {
- radeonContextPtr radeon =
- (radeonContextPtr) driContextPriv->driverPrivate;
-
- if (RADEON_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
- radeon->glCtx);
-
- if (radeon->dri.drawable != driDrawPriv) {
- if (driDrawPriv->swap_interval == (unsigned)-1) {
- driDrawPriv->vblFlags =
- (radeon->radeonScreen->irq != 0)
- ? driGetDefaultVBlankFlags(&radeon->
- optionCache)
- : VBLANK_FLAG_NO_IRQ;
-
- driDrawableInitVBlank(driDrawPriv);
- }
- }
-
- radeon->dri.readable = driReadPriv;
-
- if (radeon->dri.drawable != driDrawPriv ||
- radeon->lastStamp != driDrawPriv->lastStamp) {
- radeon->dri.drawable = driDrawPriv;
-
- radeonSetCliprects(radeon);
- r300UpdateViewportOffset(radeon->glCtx);
- }
-
- _mesa_make_current(radeon->glCtx,
- (GLframebuffer *) driDrawPriv->
- driverPrivate,
- (GLframebuffer *) driReadPriv->
- driverPrivate);
-
- _mesa_update_state(radeon->glCtx);
-
- radeonUpdatePageFlipping(radeon);
- } else {
- if (RADEON_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
- _mesa_make_current(0, 0, 0);
- }
-
- if (RADEON_DEBUG & DEBUG_DRI)
- fprintf(stderr, "End %s\n", __FUNCTION__);
- return GL_TRUE;
-}
-
-/* Force the context `c' to be unbound from its buffer.
- */
-GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv)
-{
- radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
-
- if (RADEON_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
- radeon->glCtx);
-
- return GL_TRUE;
-}
-
diff --git a/r300/radeon_context.h b/r300/radeon_context.h
index 47cbc22..250570f 100644
--- a/r300/radeon_context.h
+++ b/r300/radeon_context.h
@@ -49,20 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "drm.h"
#include "dri_util.h"
-struct radeon_context;
-typedef struct radeon_context radeonContextRec;
-typedef struct radeon_context *radeonContextPtr;
-
-/* Rasterizing fallbacks */
-/* See correponding strings in r200_swtcl.c */
-#define RADEON_FALLBACK_TEXTURE 0x0001
-#define RADEON_FALLBACK_DRAW_BUFFER 0x0002
-#define RADEON_FALLBACK_STENCIL 0x0004
-#define RADEON_FALLBACK_RENDER_MODE 0x0008
-#define RADEON_FALLBACK_BLEND_EQ 0x0010
-#define RADEON_FALLBACK_BLEND_FUNC 0x0020
-#define RADEON_FALLBACK_DISABLE 0x0040
-#define RADEON_FALLBACK_BORDER_MODE 0x0080
+#include "radeon_screen.h"
#if R200_MERGED
extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
@@ -79,155 +66,11 @@ extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
/* TCL fallbacks */
extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
-#define RADEON_TCL_FALLBACK_RASTER 0x0001 /* rasterization */
-#define RADEON_TCL_FALLBACK_UNFILLED 0x0002 /* unfilled tris */
-#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x0004 /* twoside tris */
-#define RADEON_TCL_FALLBACK_MATERIAL 0x0008 /* material in vb */
-#define RADEON_TCL_FALLBACK_TEXGEN_0 0x0010 /* texgen, unit 0 */
-#define RADEON_TCL_FALLBACK_TEXGEN_1 0x0020 /* texgen, unit 1 */
-#define RADEON_TCL_FALLBACK_TEXGEN_2 0x0040 /* texgen, unit 2 */
-#define RADEON_TCL_FALLBACK_TEXGEN_3 0x0080 /* texgen, unit 3 */
-#define RADEON_TCL_FALLBACK_TEXGEN_4 0x0100 /* texgen, unit 4 */
-#define RADEON_TCL_FALLBACK_TEXGEN_5 0x0200 /* texgen, unit 5 */
-#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x0400 /* user disable */
-#define RADEON_TCL_FALLBACK_BITMAP 0x0800 /* draw bitmap with points */
-#define RADEON_TCL_FALLBACK_VERTEX_PROGRAM 0x1000 /* vertex program active */
-
#if R200_MERGED
#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode )
#else
#define TCL_FALLBACK( ctx, bit, mode ) ;
#endif
-struct radeon_dri_mirror {
- __DRIcontextPrivate *context; /* DRI context */
- __DRIscreenPrivate *screen; /* DRI screen */
- /**
- * DRI drawable bound to this context for drawing.
- */
- __DRIdrawablePrivate *drawable;
-
- /**
- * DRI drawable bound to this context for reading.
- */
- __DRIdrawablePrivate *readable;
-
- drm_context_t hwContext;
- drm_hw_lock_t *hwLock;
- int fd;
- int drmMinor;
-};
-
-/**
- * Derived state for internal purposes.
- */
-struct radeon_scissor_state {
- drm_clip_rect_t rect;
- GLboolean enabled;
-
- GLuint numClipRects; /* Cliprects active */
- GLuint numAllocedClipRects; /* Cliprects available */
- drm_clip_rect_t *pClipRects;
-};
-
-struct radeon_colorbuffer_state {
- GLuint clear;
- GLint drawOffset, drawPitch;
-};
-
-struct radeon_state {
- struct radeon_colorbuffer_state color;
- struct radeon_scissor_state scissor;
-};
-
-/**
- * Common per-context variables shared by R200 and R300.
- * R200- and R300-specific code "derive" their own context from this
- * structure.
- */
-struct radeon_context {
- GLcontext *glCtx; /* Mesa context */
- radeonScreenPtr radeonScreen; /* Screen private DRI data */
-
- /* Fallback state */
- GLuint Fallback;
- GLuint TclFallback;
-
- /* Page flipping */
- GLuint doPageFlip;
-
- /* Drawable, cliprect and scissor information */
- GLuint numClipRects; /* Cliprects for the draw buffer */
- drm_clip_rect_t *pClipRects;
- unsigned int lastStamp;
- GLboolean lost_context;
- drm_radeon_sarea_t *sarea; /* Private SAREA data */
-
- /* Mirrors of some DRI state */
- struct radeon_dri_mirror dri;
-
- /* Busy waiting */
- GLuint do_usleeps;
- GLuint do_irqs;
- GLuint irqsEmitted;
- drm_radeon_irq_wait_t iw;
-
- /* buffer swap */
- int64_t swap_ust;
- int64_t swap_missed_ust;
-
- GLuint swap_count;
- GLuint swap_missed_count;
-
- /* Derived state */
- struct radeon_state state;
-
- /* Configuration cache
- */
- driOptionCache optionCache;
-};
-
-#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
-
-extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
-extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
- int x, int y, int w, int h);
-extern GLboolean radeonInitContext(radeonContextPtr radeon,
- struct dd_function_table *functions,
- const __GLcontextModes * glVisual,
- __DRIcontextPrivate * driContextPriv,
- void *sharedContextPrivate);
-extern void radeonCleanupContext(radeonContextPtr radeon);
-extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
- __DRIdrawablePrivate * driDrawPriv,
- __DRIdrawablePrivate * driReadPriv);
-extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
-
-/* ================================================================
- * Debugging:
- */
-#define DO_DEBUG 1
-
-#if DO_DEBUG
-extern int RADEON_DEBUG;
-#else
-#define RADEON_DEBUG 0
-#endif
-
-#define DEBUG_TEXTURE 0x0001
-#define DEBUG_STATE 0x0002
-#define DEBUG_IOCTL 0x0004
-#define DEBUG_PRIMS 0x0008
-#define DEBUG_VERTS 0x0010
-#define DEBUG_FALLBACKS 0x0020
-#define DEBUG_VFMT 0x0040
-#define DEBUG_CODEGEN 0x0080
-#define DEBUG_VERBOSE 0x0100
-#define DEBUG_DRI 0x0200
-#define DEBUG_DMA 0x0400
-#define DEBUG_SANITY 0x0800
-#define DEBUG_SYNC 0x1000
-#define DEBUG_PIXEL 0x2000
-#define DEBUG_MEMORY 0x4000
#endif /* __RADEON_CONTEXT_H__ */
diff --git a/r300/radeon_ioctl.c b/r300/radeon_ioctl.c
deleted file mode 100644
index f042a7b..0000000
--- a/r300/radeon_ioctl.c
+++ /dev/null
@@ -1,396 +0,0 @@
-/*
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
-
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include <sched.h>
-#include <errno.h>
-
-#include "main/glheader.h"
-#include "main/imports.h"
-#include "main/macros.h"
-#include "main/context.h"
-#include "swrast/swrast.h"
-#include "r300_context.h"
-#include "radeon_ioctl.h"
-#include "r300_ioctl.h"
-#include "r300_state.h"
-#include "radeon_reg.h"
-
-#include "drirenderbuffer.h"
-#include "vblank.h"
-
-static void radeonWaitForIdle(radeonContextPtr radeon);
-
-/* ================================================================
- * SwapBuffers with client-side throttling
- */
-
-static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
-{
- drm_radeon_getparam_t gp;
- int ret;
- uint32_t frame = 0;
-
- gp.param = RADEON_PARAM_LAST_FRAME;
- gp.value = (int *)&frame;
- ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
- &gp, sizeof(gp));
- if (ret) {
- fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
- ret);
- exit(1);
- }
-
- return frame;
-}
-
-uint32_t radeonGetAge(radeonContextPtr radeon)
-{
- drm_radeon_getparam_t gp;
- int ret;
- uint32_t age = 0;
-
- gp.param = RADEON_PARAM_LAST_CLEAR;
- gp.value = (int *)&age;
- ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
- &gp, sizeof(gp));
- if (ret) {
- fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
- ret);
- exit(1);
- }
-
- return age;
-}
-
-static void radeonEmitIrqLocked(radeonContextPtr radeon)
-{
- drm_radeon_irq_emit_t ie;
- int ret;
-
- ie.irq_seq = &radeon->iw.irq_seq;
- ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
- &ie, sizeof(ie));
- if (ret) {
- fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
- ret);
- exit(1);
- }
-}
-
-static void radeonWaitIrq(radeonContextPtr radeon)
-{
- int ret;
-
- do {
- ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
- &radeon->iw, sizeof(radeon->iw));
- } while (ret && (errno == EINTR || errno == EBUSY));
-
- if (ret) {
- fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
- ret);
- exit(1);
- }
-}
-
-static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
-{
- drm_radeon_sarea_t *sarea = radeon->sarea;
-
- if (radeon->do_irqs) {
- if (radeonGetLastFrame(radeon) < sarea->last_frame) {
- if (!radeon->irqsEmitted) {
- while (radeonGetLastFrame(radeon) <
- sarea->last_frame) ;
- } else {
- UNLOCK_HARDWARE(radeon);
- radeonWaitIrq(radeon);
- LOCK_HARDWARE(radeon);
- }
- radeon->irqsEmitted = 10;
- }
-
- if (radeon->irqsEmitted) {
- radeonEmitIrqLocked(radeon);
- radeon->irqsEmitted--;
- }
- } else {
- while (radeonGetLastFrame(radeon) < sarea->last_frame) {
- UNLOCK_HARDWARE(radeon);
- if (radeon->do_usleeps)
- DO_USLEEP(1);
- LOCK_HARDWARE(radeon);
- }
- }
-}
-
-/* Copy the back color buffer to the front color buffer.
- */
-void radeonCopyBuffer(__DRIdrawablePrivate * dPriv,
- const drm_clip_rect_t * rect)
-{
- radeonContextPtr radeon;
- GLint nbox, i, ret;
- GLboolean missed_target;
- int64_t ust;
- __DRIscreenPrivate *psp = dPriv->driScreenPriv;
-
- assert(dPriv);
- assert(dPriv->driContextPriv);
- assert(dPriv->driContextPriv->driverPrivate);
-
- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-
- if (RADEON_DEBUG & DEBUG_IOCTL) {
- fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__,
- (void *)radeon->glCtx);
- }
-
- r300Flush(radeon->glCtx);
-
- LOCK_HARDWARE(radeon);
-
- /* Throttle the frame rate -- only allow one pending swap buffers
- * request at a time.
- */
- radeonWaitForFrameCompletion(radeon);
- if (!rect)
- {
- UNLOCK_HARDWARE(radeon);
- driWaitForVBlank(dPriv, &missed_target);
- LOCK_HARDWARE(radeon);
- }
-
- nbox = dPriv->numClipRects; /* must be in locked region */
-
- for (i = 0; i < nbox;) {
- GLint nr = MIN2(i + RADEON_NR_SAREA_CLIPRECTS, nbox);
- drm_clip_rect_t *box = dPriv->pClipRects;
- drm_clip_rect_t *b = radeon->sarea->boxes;
- GLint n = 0;
-
- for ( ; i < nr ; i++ ) {
-
- *b = box[i];
-
- if (rect)
- {
- if (rect->x1 > b->x1)
- b->x1 = rect->x1;
- if (rect->y1 > b->y1)
- b->y1 = rect->y1;
- if (rect->x2 < b->x2)
- b->x2 = rect->x2;
- if (rect->y2 < b->y2)
- b->y2 = rect->y2;
-
- if (b->x1 >= b->x2 || b->y1 >= b->y2)
- continue;
- }
-
- b++;
- n++;
- }
- radeon->sarea->nbox = n;
-
- if (!n)
- continue;
-
- ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP);
-
- if (ret) {
- fprintf(stderr, "DRM_RADEON_SWAP: return = %d\n",
- ret);
- UNLOCK_HARDWARE(radeon);
- exit(1);
- }
- }
-
- UNLOCK_HARDWARE(radeon);
- if (!rect)
- {
- ((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE;
-
- radeon->swap_count++;
- (*psp->systemTime->getUST) (&ust);
- if (missed_target) {
- radeon->swap_missed_count++;
- radeon->swap_missed_ust = ust - radeon->swap_ust;
- }
-
- radeon->swap_ust = ust;
-
- sched_yield();
- }
-}
-
-void radeonPageFlip(__DRIdrawablePrivate * dPriv)
-{
- radeonContextPtr radeon;
- GLint ret;
- GLboolean missed_target;
- __DRIscreenPrivate *psp = dPriv->driScreenPriv;
-
- assert(dPriv);
- assert(dPriv->driContextPriv);
- assert(dPriv->driContextPriv->driverPrivate);
-
- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-
- if (RADEON_DEBUG & DEBUG_IOCTL) {
- fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
- radeon->sarea->pfCurrentPage);
- }
-
- r300Flush(radeon->glCtx);
- LOCK_HARDWARE(radeon);
-
- if (!dPriv->numClipRects) {
- UNLOCK_HARDWARE(radeon);
- usleep(10000); /* throttle invisible client 10ms */
- return;
- }
-
- /* Need to do this for the perf box placement:
- */
- {
- drm_clip_rect_t *box = dPriv->pClipRects;
- drm_clip_rect_t *b = radeon->sarea->boxes;
- b[0] = box[0];
- radeon->sarea->nbox = 1;
- }
-
- /* Throttle the frame rate -- only allow a few pending swap buffers
- * request at a time.
- */
- radeonWaitForFrameCompletion(radeon);
- UNLOCK_HARDWARE(radeon);
- driWaitForVBlank(dPriv, &missed_target);
- if (missed_target) {
- radeon->swap_missed_count++;
- (void)(*psp->systemTime->getUST) (&radeon->swap_missed_ust);
- }
- LOCK_HARDWARE(radeon);
-
- ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_FLIP);
-
- UNLOCK_HARDWARE(radeon);
-
- if (ret) {
- fprintf(stderr, "DRM_RADEON_FLIP: return = %d\n", ret);
- exit(1);
- }
-
- radeon->swap_count++;
- (void)(*psp->systemTime->getUST) (&radeon->swap_ust);
-
- driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer,
- radeon->sarea->pfCurrentPage);
-
- if (radeon->sarea->pfCurrentPage == 1) {
- radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
- radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
- } else {
- radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
- radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
- }
-
- if (IS_R300_CLASS(radeon->radeonScreen)) {
- r300ContextPtr r300 = (r300ContextPtr)radeon;
- R300_STATECHANGE(r300, cb);
- r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset +
- r300->radeon.radeonScreen->fbLocation;
- r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
-
- if (r300->radeon.radeonScreen->cpp == 4)
- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
- else
- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
-
- if (r300->radeon.sarea->tiling_enabled)
- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
- }
-}
-
-void radeonWaitForIdleLocked(radeonContextPtr radeon)
-{
- int ret;
- int i = 0;
-
- do {
- ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
- if (ret)
- DO_USLEEP(1);
- } while (ret && ++i < 100);
-
- if (ret < 0) {
- UNLOCK_HARDWARE(radeon);
- fprintf(stderr, "Error: R300 timed out... exiting\n");
- exit(-1);
- }
-}
-
-static void radeonWaitForIdle(radeonContextPtr radeon)
-{
- LOCK_HARDWARE(radeon);
- radeonWaitForIdleLocked(radeon);
- UNLOCK_HARDWARE(radeon);
-}
-
-void radeonFlush(GLcontext * ctx)
-{
- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-
- if (IS_R300_CLASS(radeon->radeonScreen))
- r300Flush(ctx);
-}
-
-
-/* Make sure all commands have been sent to the hardware and have
- * completed processing.
- */
-void radeonFinish(GLcontext * ctx)
-{
- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-
- radeonFlush(ctx);
-
- if (radeon->do_irqs) {
- LOCK_HARDWARE(radeon);
- radeonEmitIrqLocked(radeon);
- UNLOCK_HARDWARE(radeon);
- radeonWaitIrq(radeon);
- } else
- radeonWaitForIdle(radeon);
-}
diff --git a/r300/radeon_lock.c b/r300/radeon_lock.c
deleted file mode 100644
index 4f47afd..0000000
--- a/r300/radeon_lock.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/**************************************************************************
-
-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
- VA Linux Systems Inc., Fremont, California.
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
-
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Gareth Hughes <gareth@valinux.com>
- * Keith Whitwell <keith@tungstengraphics.com>
- * Kevin E. Martin <martin@valinux.com>
- */
-
-#include "radeon_lock.h"
-#include "radeon_ioctl.h"
-#include "radeon_state.h"
-#include "r300_context.h"
-#include "r300_state.h"
-
-#include "main/framebuffer.h"
-
-#include "drirenderbuffer.h"
-
-#if DEBUG_LOCKING
-char *prevLockFile = NULL;
-int prevLockLine = 0;
-#endif
-
-/* Turn on/off page flipping according to the flags in the sarea:
- */
-void radeonUpdatePageFlipping(radeonContextPtr rmesa)
-{
- int use_back;
-
- rmesa->doPageFlip = rmesa->sarea->pfState;
- if (rmesa->glCtx->WinSysDrawBuffer) {
- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
- rmesa->sarea->pfCurrentPage);
- r300UpdateDrawBuffer(rmesa->glCtx);
- }
-
- use_back = rmesa->glCtx->DrawBuffer ?
- (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] ==
- BUFFER_BACK_LEFT) : 1;
- use_back ^= (rmesa->sarea->pfCurrentPage == 1);
-
- if (use_back) {
- rmesa->state.color.drawOffset =
- rmesa->radeonScreen->backOffset;
- rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch;
- } else {
- rmesa->state.color.drawOffset =
- rmesa->radeonScreen->frontOffset;
- rmesa->state.color.drawPitch =
- rmesa->radeonScreen->frontPitch;
- }
-}
-
-/* Update the hardware state. This is called if another context has
- * grabbed the hardware lock, which includes the X server. This
- * function also updates the driver's window state after the X server
- * moves, resizes or restacks a window -- the change will be reflected
- * in the drawable position and clip rects. Since the X server grabs
- * the hardware lock when it changes the window state, this routine will
- * automatically be called after such a change.
- */
-void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
-{
- __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
- __DRIdrawablePrivate *const readable = rmesa->dri.readable;
- __DRIscreenPrivate *sPriv = rmesa->dri.screen;
- drm_radeon_sarea_t *sarea = rmesa->sarea;
- r300ContextPtr r300 = (r300ContextPtr) rmesa;
-
- assert(drawable != NULL);
-
- drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
-
- /* The window might have moved, so we might need to get new clip
- * rects.
- *
- * NOTE: This releases and regrabs the hw lock to allow the X server
- * to respond to the DRI protocol request for new drawable info.
- * Since the hardware state depends on having the latest drawable
- * clip rects, all state checking must be done _after_ this call.
- */
- DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
- if (drawable != readable) {
- DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
- }
-
- if (rmesa->lastStamp != drawable->lastStamp) {
- radeonUpdatePageFlipping(rmesa);
- radeonSetCliprects(rmesa);
- r300UpdateViewportOffset(rmesa->glCtx);
- driUpdateFramebufferSize(rmesa->glCtx, drawable);
- }
-
- if (sarea->ctx_owner != rmesa->dri.hwContext) {
- int i;
-
- sarea->ctx_owner = rmesa->dri.hwContext;
- for (i = 0; i < r300->nr_heaps; i++) {
- DRI_AGE_TEXTURES(r300->texture_heaps[i]);
- }
- }
-
- rmesa->lost_context = GL_TRUE;
-}
diff --git a/r300/radeon_lock.h b/r300/radeon_lock.h
deleted file mode 100644
index a344837..0000000
--- a/r300/radeon_lock.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/**************************************************************************
-
-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
- VA Linux Systems Inc., Fremont, California.
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
-
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Gareth Hughes <gareth@valinux.com>
- * Keith Whitwell <keith@tungstengraphics.com>
- * Kevin E. Martin <martin@valinux.com>
- */
-
-#ifndef __RADEON_LOCK_H__
-#define __RADEON_LOCK_H__
-
-#include "radeon_context.h"
-
-extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
-extern void radeonUpdatePageFlipping(radeonContextPtr rmesa);
-
-/* Turn DEBUG_LOCKING on to find locking conflicts.
- */
-#define DEBUG_LOCKING 0
-
-#if DEBUG_LOCKING
-extern char *prevLockFile;
-extern int prevLockLine;
-
-#define DEBUG_LOCK() \
- do { \
- prevLockFile = (__FILE__); \
- prevLockLine = (__LINE__); \
- } while (0)
-
-#define DEBUG_RESET() \
- do { \
- prevLockFile = 0; \
- prevLockLine = 0; \
- } while (0)
-
-#define DEBUG_CHECK_LOCK() \
- do { \
- if (prevLockFile) { \
- fprintf(stderr, \
- "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \
- prevLockFile, prevLockLine, __FILE__, __LINE__); \
- exit(1); \
- } \
- } while (0)
-
-#else
-
-#define DEBUG_LOCK()
-#define DEBUG_RESET()
-#define DEBUG_CHECK_LOCK()
-
-#endif
-
-/*
- * !!! We may want to separate locks from locks with validation. This
- * could be used to improve performance for those things commands that
- * do not do any drawing !!!
- */
-
-/* Lock the hardware and validate our state.
- */
-#define LOCK_HARDWARE( rmesa ) \
- do { \
- char __ret = 0; \
- DEBUG_CHECK_LOCK(); \
- DRM_CAS((rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \
- (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret); \
- if (__ret) \
- radeonGetLock((rmesa), 0); \
- DEBUG_LOCK(); \
- } while (0)
-
-#define UNLOCK_HARDWARE( rmesa ) \
- do { \
- DRM_UNLOCK((rmesa)->dri.fd, \
- (rmesa)->dri.hwLock, \
- (rmesa)->dri.hwContext); \
- DEBUG_RESET(); \
- } while (0)
-
-#endif /* __RADEON_LOCK_H__ */
diff --git a/r300/radeon_program.c b/r300/radeon_program.c
deleted file mode 100644
index da5e7ae..0000000
--- a/r300/radeon_program.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "radeon_program.h"
-
-#include "shader/prog_print.h"
-
-
-/**
- * Transform the given clause in the following way:
- * 1. Replace it with an empty clause
- * 2. For every instruction in the original clause, try the given
- * transformations in order.
- * 3. If one of the transformations returns GL_TRUE, assume that it
- * has emitted the appropriate instruction(s) into the new clause;
- * otherwise, copy the instruction verbatim.
- *
- * \note The transformation is currently not recursive; in other words,
- * instructions emitted by transformations are not transformed.
- *
- * \note The transform is called 'local' because it can only look at
- * one instruction at a time.
- */
-void radeonLocalTransform(
- GLcontext *Ctx,
- struct gl_program *program,
- int num_transformations,
- struct radeon_program_transformation* transformations)
-{
- struct radeon_transform_context ctx;
- int ip;
-
- ctx.Ctx = Ctx;
- ctx.Program = program;
- ctx.OldInstructions = program->Instructions;
- ctx.OldNumInstructions = program->NumInstructions;
-
- program->Instructions = 0;
- program->NumInstructions = 0;
-
- for(ip = 0; ip < ctx.OldNumInstructions; ++ip) {
- struct prog_instruction *instr = ctx.OldInstructions + ip;
- int i;
-
- for(i = 0; i < num_transformations; ++i) {
- struct radeon_program_transformation* t = transformations + i;
-
- if (t->function(&ctx, instr, t->userData))
- break;
- }
-
- if (i >= num_transformations) {
- struct prog_instruction* dest = radeonAppendInstructions(program, 1);
- _mesa_copy_instructions(dest, instr, 1);
- }
- }
-
- _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions);
-}
-
-
-static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count)
-{
- GLuint i;
- for (i = 0; i < count; i++) {
- const struct prog_instruction *inst = insts + i;
- const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
- GLuint k;
-
- for (k = 0; k < n; k++) {
- if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
- used[inst->SrcReg[k].Index] = GL_TRUE;
- }
- }
-}
-
-GLint radeonFindFreeTemporary(struct radeon_transform_context *t)
-{
- GLboolean used[MAX_PROGRAM_TEMPS];
- GLuint i;
-
- _mesa_memset(used, 0, sizeof(used));
- scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions);
- scan_instructions(used, t->OldInstructions, t->OldNumInstructions);
-
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
- if (!used[i])
- return i;
- }
-
- return -1;
-}
-
-
-/**
- * Append the given number of instructions to the program and return a
- * pointer to the first new instruction.
- */
-struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count)
-{
- int oldnum = program->NumInstructions;
- _mesa_insert_instructions(program, oldnum, count);
- return program->Instructions + oldnum;
-}
diff --git a/r300/radeon_span.c b/r300/radeon_span.c
deleted file mode 100644
index 16f9fb9..0000000
--- a/r300/radeon_span.c
+++ /dev/null
@@ -1,349 +0,0 @@
-/**************************************************************************
-
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
- VA Linux Systems Inc., Fremont, California.
-
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Kevin E. Martin <martin@valinux.com>
- * Gareth Hughes <gareth@valinux.com>
- * Keith Whitwell <keith@tungstengraphics.com>
- *
- */
-
-#include "main/glheader.h"
-#include "swrast/swrast.h"
-
-#include "r300_state.h"
-#include "radeon_ioctl.h"
-#include "r300_ioctl.h"
-#include "radeon_span.h"
-
-#include "drirenderbuffer.h"
-
-#define DBG 0
-
-/*
- * Note that all information needed to access pixels in a renderbuffer
- * should be obtained through the gl_renderbuffer parameter, not per-context
- * information.
- */
-#define LOCAL_VARS \
- driRenderbuffer *drb = (driRenderbuffer *) rb; \
- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
- const GLuint bottom = dPriv->h - 1; \
- GLubyte *buf = (GLubyte *) drb->flippedData \
- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \
- GLuint p; \
- (void) p;
-
-#define LOCAL_DEPTH_VARS \
- driRenderbuffer *drb = (driRenderbuffer *) rb; \
- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
- const GLuint bottom = dPriv->h - 1; \
- GLuint xo = dPriv->x; \
- GLuint yo = dPriv->y; \
- GLubyte *buf = (GLubyte *) drb->Base.Data;
-
-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
-
-#define Y_FLIP(Y) (bottom - (Y))
-
-#define HW_LOCK()
-
-#define HW_UNLOCK()
-
-/* ================================================================
- * Color buffer
- */
-
-/* 16 bit, RGB565 color spanline and pixel functions
- */
-#define SPANTMP_PIXEL_FMT GL_RGB
-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
-
-#define TAG(x) radeon##x##_RGB565
-#define TAG2(x,y) radeon##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
-#include "spantmp2.h"
-
-/* 32 bit, ARGB8888 color spanline and pixel functions
- */
-#define SPANTMP_PIXEL_FMT GL_BGRA
-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
-
-#define TAG(x) radeon##x##_ARGB8888
-#define TAG2(x,y) radeon##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
-#include "spantmp2.h"
-
-/* ================================================================
- * Depth buffer
- */
-
-/* The Radeon family has depth tiling on all the time, so we have to convert
- * the x,y coordinates into the memory bus address (mba) in the same
- * manner as the engine. In each case, the linear block address (ba)
- * is calculated, and then wired with x and y to produce the final
- * memory address.
- * The chip will do address translation on its own if the surface registers
- * are set up correctly. It is not quite enough to get it working with hyperz
- * too...
- */
-
-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
-{
- GLuint pitch = drb->pitch;
- if (drb->depthHasSurface) {
- return 4 * (x + y * pitch);
- } else {
- GLuint ba, address = 0; /* a[0..1] = 0 */
-
-#ifdef COMPILE_R300
- ba = (y / 8) * (pitch / 8) + (x / 8);
-#else
- ba = (y / 16) * (pitch / 16) + (x / 16);
-#endif
-
- address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */
- address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */
- address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */
- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
-
- address |= (y & 0x8) << 7; /* a[10] = y[3] */
- address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */
- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
-
- return address;
- }
-}
-
-static INLINE GLuint
-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
-{
- GLuint pitch = drb->pitch;
- if (drb->depthHasSurface) {
- return 2 * (x + y * pitch);
- } else {
- GLuint ba, address = 0; /* a[0] = 0 */
-
- ba = (y / 16) * (pitch / 32) + (x / 32);
-
- address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */
- address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */
- address |= (x & 0x8) << 4; /* a[7] = x[3] */
- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
- address |= (y & 0x8) << 7; /* a[10] = y[3] */
- address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */
- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
-
- return address;
- }
-}
-
-/* 16-bit depth buffer functions
- */
-#define VALUE_TYPE GLushort
-
-#define WRITE_DEPTH( _x, _y, d ) \
- *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
-
-#define READ_DEPTH( d, _x, _y ) \
- d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
-
-#define TAG(x) radeon##x##_z16
-#include "depthtmp.h"
-
-/* 24 bit depth, 8 bit stencil depthbuffer functions
- *
- * Careful: It looks like the R300 uses ZZZS byte order while the R200
- * uses SZZZ for 24 bit depth, 8 bit stencil mode.
- */
-#define VALUE_TYPE GLuint
-
-#ifdef COMPILE_R300
-#define WRITE_DEPTH( _x, _y, d ) \
-do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
- tmp &= 0x000000ff; \
- tmp |= ((d << 8) & 0xffffff00); \
- *(GLuint *)(buf + offset) = tmp; \
-} while (0)
-#else
-#define WRITE_DEPTH( _x, _y, d ) \
-do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
- tmp &= 0xff000000; \
- tmp |= ((d) & 0x00ffffff); \
- *(GLuint *)(buf + offset) = tmp; \
-} while (0)
-#endif
-
-#ifdef COMPILE_R300
-#define READ_DEPTH( d, _x, _y ) \
- do { \
- d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
- _y + yo )) & 0xffffff00) >> 8; \
- }while(0)
-#else
-#define READ_DEPTH( d, _x, _y ) \
- d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
- _y + yo )) & 0x00ffffff;
-#endif
-
-#define TAG(x) radeon##x##_z24_s8
-#include "depthtmp.h"
-
-/* ================================================================
- * Stencil buffer
- */
-
-/* 24 bit depth, 8 bit stencil depthbuffer functions
- */
-#ifdef COMPILE_R300
-#define WRITE_STENCIL( _x, _y, d ) \
-do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
- tmp &= 0xffffff00; \
- tmp |= (d) & 0xff; \
- *(GLuint *)(buf + offset) = tmp; \
-} while (0)
-#else
-#define WRITE_STENCIL( _x, _y, d ) \
-do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
- tmp &= 0x00ffffff; \
- tmp |= (((d) & 0xff) << 24); \
- *(GLuint *)(buf + offset) = tmp; \
-} while (0)
-#endif
-
-#ifdef COMPILE_R300
-#define READ_STENCIL( d, _x, _y ) \
-do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
- d = tmp & 0x000000ff; \
-} while (0)
-#else
-#define READ_STENCIL( d, _x, _y ) \
-do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
- d = (tmp & 0xff000000) >> 24; \
-} while (0)
-#endif
-
-#define TAG(x) radeon##x##_z24_s8
-#include "stenciltmp.h"
-
-/* Move locking out to get reasonable span performance (10x better
- * than doing this in HW_LOCK above). WaitForIdle() is the main
- * culprit.
- */
-
-static void radeonSpanRenderStart(GLcontext * ctx)
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-#ifdef COMPILE_R300
- r300ContextPtr r300 = (r300ContextPtr) rmesa;
- R300_FIREVERTICES(r300);
-#else
- RADEON_FIREVERTICES(rmesa);
-#endif
- LOCK_HARDWARE(rmesa);
- radeonWaitForIdleLocked(rmesa);
-
- /* Read the first pixel in the frame buffer. This should
- * be a noop, right? In fact without this conform fails as reading
- * from the framebuffer sometimes produces old results -- the
- * on-card read cache gets mixed up and doesn't notice that the
- * framebuffer has been updated.
- *
- * Note that we should probably be reading some otherwise unused
- * region of VRAM, otherwise we might get incorrect results when
- * reading pixels from the top left of the screen.
- *
- * I found this problem on an R420 with glean's texCube test.
- * Note that the R200 span code also *writes* the first pixel in the
- * framebuffer, but I've found this to be unnecessary.
- * -- Nicolai Hähnle, June 2008
- */
- {
- int p;
- driRenderbuffer *drb =
- (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
- volatile int *buf =
- (volatile int *)(rmesa->dri.screen->pFB + drb->offset);
- p = *buf;
- }
-}
-
-static void radeonSpanRenderFinish(GLcontext * ctx)
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- _swrast_flush(ctx);
- UNLOCK_HARDWARE(rmesa);
-}
-
-void radeonInitSpanFuncs(GLcontext * ctx)
-{
- struct swrast_device_driver *swdd =
- _swrast_GetDeviceDriverReference(ctx);
- swdd->SpanRenderStart = radeonSpanRenderStart;
- swdd->SpanRenderFinish = radeonSpanRenderFinish;
-}
-
-/**
- * Plug in the Get/Put routines for the given driRenderbuffer.
- */
-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
-{
- if (drb->Base.InternalFormat == GL_RGBA) {
- if (vis->redBits == 5 && vis->greenBits == 6
- && vis->blueBits == 5) {
- radeonInitPointers_RGB565(&drb->Base);
- } else {
- radeonInitPointers_ARGB8888(&drb->Base);
- }
- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
- radeonInitDepthPointers_z16(&drb->Base);
- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
- radeonInitDepthPointers_z24_s8(&drb->Base);
- } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
- radeonInitStencilPointers_z24_s8(&drb->Base);
- }
-}
diff --git a/r300/radeon_state.c b/r300/radeon_state.c
deleted file mode 100644
index c401da6..0000000
--- a/r300/radeon_state.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/**************************************************************************
-
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
-
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "main/glheader.h"
-#include "main/imports.h"
-#include "main/api_arrayelt.h"
-#include "main/enums.h"
-#include "main/framebuffer.h"
-#include "main/colormac.h"
-#include "main/light.h"
-
-#include "swrast/swrast.h"
-#include "vbo/vbo.h"
-#include "tnl/tnl.h"
-#include "tnl/t_pipeline.h"
-#include "swrast_setup/swrast_setup.h"
-
-#include "radeon_ioctl.h"
-#include "radeon_state.h"
-#include "r300_ioctl.h"
-
-
-/* =============================================================
- * Scissoring
- */
-
-static GLboolean intersect_rect(drm_clip_rect_t * out,
- drm_clip_rect_t * a, drm_clip_rect_t * b)
-{
- *out = *a;
- if (b->x1 > out->x1)
- out->x1 = b->x1;
- if (b->y1 > out->y1)
- out->y1 = b->y1;
- if (b->x2 < out->x2)
- out->x2 = b->x2;
- if (b->y2 < out->y2)
- out->y2 = b->y2;
- if (out->x1 >= out->x2)
- return GL_FALSE;
- if (out->y1 >= out->y2)
- return GL_FALSE;
- return GL_TRUE;
-}
-
-void radeonRecalcScissorRects(radeonContextPtr radeon)
-{
- drm_clip_rect_t *out;
- int i;
-
- /* Grow cliprect store?
- */
- if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
- while (radeon->state.scissor.numAllocedClipRects <
- radeon->numClipRects) {
- radeon->state.scissor.numAllocedClipRects += 1; /* zero case */
- radeon->state.scissor.numAllocedClipRects *= 2;
- }
-
- if (radeon->state.scissor.pClipRects)
- FREE(radeon->state.scissor.pClipRects);
-
- radeon->state.scissor.pClipRects =
- MALLOC(radeon->state.scissor.numAllocedClipRects *
- sizeof(drm_clip_rect_t));
-
- if (radeon->state.scissor.pClipRects == NULL) {
- radeon->state.scissor.numAllocedClipRects = 0;
- return;
- }
- }
-
- out = radeon->state.scissor.pClipRects;
- radeon->state.scissor.numClipRects = 0;
-
- for (i = 0; i < radeon->numClipRects; i++) {
- if (intersect_rect(out,
- &radeon->pClipRects[i],
- &radeon->state.scissor.rect)) {
- radeon->state.scissor.numClipRects++;
- out++;
- }
- }
-}
-
-void radeonUpdateScissor(GLcontext* ctx)
-{
- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-
- if (radeon->dri.drawable) {
- __DRIdrawablePrivate *dPriv = radeon->dri.drawable;
- int x1 = dPriv->x + ctx->Scissor.X;
- int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height);
-
- radeon->state.scissor.rect.x1 = x1;
- radeon->state.scissor.rect.y1 = y1;
- radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width;
- radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height;
-
- radeonRecalcScissorRects(radeon);
- }
-}
-
-static void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
-{
- if (ctx->Scissor.Enabled) {
- /* We don't pipeline cliprect changes */
- r300Flush(ctx);
- radeonUpdateScissor(ctx);
- }
-}
-
-
-/**
- * Update cliprects and scissors.
- */
-void radeonSetCliprects(radeonContextPtr radeon)
-{
- __DRIdrawablePrivate *const drawable = radeon->dri.drawable;
- __DRIdrawablePrivate *const readable = radeon->dri.readable;
- GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate;
- GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate;
-
- if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
- /* Can't ignore 2d windows if we are page flipping. */
- if (drawable->numBackClipRects == 0 || radeon->doPageFlip ||
- radeon->sarea->pfCurrentPage == 1) {
- radeon->numClipRects = drawable->numClipRects;
- radeon->pClipRects = drawable->pClipRects;
- } else {
- radeon->numClipRects = drawable->numBackClipRects;
- radeon->pClipRects = drawable->pBackClipRects;
- }
- } else {
- /* front buffer (or none, or multiple buffers */
- radeon->numClipRects = drawable->numClipRects;
- radeon->pClipRects = drawable->pClipRects;
- }
-
- if ((draw_fb->Width != drawable->w) ||
- (draw_fb->Height != drawable->h)) {
- _mesa_resize_framebuffer(radeon->glCtx, draw_fb,
- drawable->w, drawable->h);
- draw_fb->Initialized = GL_TRUE;
- }
-
- if (drawable != readable) {
- if ((read_fb->Width != readable->w) ||
- (read_fb->Height != readable->h)) {
- _mesa_resize_framebuffer(radeon->glCtx, read_fb,
- readable->w, readable->h);
- read_fb->Initialized = GL_TRUE;
- }
- }
-
- if (radeon->state.scissor.enabled)
- radeonRecalcScissorRects(radeon);
-
- radeon->lastStamp = drawable->lastStamp;
-}
-
-
-/**
- * Handle common enable bits.
- * Called as a fallback by r200Enable/r300Enable.
- */
-void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state)
-{
- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-
- switch(cap) {
- case GL_SCISSOR_TEST:
- /* We don't pipeline cliprect & scissor changes */
- r300Flush(ctx);
-
- radeon->state.scissor.enabled = state;
- radeonUpdateScissor(ctx);
- break;
-
- default:
- return;
- }
-}
-
-
-/**
- * Initialize default state.
- * This function is called once at context init time from
- * r200InitState/r300InitState
- */
-void radeonInitState(radeonContextPtr radeon)
-{
- radeon->Fallback = 0;
-
- if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) {
- radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
- radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
- } else {
- radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
- radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
- }
-}
-
-
-/**
- * Initialize common state functions.
- * Called by r200InitStateFuncs/r300InitStateFuncs
- */
-void radeonInitStateFuncs(struct dd_function_table *functions)
-{
- functions->Scissor = radeonScissor;
-}
diff --git a/r300/radeon_state.h b/r300/radeon_state.h
deleted file mode 100644
index 821cb40..0000000
--- a/r300/radeon_state.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
-Copyright (C) 2004 Nicolai Haehnle. All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Nicolai Haehnle <prefect_@gmx.net>
- */
-
-#ifndef __RADEON_STATE_H__
-#define __RADEON_STATE_H__
-
-extern void radeonRecalcScissorRects(radeonContextPtr radeon);
-extern void radeonSetCliprects(radeonContextPtr radeon);
-extern void radeonUpdateScissor(GLcontext* ctx);
-
-extern void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state);
-
-extern void radeonInitState(radeonContextPtr radeon);
-extern void radeonInitStateFuncs(struct dd_function_table* functions);
-
-#endif
diff --git a/r600/Lindent b/r600/Lindent
new file mode 100755
index 0000000..7d8d889
--- /dev/null
+++ b/r600/Lindent
@@ -0,0 +1,2 @@
+#!/bin/sh
+indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@"
diff --git a/r600/Makefile.am b/r600/Makefile.am
new file mode 100644
index 0000000..6b2091e
--- /dev/null
+++ b/r600/Makefile.am
@@ -0,0 +1,48 @@
+AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
+
+R600_CFLAGS = -DCOMPILE_R600 -DR200_MERGED=0 -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R600
+R600_CFLAGS += -I../radeon -I../radeon/server
+
+r600_dri_la_LTLIBRARIES = r600_dri.la
+r600_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R600_CFLAGS)
+r600_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
+ $(DRM_LIBS) $(DRI_LIBS)
+r600_dri_ladir = @libdir@/dri
+r600_dri_la_SOURCES = \
+ ../radeon/radeon_bo_legacy.c \
+ ../radeon/radeon_common_context.c \
+ ../radeon/radeon_common.c \
+ ../radeon/radeon_cs_legacy.c \
+ ../radeon/radeon_dma.c \
+ ../radeon/radeon_debug.c \
+ ../radeon/radeon_fbo.c \
+ ../radeon/radeon_lock.c \
+ ../radeon/radeon_mipmap_tree.c \
+ ../radeon/radeon_span.c \
+ ../radeon/radeon_texture.c \
+ ../radeon/radeon_queryobj.c \
+ ../radeon/radeon_screen.c \
+ r600_context.c \
+ r600_cmdbuf.c \
+ r600_emit.c \
+ r700_assembler.c \
+ r700_fragprog.c \
+ r700_vertprog.c \
+ r700_shader.c \
+ r700_shaderinst.c \
+ r700_ioctl.c \
+ r700_oglprog.c \
+ r700_chip.c \
+ r700_state.c \
+ r700_clear.c \
+ r700_render.c \
+ r600_tex.c \
+ r600_texstate.c \
+ r700_debug.c
+
+if HAVE_LIBDRM_RADEON
+r600_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
+r600_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
+r600_dri_la_SOURCES += \
+ ../radeon/radeon_cs_space_drm.c
+endif
diff --git a/r600/defaultendian.h b/r600/defaultendian.h
new file mode 100644
index 0000000..32caf32
--- /dev/null
+++ b/r600/defaultendian.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef _DEFINEENDIAN_H_
+#define _DEFINEENDIAN_H_
+
+//We have to choose a reg bits orientation if there is no compile flag for it.
+#if defined(LITTLEENDIAN_CPU)
+#elif defined(BIGENDIAN_CPU)
+#else
+#define LITTLEENDIAN_CPU
+#endif
+
+#endif //_DEFINEENDIAN_H_
diff --git a/r600/r600_cmdbuf.c b/r600/r600_cmdbuf.c
new file mode 100644
index 0000000..3cfe03a
--- /dev/null
+++ b/r600/r600_cmdbuf.c
@@ -0,0 +1,514 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * Mostly coppied from \radeon\radeon_cs_legacy.c
+ */
+
+#include <errno.h>
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "swrast/swrast.h"
+
+#include "drm.h"
+#include "radeon_drm.h"
+
+#include "r600_context.h"
+#include "radeon_reg.h"
+#include "r600_cmdbuf.h"
+#include "r600_emit.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_mipmap_tree.h"
+#include "radeon_reg.h"
+
+
+
+static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm,
+ uint32_t ndw)
+{
+ struct radeon_cs *cs;
+
+ cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
+ if (cs == NULL) {
+ return NULL;
+ }
+ cs->csm = csm;
+ cs->ndw = (ndw + 0x3FF) & (~0x3FF);
+ cs->packets = (uint32_t*)malloc(4*cs->ndw);
+ if (cs->packets == NULL) {
+ free(cs);
+ return NULL;
+ }
+ cs->relocs_total_size = 0;
+ return cs;
+}
+
+static int r600_cs_write_reloc(struct radeon_cs *cs,
+ struct radeon_bo *bo,
+ uint32_t read_domain,
+ uint32_t write_domain,
+ uint32_t flags)
+{
+ struct r600_cs_reloc_legacy *relocs;
+ int i;
+
+ relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+ /* check domains */
+ if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
+ /* in one CS a bo can only be in read or write domain but not
+ * in read & write domain at the same sime
+ */
+ return -EINVAL;
+ }
+ if (read_domain == RADEON_GEM_DOMAIN_CPU) {
+ return -EINVAL;
+ }
+ if (write_domain == RADEON_GEM_DOMAIN_CPU) {
+ return -EINVAL;
+ }
+ /* check if bo is already referenced */
+ for(i = 0; i < cs->crelocs; i++) {
+ uint32_t *indices;
+ uint32_t *reloc_indices;
+
+ if (relocs[i].base.bo->handle == bo->handle) {
+ /* Check domains must be in read or write. As we check already
+ * checked that in argument one of the read or write domain was
+ * set we only need to check that if previous reloc as the read
+ * domain set then the read_domain should also be set for this
+ * new relocation.
+ */
+ if (relocs[i].base.read_domain && !read_domain) {
+ return -EINVAL;
+ }
+ if (relocs[i].base.write_domain && !write_domain) {
+ return -EINVAL;
+ }
+ relocs[i].base.read_domain |= read_domain;
+ relocs[i].base.write_domain |= write_domain;
+ /* save indice */
+ relocs[i].cindices++;
+ indices = (uint32_t*)realloc(relocs[i].indices,
+ relocs[i].cindices * 4);
+ reloc_indices = (uint32_t*)realloc(relocs[i].reloc_indices,
+ relocs[i].cindices * 4);
+ if ( (indices == NULL) || (reloc_indices == NULL) ) {
+ relocs[i].cindices -= 1;
+ return -ENOMEM;
+ }
+ relocs[i].indices = indices;
+ relocs[i].reloc_indices = reloc_indices;
+ relocs[i].indices[relocs[i].cindices - 1] = cs->cdw;
+ relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->cdw;
+ cs->section_cdw += 2;
+ cs->cdw += 2;
+
+ return 0;
+ }
+ }
+ /* add bo to reloc */
+ relocs = (struct r600_cs_reloc_legacy*)
+ realloc(cs->relocs,
+ sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1));
+ if (relocs == NULL) {
+ return -ENOMEM;
+ }
+ cs->relocs = relocs;
+ relocs[cs->crelocs].base.bo = bo;
+ relocs[cs->crelocs].base.read_domain = read_domain;
+ relocs[cs->crelocs].base.write_domain = write_domain;
+ relocs[cs->crelocs].base.flags = flags;
+ relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
+ relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4);
+ if ( (relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL) )
+ {
+ return -ENOMEM;
+ }
+
+ relocs[cs->crelocs].indices[0] = cs->cdw;
+ relocs[cs->crelocs].reloc_indices[0] = cs->cdw;
+ cs->section_cdw += 2;
+ cs->cdw += 2;
+ relocs[cs->crelocs].cindices = 1;
+ cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
+ cs->crelocs++;
+
+ radeon_bo_ref(bo);
+
+ return 0;
+}
+
+static int r600_cs_begin(struct radeon_cs *cs,
+ uint32_t ndw,
+ const char *file,
+ const char *func,
+ int line)
+{
+ if (cs->section) {
+ fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
+ cs->section_file, cs->section_func, cs->section_line);
+ fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
+ file, func, line);
+ return -EPIPE;
+ }
+
+ cs->section = 1;
+ cs->section_ndw = ndw;
+ cs->section_cdw = 0;
+ cs->section_file = file;
+ cs->section_func = func;
+ cs->section_line = line;
+
+ if (cs->cdw + ndw > cs->ndw) {
+ uint32_t tmp, *ptr;
+ int num = (ndw > 0x400) ? ndw : 0x400;
+
+ tmp = (cs->cdw + num + 0x3FF) & (~0x3FF);
+ ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
+ if (ptr == NULL) {
+ return -ENOMEM;
+ }
+ cs->packets = ptr;
+ cs->ndw = tmp;
+ }
+
+ return 0;
+}
+
+static int r600_cs_end(struct radeon_cs *cs,
+ const char *file,
+ const char *func,
+ int line)
+
+{
+ if (!cs->section) {
+ fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
+ file, func, line);
+ return -EPIPE;
+ }
+ cs->section = 0;
+
+ if ( cs->section_ndw != cs->section_cdw ) {
+ fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
+ cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
+ fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n",
+ cs->section_ndw, cs->cdw, cs->section_cdw);
+ fprintf(stderr, "CS section end at (%s,%s,%d)\n",
+ file, func, line);
+ return -EPIPE;
+ }
+
+ if (cs->cdw > cs->ndw) {
+ fprintf(stderr, "CS section overflow at (%s,%s,%d) cdw %d ndw %d\n",
+ cs->section_file, cs->section_func, cs->section_line,cs->cdw,cs->ndw);
+ fprintf(stderr, "CS section end at (%s,%s,%d)\n",
+ file, func, line);
+ assert(0);
+ }
+
+ return 0;
+}
+
+static int r600_cs_process_relocs(struct radeon_cs *cs,
+ uint32_t * reloc_chunk,
+ uint32_t * length_dw_reloc_chunk)
+{
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_reloc_legacy *relocs;
+ int i, j, r;
+
+ uint32_t offset_dw = 0;
+
+ csm = (struct r600_cs_manager_legacy*)cs->csm;
+ relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+restart:
+ for (i = 0; i < cs->crelocs; i++) {
+ uint32_t soffset, eoffset, asicoffset;
+
+ r = radeon_bo_legacy_validate(relocs[i].base.bo,
+ &soffset, &eoffset);
+ if (r == -EAGAIN) {
+ goto restart;
+ }
+ if (r) {
+ fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+ relocs[i].base.bo, soffset, eoffset);
+ return r;
+ }
+ asicoffset = soffset;
+
+ for (j = 0; j < relocs[i].cindices; j++) {
+ if (asicoffset >= eoffset) {
+ /* radeon_bo_debug(relocs[i].base.bo, 12); */
+ fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+ relocs[i].base.bo, soffset, eoffset);
+ fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
+ relocs[i].base.bo,
+ cs->packets[relocs[i].indices[j]],
+ eoffset);
+ exit(0);
+ return -EINVAL;
+ }
+ /* pkt3 nop header in ib chunk */
+ cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
+ /* reloc index in ib chunk */
+ cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
+ }
+
+ /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */
+ reloc_chunk[offset_dw] = asicoffset;
+ reloc_chunk[offset_dw + 3] = 0;
+
+ offset_dw += 4;
+ }
+
+ *length_dw_reloc_chunk = offset_dw;
+
+ return 0;
+}
+
+static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
+{
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_reloc_legacy *relocs;
+ int i;
+
+ relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+ for (i = 0; i < cs->crelocs; i++) {
+ radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
+ radeon_bo_unref(relocs[i].base.bo);
+ }
+ return 0;
+}
+
+#if 0
+static void dump_cmdbuf(struct radeon_cs *cs)
+{
+ int i;
+ fprintf(stderr,"--start--\n");
+ for (i = 0; i < cs->cdw; i++){
+ fprintf(stderr,"0x%08x\n", cs->packets[i]);
+ }
+ fprintf(stderr,"--end--\n");
+
+}
+#endif
+
+static int r600_cs_emit(struct radeon_cs *cs)
+{
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct drm_radeon_cs cs_cmd;
+ struct drm_radeon_cs_chunk cs_chunk[2];
+ uint32_t length_dw_reloc_chunk;
+ uint64_t chunk_ptrs[2];
+ uint32_t *reloc_chunk;
+ int r;
+ int retry = 0;
+
+ /* TODO : put chip level things here if need. */
+ /* csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); */
+
+ csm->pending_count = 1;
+
+ reloc_chunk = (uint32_t*)calloc(1, cs->crelocs * 4 * 4);
+
+ r = r600_cs_process_relocs(cs, reloc_chunk, &length_dw_reloc_chunk);
+ if (r) {
+ free(reloc_chunk);
+ return 0;
+ }
+
+ /* raw ib chunk */
+ cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB;
+ cs_chunk[0].length_dw = cs->cdw;
+ cs_chunk[0].chunk_data = (unsigned long)(cs->packets);
+
+ /* reloc chaunk */
+ cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
+ cs_chunk[1].length_dw = length_dw_reloc_chunk;
+ cs_chunk[1].chunk_data = (unsigned long)reloc_chunk;
+
+ chunk_ptrs[0] = (uint64_t)(unsigned long)&(cs_chunk[0]);
+ chunk_ptrs[1] = (uint64_t)(unsigned long)&(cs_chunk[1]);
+
+ cs_cmd.num_chunks = 2;
+ /* cs_cmd.cs_id = 0; */
+ cs_cmd.chunks = (uint64_t)(unsigned long)chunk_ptrs;
+
+ //dump_cmdbuf(cs);
+
+ do
+ {
+ r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
+ retry++;
+ } while (r == -EAGAIN && retry < 1000);
+
+ if (r) {
+ free(reloc_chunk);
+ return r;
+ }
+
+ csm->pending_age = cs_cmd.cs_id;
+
+ r600_cs_set_age(cs);
+
+ cs->csm->read_used = 0;
+ cs->csm->vram_write_used = 0;
+ cs->csm->gart_write_used = 0;
+
+ free(reloc_chunk);
+
+ return 0;
+}
+
+static void inline r600_cs_free_reloc(void *relocs_p, int crelocs)
+{
+ struct r600_cs_reloc_legacy *relocs = relocs_p;
+ int i;
+ if (!relocs_p)
+ return;
+ for (i = 0; i < crelocs; i++)
+ {
+ free(relocs[i].indices);
+ free(relocs[i].reloc_indices);
+ }
+}
+
+static int r600_cs_destroy(struct radeon_cs *cs)
+{
+ r600_cs_free_reloc(cs->relocs, cs->crelocs);
+ free(cs->relocs);
+ free(cs->packets);
+ free(cs);
+ return 0;
+}
+
+static int r600_cs_erase(struct radeon_cs *cs)
+{
+ r600_cs_free_reloc(cs->relocs, cs->crelocs);
+ free(cs->relocs);
+ cs->relocs_total_size = 0;
+ cs->relocs = NULL;
+ cs->crelocs = 0;
+ cs->cdw = 0;
+ cs->section = 0;
+ return 0;
+}
+
+static int r600_cs_need_flush(struct radeon_cs *cs)
+{
+ /* this function used to flush when the BO usage got to
+ * a certain size, now the higher levels handle this better */
+ return 0;
+}
+
+static void r600_cs_print(struct radeon_cs *cs, FILE *file)
+{
+}
+
+static struct radeon_cs_funcs r600_cs_funcs = {
+ r600_cs_create,
+ r600_cs_write_reloc,
+ r600_cs_begin,
+ r600_cs_end,
+ r600_cs_emit,
+ r600_cs_destroy,
+ r600_cs_erase,
+ r600_cs_need_flush,
+ r600_cs_print
+};
+
+struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
+{
+ struct r600_cs_manager_legacy *csm;
+
+ csm = (struct r600_cs_manager_legacy*)
+ calloc(1, sizeof(struct r600_cs_manager_legacy));
+ if (csm == NULL) {
+ return NULL;
+ }
+ csm->base.funcs = &r600_cs_funcs;
+ csm->base.fd = ctx->dri.fd;
+ csm->ctx = ctx;
+ csm->pending_age = 1;
+ return (struct radeon_cs_manager*)csm;
+}
+
+void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */
+{
+ radeonContextPtr rmesa = &r600->radeon;
+ GLuint size;
+
+ r600InitAtoms(r600);
+
+ /* Initialize command buffer */
+ size = 256 * driQueryOptioni(&rmesa->optionCache,
+ "command_buffer_size");
+ if (size < 2 * rmesa->hw.max_state_size) {
+ size = 2 * rmesa->hw.max_state_size + 65535;
+ }
+ if (size > 64 * 256)
+ size = 64 * 256;
+
+ if (rmesa->radeonScreen->kernel_mm) {
+ int fd = rmesa->radeonScreen->driScreen->fd;
+ rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
+ } else {
+ rmesa->cmdbuf.csm = r600_radeon_cs_manager_legacy_ctor(rmesa);
+ }
+ if (rmesa->cmdbuf.csm == NULL) {
+ /* FIXME: fatal error */
+ return;
+ }
+ rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
+ assert(rmesa->cmdbuf.cs != NULL);
+ rmesa->cmdbuf.size = size;
+
+ radeon_cs_space_set_flush(rmesa->cmdbuf.cs,
+ (void (*)(void *))rmesa->glCtx->Driver.Flush, rmesa->glCtx);
+
+ if (!rmesa->radeonScreen->kernel_mm) {
+ radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
+ radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
+ } else {
+ struct drm_radeon_gem_info mminfo;
+
+ if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
+ {
+ radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
+ radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
+ }
+ }
+}
+
diff --git a/r600/r600_cmdbuf.h b/r600/r600_cmdbuf.h
new file mode 100644
index 0000000..eba43d3
--- /dev/null
+++ b/r600/r600_cmdbuf.h
@@ -0,0 +1,212 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R600_CMDBUF_H__
+#define __R600_CMDBUF_H__
+
+#include "r600_context.h"
+#include "r600_emit.h"
+
+#define RADEON_CP_PACKET3_NOP 0xC0001000
+#define RADEON_CP_PACKET3_NEXT_CHAR 0xC0001900
+#define RADEON_CP_PACKET3_PLY_NEXTSCAN 0xC0001D00
+#define RADEON_CP_PACKET3_SET_SCISSORS 0xC0001E00
+#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM 0xC0002300
+#define RADEON_CP_PACKET3_LOAD_MICROCODE 0xC0002400
+#define RADEON_CP_PACKET3_WAIT_FOR_IDLE 0xC0002600
+#define RADEON_CP_PACKET3_3D_DRAW_VBUF 0xC0002800
+#define RADEON_CP_PACKET3_3D_DRAW_IMMD 0xC0002900
+#define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00
+#define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00
+#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00
+#define RADEON_CP_PACKET3_CNTL_PAINT 0xC0009100
+#define RADEON_CP_PACKET3_CNTL_BITBLT 0xC0009200
+#define RADEON_CP_PACKET3_CNTL_SMALLTEXT 0xC0009300
+#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT 0xC0009400
+#define RADEON_CP_PACKET3_CNTL_POLYLINE 0xC0009500
+#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES 0xC0009800
+#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI 0xC0009A00
+#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI 0xC0009B00
+#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT 0xC0009C00
+
+/* r6xx/r7xx packet 3 type offsets */
+#define R600_SET_CONFIG_REG_OFFSET 0x00008000
+#define R600_SET_CONFIG_REG_END 0x0000ac00
+#define R600_SET_CONTEXT_REG_OFFSET 0x00028000
+#define R600_SET_CONTEXT_REG_END 0x00029000
+#define R600_SET_ALU_CONST_OFFSET 0x00030000
+#define R600_SET_ALU_CONST_END 0x00032000
+#define R600_SET_RESOURCE_OFFSET 0x00038000
+#define R600_SET_RESOURCE_END 0x0003c000
+#define R600_SET_SAMPLER_OFFSET 0x0003c000
+#define R600_SET_SAMPLER_END 0x0003cff0
+#define R600_SET_CTL_CONST_OFFSET 0x0003cff0
+#define R600_SET_CTL_CONST_END 0x0003e200
+#define R600_SET_LOOP_CONST_OFFSET 0x0003e200
+#define R600_SET_LOOP_CONST_END 0x0003e380
+#define R600_SET_BOOL_CONST_OFFSET 0x0003e380
+#define R600_SET_BOOL_CONST_END 0x00040000
+
+/* r6xx/r7xx packet 3 types */
+#define R600_IT_INDIRECT_BUFFER_END 0x00001700
+#define R600_IT_SET_PREDICATION 0x00002000
+#define R600_IT_REG_RMW 0x00002100
+#define R600_IT_COND_EXEC 0x00002200
+#define R600_IT_PRED_EXEC 0x00002300
+#define R600_IT_START_3D_CMDBUF 0x00002400
+#define R600_IT_DRAW_INDEX_2 0x00002700
+#define R600_IT_CONTEXT_CONTROL 0x00002800
+#define R600_IT_DRAW_INDEX_IMMD_BE 0x00002900
+#define R600_IT_INDEX_TYPE 0x00002A00
+#define R600_IT_DRAW_INDEX 0x00002B00
+#define R600_IT_DRAW_INDEX_AUTO 0x00002D00
+#define R600_IT_DRAW_INDEX_IMMD 0x00002E00
+#define R600_IT_NUM_INSTANCES 0x00002F00
+#define R600_IT_STRMOUT_BUFFER_UPDATE 0x00003400
+#define R600_IT_INDIRECT_BUFFER_MP 0x00003800
+#define R600_IT_MEM_SEMAPHORE 0x00003900
+#define R600_IT_MPEG_INDEX 0x00003A00
+#define R600_IT_WAIT_REG_MEM 0x00003C00
+#define R600_IT_MEM_WRITE 0x00003D00
+#define R600_IT_INDIRECT_BUFFER 0x00003200
+#define R600_IT_CP_INTERRUPT 0x00004000
+#define R600_IT_SURFACE_SYNC 0x00004300
+#define R600_IT_ME_INITIALIZE 0x00004400
+#define R600_IT_COND_WRITE 0x00004500
+#define R600_IT_EVENT_WRITE 0x00004600
+#define R600_IT_EVENT_WRITE_EOP 0x00004700
+#define R600_IT_ONE_REG_WRITE 0x00005700
+#define R600_IT_SET_CONFIG_REG 0x00006800
+#define R600_IT_SET_CONTEXT_REG 0x00006900
+#define R600_IT_SET_ALU_CONST 0x00006A00
+#define R600_IT_SET_BOOL_CONST 0x00006B00
+#define R600_IT_SET_LOOP_CONST 0x00006C00
+#define R600_IT_SET_RESOURCE 0x00006D00
+#define R600_IT_SET_SAMPLER 0x00006E00
+#define R600_IT_SET_CTL_CONST 0x00006F00
+#define R600_IT_SURFACE_BASE_UPDATE 0x00007300
+
+struct r600_cs_manager_legacy
+{
+ struct radeon_cs_manager base;
+ struct radeon_context *ctx;
+ /* hack for scratch stuff */
+ uint32_t pending_age;
+ uint32_t pending_count;
+};
+
+struct r600_cs_reloc_legacy {
+ struct radeon_cs_reloc base;
+ uint32_t cindices;
+ uint32_t *indices;
+ uint32_t *reloc_indices;
+};
+
+struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
+
+/**
+ * Write one dword to the command buffer.
+ */
+#define R600_OUT_BATCH(data) \
+do { \
+ radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, data); \
+} while(0)
+
+/**
+ * Write n dwords from ptr to the command buffer.
+ */
+#define R600_OUT_BATCH_TABLE(ptr,n) \
+do { \
+ radeon_cs_write_table(b_l_rmesa->cmdbuf.cs, ptr, n); \
+} while(0)
+
+/**
+ * Write a relocated dword to the command buffer.
+ */
+#define R600_OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) \
+ do { \
+ if (0 && offset) { \
+ fprintf(stderr, "(%s:%s:%d) offset : %d\n", \
+ __FILE__, __FUNCTION__, __LINE__, offset); \
+ } \
+ radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \
+ bo, rd, wd, flags); \
+ } while(0)
+
+/* R600/R700 */
+#define R600_OUT_BATCH_REGS(reg, num) \
+do { \
+ if ((reg) >= R600_SET_CONFIG_REG_OFFSET && (reg) < R600_SET_CONFIG_REG_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_CONFIG_REG_OFFSET) >> 2); \
+ } else if ((reg) >= R600_SET_CONTEXT_REG_OFFSET && (reg) < R600_SET_CONTEXT_REG_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONTEXT_REG, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_CONTEXT_REG_OFFSET) >> 2); \
+ } else if ((reg) >= R600_SET_ALU_CONST_OFFSET && (reg) < R600_SET_ALU_CONST_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_ALU_CONST_OFFSET) >> 2); \
+ } else if ((reg) >= R600_SET_RESOURCE_OFFSET && (reg) < R600_SET_RESOURCE_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_RESOURCE_OFFSET) >> 2); \
+ } else if ((reg) >= R600_SET_SAMPLER_OFFSET && (reg) < R600_SET_SAMPLER_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_SAMPLER_OFFSET) >> 2); \
+ } else if ((reg) >= R600_SET_CTL_CONST_OFFSET && (reg) < R600_SET_CTL_CONST_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_CTL_CONST_OFFSET) >> 2); \
+ } else if ((reg) >= R600_SET_LOOP_CONST_OFFSET && (reg) < R600_SET_LOOP_CONST_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_LOOP_CONST, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_LOOP_CONST_OFFSET) >> 2); \
+ } else if ((reg) >= R600_SET_BOOL_CONST_OFFSET && (reg) < R600_SET_BOOL_CONST_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_BOOL_CONST, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_BOOL_CONST_OFFSET) >> 2); \
+ } else { \
+ R600_OUT_BATCH(CP_PACKET0((reg), (num))); \
+ } \
+} while (0)
+
+/** Single register write to command buffer; requires 3 dwords for most things. */
+#define R600_OUT_BATCH_REGVAL(reg, val) \
+ R600_OUT_BATCH_REGS((reg), 1); \
+ R600_OUT_BATCH((val))
+
+/** Continuous register range write to command buffer; requires 1 dword,
+ * expects count dwords afterwards for register contents. */
+#define R600_OUT_BATCH_REGSEQ(reg, count) \
+ R600_OUT_BATCH_REGS((reg), (count))
+
+extern void r600InitCmdBuf(context_t *r600);
+
+#endif /* __R600_CMDBUF_H__ */
diff --git a/r600/r600_context.c b/r600/r600_context.c
new file mode 100644
index 0000000..e0b77d4
--- /dev/null
+++ b/r600/r600_context.c
@@ -0,0 +1,394 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "main/glheader.h"
+#include "main/api_arrayelt.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/matrix.h"
+#include "main/extensions.h"
+#include "main/state.h"
+#include "main/bufferobj.h"
+#include "main/texobj.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vp_build.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "radeon_debug.h"
+#include "r600_context.h"
+#include "radeon_common_context.h"
+#include "radeon_span.h"
+#include "r600_cmdbuf.h"
+#include "r600_emit.h"
+#include "radeon_bocs_wrapper.h"
+
+#include "r700_state.h"
+#include "r700_ioctl.h"
+
+
+#include "vblank.h"
+#include "utils.h"
+#include "xmlpool.h" /* for symbolic values of enum-type options */
+
+/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */
+int future_hw_tcl_on = 1;
+int hw_tcl_on = 1;
+
+#define need_GL_VERSION_2_0
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_vertex_program
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_framebuffer_object
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_gpu_program_parameters
+#define need_GL_EXT_secondary_color
+#define need_GL_EXT_stencil_two_side
+#define need_GL_ATI_separate_stencil
+#define need_GL_NV_vertex_program
+
+#include "extension_helper.h"
+
+extern const struct tnl_pipeline_stage *r700_pipeline[];
+
+const struct dri_extension card_extensions[] = {
+ /* *INDENT-OFF* */
+ {"GL_ARB_depth_texture", NULL},
+ {"GL_ARB_fragment_program", NULL},
+ {"GL_ARB_multitexture", NULL},
+ {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
+ {"GL_ARB_shadow", NULL},
+ {"GL_ARB_shadow_ambient", NULL},
+ {"GL_ARB_texture_border_clamp", NULL},
+ {"GL_ARB_texture_cube_map", NULL},
+ {"GL_ARB_texture_env_add", NULL},
+ {"GL_ARB_texture_env_combine", NULL},
+ {"GL_ARB_texture_env_crossbar", NULL},
+ {"GL_ARB_texture_env_dot3", NULL},
+ {"GL_ARB_texture_mirrored_repeat", NULL},
+ {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
+ {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
+ {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
+ {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
+ {"GL_EXT_blend_subtract", NULL},
+ {"GL_EXT_packed_depth_stencil", NULL},
+ {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+ {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
+ {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
+ {"GL_EXT_shadow_funcs", NULL},
+ {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions},
+ {"GL_EXT_stencil_wrap", NULL},
+ {"GL_EXT_texture_edge_clamp", NULL},
+ {"GL_EXT_texture_env_combine", NULL},
+ {"GL_EXT_texture_env_dot3", NULL},
+ {"GL_EXT_texture_filter_anisotropic", NULL},
+ {"GL_EXT_texture_lod_bias", NULL},
+ {"GL_EXT_texture_mirror_clamp", NULL},
+ {"GL_EXT_texture_rectangle", NULL},
+ {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
+ {"GL_ATI_texture_env_combine3", NULL},
+ {"GL_ATI_texture_mirror_once", NULL},
+ {"GL_MESA_pack_invert", NULL},
+ {"GL_MESA_ycbcr_texture", NULL},
+ {"GL_MESAX_texture_float", NULL},
+ {"GL_NV_blend_square", NULL},
+ {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
+ {"GL_SGIS_generate_mipmap", NULL},
+ {NULL, NULL}
+ /* *INDENT-ON* */
+};
+
+
+const struct dri_extension mm_extensions[] = {
+ { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+ { NULL, NULL }
+};
+
+/**
+ * The GL 2.0 functions are needed to make display lists work with
+ * functions added by GL_ATI_separate_stencil.
+ */
+const struct dri_extension gl_20_extension[] = {
+ {"GL_VERSION_2_0", GL_VERSION_2_0_functions },
+};
+
+
+static void r600RunPipeline(GLcontext * ctx)
+{
+ _mesa_lock_context_textures(ctx);
+
+ if (ctx->NewState)
+ _mesa_update_state_locked(ctx);
+
+ _tnl_run_pipeline(ctx);
+ _mesa_unlock_context_textures(ctx);
+}
+
+static void r600_get_lock(radeonContextPtr rmesa)
+{
+ drm_radeon_sarea_t *sarea = rmesa->sarea;
+
+ if (sarea->ctx_owner != rmesa->dri.hwContext) {
+ sarea->ctx_owner = rmesa->dri.hwContext;
+ if (!rmesa->radeonScreen->kernel_mm)
+ radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
+ }
+}
+
+static void r600_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
+{
+ /* please flush pipe do all pending work */
+ /* to be enabled */
+}
+
+static void r600_vtbl_pre_emit_atoms(radeonContextPtr radeon)
+{
+ r700Start3D((context_t *)radeon);
+}
+
+static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ if (mode)
+ context->radeon.Fallback |= bit;
+ else
+ context->radeon.Fallback &= ~bit;
+}
+
+static void r600_init_vtbl(radeonContextPtr radeon)
+{
+ radeon->vtbl.get_lock = r600_get_lock;
+ radeon->vtbl.update_viewport_offset = r700UpdateViewportOffset;
+ radeon->vtbl.emit_cs_header = r600_vtbl_emit_cs_header;
+ radeon->vtbl.swtcl_flush = NULL;
+ radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms;
+ radeon->vtbl.fallback = r600_fallback;
+}
+
+/* Create the device specific rendering context.
+ */
+GLboolean r600CreateContext(const __GLcontextModes * glVisual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate)
+{
+ __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+ radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+ struct dd_function_table functions;
+ context_t *r600;
+ GLcontext *ctx;
+
+ assert(glVisual);
+ assert(driContextPriv);
+ assert(screen);
+
+ /* Allocate the R600 context */
+ r600 = (context_t*) CALLOC(sizeof(*r600));
+ if (!r600) {
+ radeon_error("Failed to allocate memory for context.\n");
+ return GL_FALSE;
+ }
+
+ if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
+ hw_tcl_on = future_hw_tcl_on = 0;
+
+ r600_init_vtbl(&r600->radeon);
+ /* Parse configuration files.
+ * Do this here so that initialMaxAnisotropy is set before we create
+ * the default textures.
+ */
+ driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache,
+ screen->driScreen->myNum, "r600");
+
+ r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache,
+ "def_max_anisotropy");
+
+ /* Init default driver functions then plug in our R600-specific functions
+ * (the texture functions are especially important)
+ */
+ _mesa_init_driver_functions(&functions);
+
+ r700InitStateFuncs(&functions);
+ r600InitTextureFuncs(&functions);
+ r700InitShaderFuncs(&functions);
+ r700InitIoctlFuncs(&functions);
+
+ if (!radeonInitContext(&r600->radeon, &functions,
+ glVisual, driContextPriv,
+ sharedContextPrivate)) {
+ radeon_error("Initializing context failed.\n");
+ FREE(r600);
+ return GL_FALSE;
+ }
+
+ /* Init r600 context data */
+ /* Set the maximum texture size small enough that we can guarentee that
+ * all texture units can bind a maximal texture and have them both in
+ * texturable memory at once.
+ */
+
+ ctx = r600->radeon.glCtx;
+
+ ctx->Const.MaxTextureImageUnits =
+ driQueryOptioni(&r600->radeon.optionCache, "texture_image_units");
+ ctx->Const.MaxTextureCoordUnits =
+ driQueryOptioni(&r600->radeon.optionCache, "texture_coord_units");
+ ctx->Const.MaxTextureUnits =
+ MIN2(ctx->Const.MaxTextureImageUnits,
+ ctx->Const.MaxTextureCoordUnits);
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+ ctx->Const.MaxTextureLodBias = 16.0;
+
+ ctx->Const.MaxTextureLevels = 13; /* hw support 14 */
+ ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */
+
+ ctx->Const.MinPointSize = 0x0001 / 8.0;
+ ctx->Const.MinPointSizeAA = 0x0001 / 8.0;
+ ctx->Const.MaxPointSize = 0xffff / 8.0;
+ ctx->Const.MaxPointSizeAA = 0xffff / 8.0;
+
+ ctx->Const.MinLineWidth = 0x0001 / 8.0;
+ ctx->Const.MinLineWidthAA = 0x0001 / 8.0;
+ ctx->Const.MaxLineWidth = 0xffff / 8.0;
+ ctx->Const.MaxLineWidthAA = 0xffff / 8.0;
+
+ /* Needs further modifications */
+#if 0
+ ctx->Const.MaxArrayLockSize =
+ ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
+#endif
+
+ ctx->Const.MaxDrawBuffers = 1;
+
+ /* Initialize the software rasterizer and helper modules.
+ */
+ _swrast_CreateContext(ctx);
+ _vbo_CreateContext(ctx);
+ _tnl_CreateContext(ctx);
+ _swsetup_CreateContext(ctx);
+ _swsetup_Wakeup(ctx);
+ _ae_create_context(ctx);
+
+ /* Install the customized pipeline:
+ */
+ _tnl_destroy_pipeline(ctx);
+ _tnl_install_pipeline(ctx, r700_pipeline);
+
+ /* Try and keep materials and vertices separate:
+ */
+/* _tnl_isolate_materials(ctx, GL_TRUE); */
+
+ /* Configure swrast and TNL to match hardware characteristics:
+ */
+ _swrast_allow_pixel_fog(ctx, GL_FALSE);
+ _swrast_allow_vertex_fog(ctx, GL_TRUE);
+ _tnl_allow_pixel_fog(ctx, GL_FALSE);
+ _tnl_allow_vertex_fog(ctx, GL_TRUE);
+
+ /* 256 for reg-based consts, inline consts also supported */
+ ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */
+ ctx->Const.VertexProgram.MaxNativeInstructions = 8192;
+ ctx->Const.VertexProgram.MaxNativeAttribs = 160;
+ ctx->Const.VertexProgram.MaxTemps = 128;
+ ctx->Const.VertexProgram.MaxNativeTemps = 128;
+ ctx->Const.VertexProgram.MaxNativeParameters = 256;
+ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */
+
+ ctx->Const.FragmentProgram.MaxNativeTemps = 128;
+ ctx->Const.FragmentProgram.MaxNativeAttribs = 32;
+ ctx->Const.FragmentProgram.MaxNativeParameters = 256;
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192;
+ /* 8 per clause on r6xx, 16 on rv670/r7xx */
+ if ((screen->chip_family == CHIP_FAMILY_RV670) ||
+ (screen->chip_family >= CHIP_FAMILY_RV770))
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16;
+ else
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8;
+ ctx->Const.FragmentProgram.MaxNativeInstructions = 8192;
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */
+ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */
+ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+
+ radeon_init_debug();
+
+ driInitExtensions(ctx, card_extensions, GL_TRUE);
+ if (r600->radeon.radeonScreen->kernel_mm)
+ driInitExtensions(ctx, mm_extensions, GL_FALSE);
+
+ if (driQueryOptionb
+ (&r600->radeon.optionCache, "disable_stencil_two_side"))
+ _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
+
+ if (r600->radeon.glCtx->Mesa_DXTn
+ && !driQueryOptionb(&r600->radeon.optionCache, "disable_s3tc")) {
+ _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+ _mesa_enable_extension(ctx, "GL_S3_s3tc");
+ } else
+ if (driQueryOptionb(&r600->radeon.optionCache, "force_s3tc_enable"))
+ {
+ _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+ }
+
+ radeon_fbo_init(&r600->radeon);
+ radeonInitSpanFuncs( ctx );
+
+ r600InitCmdBuf(r600);
+
+ r700InitState(r600->radeon.glCtx);
+
+ TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline;
+
+ if (driQueryOptionb(&r600->radeon.optionCache, "no_rast")) {
+ radeon_warning("disabling 3D acceleration\n");
+#if R200_MERGED
+ FALLBACK(&r600->radeon, RADEON_FALLBACK_DISABLE, 1);
+#endif
+ }
+
+ return GL_TRUE;
+}
+
+
diff --git a/r600/r600_context.h b/r600/r600_context.h
new file mode 100644
index 0000000..9397ecd
--- /dev/null
+++ b/r600/r600_context.h
@@ -0,0 +1,189 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R600_CONTEXT_H__
+#define __R600_CONTEXT_H__
+
+#include "tnl/t_vertex.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "dri_util.h"
+#include "texmem.h"
+#include "radeon_common.h"
+
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+
+#include "r700_chip.h"
+#include "r600_tex.h"
+#include "r700_oglprog.h"
+#include "r700_vertprog.h"
+
+struct r600_context;
+typedef struct r600_context context_t;
+
+#include "main/mm.h"
+
+/************ DMA BUFFERS **************/
+
+/* The blit width for texture uploads
+ */
+#define R600_BLIT_WIDTH_BYTES 1024
+#define R600_MAX_TEXTURE_UNITS 8
+
+struct r600_texture_state {
+ int tc_count; /* number of incoming texture coordinates from VAP */
+};
+
+/* Perhaps more if we store programs in vmem? */
+/* drm_r600_cmd_header_t->vpu->count is unsigned char */
+#define VSF_MAX_FRAGMENT_LENGTH (255*4)
+
+/* Can be tested with colormat currently. */
+#define VSF_MAX_FRAGMENT_TEMPS (14)
+
+#define STATE_R600_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+#define STATE_R600_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
+
+extern int hw_tcl_on;
+
+#define COLOR_IS_RGBA
+#define TAG(x) r600##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#define R600_FALLBACK_NONE 0
+#define R600_FALLBACK_TCL 1
+#define R600_FALLBACK_RAST 2
+
+struct r600_hw_state {
+ struct radeon_state_atom sq;
+ struct radeon_state_atom db;
+ struct radeon_state_atom stencil;
+ struct radeon_state_atom db_target;
+ struct radeon_state_atom sc;
+ struct radeon_state_atom scissor;
+ struct radeon_state_atom aa;
+ struct radeon_state_atom cl;
+ struct radeon_state_atom gb;
+ struct radeon_state_atom ucp;
+ struct radeon_state_atom su;
+ struct radeon_state_atom poly;
+ struct radeon_state_atom cb;
+ struct radeon_state_atom clrcmp;
+ struct radeon_state_atom blnd;
+ struct radeon_state_atom blnd_clr;
+ struct radeon_state_atom cb_target;
+ struct radeon_state_atom sx;
+ struct radeon_state_atom vgt;
+ struct radeon_state_atom spi;
+ struct radeon_state_atom vpt;
+
+ struct radeon_state_atom fs;
+ struct radeon_state_atom vs;
+ struct radeon_state_atom ps;
+
+ struct radeon_state_atom vs_consts;
+ struct radeon_state_atom ps_consts;
+
+ struct radeon_state_atom vtx;
+ struct radeon_state_atom tx;
+ struct radeon_state_atom tx_smplr;
+ struct radeon_state_atom tx_brdr_clr;
+};
+
+/**
+ * \brief R600 context structure.
+ */
+struct r600_context {
+ struct radeon_context radeon; /* parent class, must be first */
+
+ /* ------ */
+ R700_CHIP_CONTEXT hw;
+
+ struct r600_hw_state atoms;
+
+ struct r700_vertex_program *selected_vp;
+
+ /* Vertex buffers
+ */
+ GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
+ GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
+
+};
+
+#define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx))
+#define GL_CONTEXT(context) ((GLcontext *)(context->radeon.glCtx))
+
+extern GLboolean r600CreateContext(const __GLcontextModes * glVisual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate);
+
+#define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(&context->hw))
+
+#define R600_NEWPRIM( rmesa ) \
+do { \
+ if ( rmesa->radeon.dma.flush ) \
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \
+} while (0)
+
+#define R600_STATECHANGE(r600, ATOM) \
+do { \
+ R600_NEWPRIM(r600); \
+ r600->atoms.ATOM.dirty = GL_TRUE; \
+ r600->radeon.hw.is_dirty = GL_TRUE; \
+} while(0)
+
+extern GLboolean r700SyncSurf(context_t *context,
+ struct radeon_bo *pbo,
+ uint32_t read_domain,
+ uint32_t write_domain,
+ uint32_t sync_type);
+
+extern void r700SetupStreams(GLcontext * ctx);
+extern void r700Start3D(context_t *context);
+extern void r600InitAtoms(context_t *context);
+
+#define RADEON_D_CAPTURE 0
+#define RADEON_D_PLAYBACK 1
+#define RADEON_D_PLAYBACK_RAW 2
+#define RADEON_D_T 3
+
+#define r600PackFloat32 radeonPackFloat32
+#define r600PackFloat24 radeonPackFloat24
+
+#endif /* __R600_CONTEXT_H__ */
diff --git a/r600/r600_emit.c b/r600/r600_emit.c
new file mode 100644
index 0000000..5c250c2
--- /dev/null
+++ b/r600/r600_emit.c
@@ -0,0 +1,117 @@
+/**************************************************************************
+
+Copyright 2008, 2009 Advanced Micro Devices Inc. (AMD)
+
+Copyright (C) Advanced Micro Devices Inc. (AMD) 2009. All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/image.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#include "r600_context.h"
+#include "r600_emit.h"
+
+void r600EmitCacheFlush(context_t *rmesa)
+{
+}
+
+GLboolean r600EmitShader(GLcontext * ctx,
+ void ** shaderbo,
+ GLvoid * data,
+ int sizeinDWORD,
+ char * szShaderUsage)
+{
+ radeonContextPtr radeonctx = RADEON_CONTEXT(ctx);
+ struct radeon_bo * pbo;
+ uint32_t *out;
+shader_again_alloc:
+ pbo = radeon_bo_open(radeonctx->radeonScreen->bom,
+ 0,
+ sizeinDWORD * 4,
+ 256,
+ RADEON_GEM_DOMAIN_GTT,
+ 0);
+
+ radeon_print(RADEON_SHADER, RADEON_NORMAL, "%s %p size %d: %s\n", __func__, pbo, sizeinDWORD, szShaderUsage);
+
+ if (!pbo) {
+ radeon_print(RADEON_MEMORY | RADEON_CS, RADEON_IMPORTANT, "No memory for buffer object. Flushing command buffer.\n");
+ rcommonFlushCmdBuf(radeonctx, __FUNCTION__);
+ goto shader_again_alloc;
+ }
+
+ radeon_cs_space_add_persistent_bo(radeonctx->cmdbuf.cs,
+ pbo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+
+ if (radeon_cs_space_check_with_bo(radeonctx->cmdbuf.cs,
+ pbo,
+ RADEON_GEM_DOMAIN_GTT, 0)) {
+ radeon_error("failure to revalidate BOs - badness\n");
+ return GL_FALSE;
+ }
+
+ radeon_bo_map(pbo, 1);
+
+ out = (uint32_t*)(pbo->ptr);
+
+ memcpy(out, data, sizeinDWORD * 4);
+
+ radeon_bo_unmap(pbo);
+
+ *shaderbo = (void*)pbo;
+
+ return GL_TRUE;
+}
+
+GLboolean r600DeleteShader(GLcontext * ctx,
+ void * shaderbo)
+{
+ struct radeon_bo * pbo = (struct radeon_bo *)shaderbo;
+
+ radeon_print(RADEON_SHADER, RADEON_NORMAL, "%s: %p\n", __func__, pbo);
+
+ if (pbo) {
+ if (pbo->ptr)
+ radeon_bo_unmap(pbo);
+ radeon_bo_unref(pbo); /* when bo->cref <= 0, bo will be bo_free */
+ }
+
+ return GL_TRUE;
+}
diff --git a/r200/r200_span.h b/r600/r600_emit.h
index bae5644..661774d 100644
--- a/r200/r200_span.h
+++ b/r600/r600_emit.h
@@ -1,9 +1,8 @@
-/*
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+/**************************************************************************
+
+Copyright 2008, 2009 Advanced Micro Devices Inc. (AMD)
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
+Copyright (C) Advanced Micro Devices Inc. (AMD) 2009. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -29,17 +28,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/*
* Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
*/
-#ifndef __R200_SPAN_H__
-#define __R200_SPAN_H__
-#include "drirenderbuffer.h"
+#ifndef __R600_EMIT_H__
+#define __R600_EMIT_H__
+
+#include "main/glheader.h"
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+#include "radeon_reg.h"
+
+void r600EmitCacheFlush(context_t *rmesa);
-extern void r200InitSpanFuncs( GLcontext *ctx );
+extern GLboolean r600EmitShader(GLcontext * ctx,
+ void ** shaderbo,
+ GLvoid * data,
+ int sizeinDWORD,
+ char * szShaderUsage);
-extern void
-radeonSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+extern GLboolean r600DeleteShader(GLcontext * ctx,
+ void * shaderbo);
#endif
diff --git a/r600/r600_reg.h b/r600/r600_reg.h
new file mode 100644
index 0000000..ffe5ee4
--- /dev/null
+++ b/r600/r600_reg.h
@@ -0,0 +1,121 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_H_
+#define _R600_REG_H_
+
+/*
+ * Register definitions
+ */
+
+#include "r600_reg_auto_r6xx.h"
+#include "r600_reg_r6xx.h"
+#include "r600_reg_r7xx.h"
+
+
+/* SET_*_REG offsets + ends */
+enum
+{
+ SET_CONFIG_REG_offset = 0x00008000,
+ SET_CONFIG_REG_end = 0x0000ac00,
+ SET_CONTEXT_REG_offset = 0x00028000,
+ SET_CONTEXT_REG_end = 0x00029000,
+ SET_ALU_CONST_offset = 0x00030000,
+ SET_ALU_CONST_end = 0x00032000,
+ SET_RESOURCE_offset = 0x00038000,
+ SET_RESOURCE_end = 0x0003c000,
+ SET_SAMPLER_offset = 0x0003c000,
+ SET_SAMPLER_end = 0x0003cff0,
+ SET_CTL_CONST_offset = 0x0003cff0,
+ SET_CTL_CONST_end = 0x0003e200,
+ SET_LOOP_CONST_offset = 0x0003e200,
+ SET_LOOP_CONST_end = 0x0003e380,
+ SET_BOOL_CONST_offset = 0x0003e380,
+ SET_BOOL_CONST_end = 0x00040000,
+};
+
+/* packet3 IT_SURFACE_BASE_UPDATE bits */
+enum
+{
+ DEPTH_BASE = (1 << 0),
+ COLOR0_BASE = (1 << 1),
+ COLOR1_BASE = (1 << 2),
+ COLOR2_BASE = (1 << 3),
+ COLOR3_BASE = (1 << 4),
+ COLOR4_BASE = (1 << 5),
+ COLOR5_BASE = (1 << 6),
+ COLOR6_BASE = (1 << 7),
+ COLOR7_BASE = (1 << 8),
+ STRMOUT_BASE0 = (1 << 9),
+ STRMOUT_BASE1 = (1 << 10),
+ STRMOUT_BASE2 = (1 << 11),
+ STRMOUT_BASE3 = (1 << 12),
+ COHER_BASE0 = (1 << 13),
+ COHER_BASE1 = (1 << 14),
+};
+
+/* Packet3 commands */
+enum
+{
+ IT_NOP = 0x10,
+ IT_INDIRECT_BUFFER_END = 0x17,
+ IT_SET_PREDICATION = 0x20,
+ IT_REG_RMW = 0x21,
+ IT_COND_EXEC = 0x22,
+ IT_PRED_EXEC = 0x23,
+ IT_START_3D_CMDBUF = 0x24,
+ IT_DRAW_INDEX_2 = 0x27,
+ IT_CONTEXT_CONTROL = 0x28,
+ IT_DRAW_INDEX_IMMD_BE = 0x29,
+ IT_INDEX_TYPE = 0x2A,
+ IT_DRAW_INDEX = 0x2B,
+ IT_DRAW_INDEX_AUTO = 0x2D,
+ IT_DRAW_INDEX_IMMD = 0x2E,
+ IT_NUM_INSTANCES = 0x2F,
+ IT_STRMOUT_BUFFER_UPDATE = 0x34,
+ IT_INDIRECT_BUFFER_MP = 0x38,
+ IT_MEM_SEMAPHORE = 0x39,
+ IT_MPEG_INDEX = 0x3A,
+ IT_WAIT_REG_MEM = 0x3C,
+ IT_MEM_WRITE = 0x3D,
+ IT_INDIRECT_BUFFER = 0x32,
+ IT_CP_INTERRUPT = 0x40,
+ IT_SURFACE_SYNC = 0x43,
+ IT_ME_INITIALIZE = 0x44,
+ IT_COND_WRITE = 0x45,
+ IT_EVENT_WRITE = 0x46,
+ IT_EVENT_WRITE_EOP = 0x47,
+ IT_ONE_REG_WRITE = 0x57,
+ IT_SET_CONFIG_REG = 0x68,
+ IT_SET_CONTEXT_REG = 0x69,
+ IT_SET_ALU_CONST = 0x6A,
+ IT_SET_BOOL_CONST = 0x6B,
+ IT_SET_LOOP_CONST = 0x6C,
+ IT_SET_RESOURCE = 0x6D,
+ IT_SET_SAMPLER = 0x6E,
+ IT_SET_CTL_CONST = 0x6F,
+ IT_SURFACE_BASE_UPDATE = 0x73,
+};
+
+#endif
diff --git a/r600/r600_reg_auto_r6xx.h b/r600/r600_reg_auto_r6xx.h
new file mode 100644
index 0000000..edd85b0
--- /dev/null
+++ b/r600/r600_reg_auto_r6xx.h
@@ -0,0 +1,3089 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _AUTOREGS
+#define _AUTOREGS
+
+enum {
+
+ VGT_VTX_VECT_EJECT_REG = 0x000088b0,
+ PRIM_COUNT_mask = 0x3ff << 0,
+ PRIM_COUNT_shift = 0,
+ VGT_LAST_COPY_STATE = 0x000088c0,
+ SRC_STATE_ID_mask = 0x07 << 0,
+ SRC_STATE_ID_shift = 0,
+ DST_STATE_ID_mask = 0x07 << 16,
+ DST_STATE_ID_shift = 16,
+ VGT_CACHE_INVALIDATION = 0x000088c4,
+ CACHE_INVALIDATION_mask = 0x03 << 0,
+ CACHE_INVALIDATION_shift = 0,
+ VC_ONLY = 0x00,
+ TC_ONLY = 0x01,
+ VC_AND_TC = 0x02,
+ VS_NO_EXTRA_BUFFER_bit = 1 << 5,
+ VGT_GS_PER_ES = 0x000088c8,
+ VGT_ES_PER_GS = 0x000088cc,
+ VGT_GS_VERTEX_REUSE = 0x000088d4,
+ VERT_REUSE_mask = 0x1f << 0,
+ VERT_REUSE_shift = 0,
+ VGT_MC_LAT_CNTL = 0x000088d8,
+ MC_TIME_STAMP_RES_mask = 0x03 << 0,
+ MC_TIME_STAMP_RES_shift = 0,
+ X_0_992_MAX_LATENCY = 0x00,
+ X_0_496_MAX_LATENCY = 0x01,
+ X_0_248_MAX_LATENCY = 0x02,
+ X_0_124_MAX_LATENCY = 0x03,
+ VGT_GS_PER_VS = 0x000088e8,
+ GS_PER_VS_mask = 0x0f << 0,
+ GS_PER_VS_shift = 0,
+ VGT_CNTL_STATUS = 0x000088f0,
+ VGT_OUT_INDX_BUSY_bit = 1 << 0,
+ VGT_OUT_BUSY_bit = 1 << 1,
+ VGT_PT_BUSY_bit = 1 << 2,
+ VGT_TE_BUSY_bit = 1 << 3,
+ VGT_VR_BUSY_bit = 1 << 4,
+ VGT_GRP_BUSY_bit = 1 << 5,
+ VGT_DMA_REQ_BUSY_bit = 1 << 6,
+ VGT_DMA_BUSY_bit = 1 << 7,
+ VGT_GS_BUSY_bit = 1 << 8,
+ VGT_BUSY_bit = 1 << 9,
+ VGT_PRIMITIVE_TYPE = 0x00008958,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0,
+ DI_PT_NONE = 0x00,
+ DI_PT_POINTLIST = 0x01,
+ DI_PT_LINELIST = 0x02,
+ DI_PT_LINESTRIP = 0x03,
+ DI_PT_TRILIST = 0x04,
+ DI_PT_TRIFAN = 0x05,
+ DI_PT_TRISTRIP = 0x06,
+ DI_PT_UNUSED_0 = 0x07,
+ DI_PT_UNUSED_1 = 0x08,
+ DI_PT_UNUSED_2 = 0x09,
+ DI_PT_LINELIST_ADJ = 0x0a,
+ DI_PT_LINESTRIP_ADJ = 0x0b,
+ DI_PT_TRILIST_ADJ = 0x0c,
+ DI_PT_TRISTRIP_ADJ = 0x0d,
+ DI_PT_UNUSED_3 = 0x0e,
+ DI_PT_UNUSED_4 = 0x0f,
+ DI_PT_TRI_WITH_WFLAGS = 0x10,
+ DI_PT_RECTLIST = 0x11,
+ DI_PT_LINELOOP = 0x12,
+ DI_PT_QUADLIST = 0x13,
+ DI_PT_QUADSTRIP = 0x14,
+ DI_PT_POLYGON = 0x15,
+ DI_PT_2D_COPY_RECT_LIST_V0 = 0x16,
+ DI_PT_2D_COPY_RECT_LIST_V1 = 0x17,
+ DI_PT_2D_COPY_RECT_LIST_V2 = 0x18,
+ DI_PT_2D_COPY_RECT_LIST_V3 = 0x19,
+ DI_PT_2D_FILL_RECT_LIST = 0x1a,
+ DI_PT_2D_LINE_STRIP = 0x1b,
+ DI_PT_2D_TRI_STRIP = 0x1c,
+ VGT_INDEX_TYPE = 0x0000895c,
+ INDEX_TYPE_mask = 0x03 << 0,
+ INDEX_TYPE_shift = 0,
+ DI_INDEX_SIZE_16_BIT = 0x00,
+ DI_INDEX_SIZE_32_BIT = 0x01,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c,
+ VGT_NUM_INDICES = 0x00008970,
+ VGT_NUM_INSTANCES = 0x00008974,
+ PA_CL_CNTL_STATUS = 0x00008a10,
+ CL_BUSY_bit = 1 << 31,
+ PA_CL_ENHANCE = 0x00008a14,
+ CLIP_VTX_REORDER_ENA_bit = 1 << 0,
+ NUM_CLIP_SEQ_mask = 0x03 << 1,
+ NUM_CLIP_SEQ_shift = 1,
+ CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3,
+ VE_NAN_PROC_DISABLE_bit = 1 << 4,
+ PA_SU_CNTL_STATUS = 0x00008a50,
+ SU_BUSY_bit = 1 << 31,
+ PA_SC_LINE_STIPPLE_STATE = 0x00008b10,
+ CURRENT_PTR_mask = 0x0f << 0,
+ CURRENT_PTR_shift = 0,
+ CURRENT_COUNT_mask = 0xff << 8,
+ CURRENT_COUNT_shift = 8,
+ PA_SC_MULTI_CHIP_CNTL = 0x00008b20,
+ LOG2_NUM_CHIPS_mask = 0x07 << 0,
+ LOG2_NUM_CHIPS_shift = 0,
+ MULTI_CHIP_TILE_SIZE_mask = 0x03 << 3,
+ MULTI_CHIP_TILE_SIZE_shift = 3,
+ X_16_X_16_PIXEL_TILE_PER_CHIP = 0x00,
+ X_32_X_32_PIXEL_TILE_PER_CHIP = 0x01,
+ X_64_X_64_PIXEL_TILE_PER_CHIP = 0x02,
+ X_128X128_PIXEL_TILE_PER_CHIP = 0x03,
+ CHIP_TILE_X_LOC_mask = 0x07 << 5,
+ CHIP_TILE_X_LOC_shift = 5,
+ CHIP_TILE_Y_LOC_mask = 0x07 << 8,
+ CHIP_TILE_Y_LOC_shift = 8,
+ CHIP_SUPER_TILE_B_bit = 1 << 11,
+ PA_SC_AA_SAMPLE_LOCS_2S = 0x00008b40,
+ S0_X_mask = 0x0f << 0,
+ S0_X_shift = 0,
+ S0_Y_mask = 0x0f << 4,
+ S0_Y_shift = 4,
+ S1_X_mask = 0x0f << 8,
+ S1_X_shift = 8,
+ S1_Y_mask = 0x0f << 12,
+ S1_Y_shift = 12,
+ PA_SC_AA_SAMPLE_LOCS_4S = 0x00008b44,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+ S2_X_mask = 0x0f << 16,
+ S2_X_shift = 16,
+ S2_Y_mask = 0x0f << 20,
+ S2_Y_shift = 20,
+ S3_X_mask = 0x0f << 24,
+ S3_X_shift = 24,
+ S3_Y_mask = 0x0f << 28,
+ S3_Y_shift = 28,
+ PA_SC_AA_SAMPLE_LOCS_8S_WD0 = 0x00008b48,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1 = 0x00008b4c,
+ S4_X_mask = 0x0f << 0,
+ S4_X_shift = 0,
+ S4_Y_mask = 0x0f << 4,
+ S4_Y_shift = 4,
+ S5_X_mask = 0x0f << 8,
+ S5_X_shift = 8,
+ S5_Y_mask = 0x0f << 12,
+ S5_Y_shift = 12,
+ S6_X_mask = 0x0f << 16,
+ S6_X_shift = 16,
+ S6_Y_mask = 0x0f << 20,
+ S6_Y_shift = 20,
+ S7_X_mask = 0x0f << 24,
+ S7_X_shift = 24,
+ S7_Y_mask = 0x0f << 28,
+ S7_Y_shift = 28,
+ PA_SC_CNTL_STATUS = 0x00008be0,
+ MPASS_OVERFLOW_bit = 1 << 30,
+ PA_SC_ENHANCE = 0x00008bf0,
+ FORCE_EOV_MAX_CLK_CNT_mask = 0xfff << 0,
+ FORCE_EOV_MAX_CLK_CNT_shift = 0,
+ FORCE_EOV_MAX_TILE_CNT_mask = 0xfff << 12,
+ FORCE_EOV_MAX_TILE_CNT_shift = 12,
+ SQ_CONFIG = 0x00008c00,
+ VC_ENABLE_bit = 1 << 0,
+ EXPORT_SRC_C_bit = 1 << 1,
+ DX9_CONSTS_bit = 1 << 2,
+ ALU_INST_PREFER_VECTOR_bit = 1 << 3,
+ SQ_CONFIG__DX10_CLAMP_bit = 1 << 4,
+ ALU_PREFER_ONE_WATERFALL_bit = 1 << 5,
+ ALU_MAX_ONE_WATERFALL_bit = 1 << 6,
+ CLAUSE_SEQ_PRIO_mask = 0x03 << 8,
+ CLAUSE_SEQ_PRIO_shift = 8,
+ SQ_CL_PRIO_RND_ROBIN = 0x00,
+ SQ_CL_PRIO_MACRO_SEQ = 0x01,
+ SQ_CL_PRIO_NONE = 0x02,
+ PS_PRIO_mask = 0x03 << 24,
+ PS_PRIO_shift = 24,
+ VS_PRIO_mask = 0x03 << 26,
+ VS_PRIO_shift = 26,
+ GS_PRIO_mask = 0x03 << 28,
+ GS_PRIO_shift = 28,
+ ES_PRIO_mask = 0x03 << 30,
+ ES_PRIO_shift = 30,
+ SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04,
+ NUM_PS_GPRS_mask = 0xff << 0,
+ NUM_PS_GPRS_shift = 0,
+ NUM_VS_GPRS_mask = 0xff << 16,
+ NUM_VS_GPRS_shift = 16,
+ NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28,
+ NUM_CLAUSE_TEMP_GPRS_shift = 28,
+ SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08,
+ NUM_GS_GPRS_mask = 0xff << 0,
+ NUM_GS_GPRS_shift = 0,
+ NUM_ES_GPRS_mask = 0xff << 16,
+ NUM_ES_GPRS_shift = 16,
+ SQ_THREAD_RESOURCE_MGMT = 0x00008c0c,
+ NUM_PS_THREADS_mask = 0xff << 0,
+ NUM_PS_THREADS_shift = 0,
+ NUM_VS_THREADS_mask = 0xff << 8,
+ NUM_VS_THREADS_shift = 8,
+ NUM_GS_THREADS_mask = 0xff << 16,
+ NUM_GS_THREADS_shift = 16,
+ NUM_ES_THREADS_mask = 0xff << 24,
+ NUM_ES_THREADS_shift = 24,
+ SQ_STACK_RESOURCE_MGMT_1 = 0x00008c10,
+ NUM_PS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_PS_STACK_ENTRIES_shift = 0,
+ NUM_VS_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_VS_STACK_ENTRIES_shift = 16,
+ SQ_STACK_RESOURCE_MGMT_2 = 0x00008c14,
+ NUM_GS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_GS_STACK_ENTRIES_shift = 0,
+ NUM_ES_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_ES_STACK_ENTRIES_shift = 16,
+ SQ_ESGS_RING_BASE = 0x00008c40,
+ SQ_ESGS_RING_SIZE = 0x00008c44,
+ SQ_GSVS_RING_BASE = 0x00008c48,
+ SQ_GSVS_RING_SIZE = 0x00008c4c,
+ SQ_ESTMP_RING_BASE = 0x00008c50,
+ SQ_ESTMP_RING_SIZE = 0x00008c54,
+ SQ_GSTMP_RING_BASE = 0x00008c58,
+ SQ_GSTMP_RING_SIZE = 0x00008c5c,
+ SQ_VSTMP_RING_BASE = 0x00008c60,
+ SQ_VSTMP_RING_SIZE = 0x00008c64,
+ SQ_PSTMP_RING_BASE = 0x00008c68,
+ SQ_PSTMP_RING_SIZE = 0x00008c6c,
+ SQ_FBUF_RING_BASE = 0x00008c70,
+ SQ_FBUF_RING_SIZE = 0x00008c74,
+ SQ_REDUC_RING_BASE = 0x00008c78,
+ SQ_REDUC_RING_SIZE = 0x00008c7c,
+ SQ_ALU_WORD1_OP3 = 0x00008dfc,
+ SRC2_SEL_mask = 0x1ff << 0,
+ SRC2_SEL_shift = 0,
+ SQ_ALU_SRC_0 = 0xf8,
+ SQ_ALU_SRC_1 = 0xf9,
+ SQ_ALU_SRC_1_INT = 0xfa,
+ SQ_ALU_SRC_M_1_INT = 0xfb,
+ SQ_ALU_SRC_0_5 = 0xfc,
+ SQ_ALU_SRC_LITERAL = 0xfd,
+ SQ_ALU_SRC_PV = 0xfe,
+ SQ_ALU_SRC_PS = 0xff,
+ SRC2_REL_bit = 1 << 9,
+ SRC2_CHAN_mask = 0x03 << 10,
+ SRC2_CHAN_shift = 10,
+ SQ_CHAN_X = 0x00,
+ SQ_CHAN_Y = 0x01,
+ SQ_CHAN_Z = 0x02,
+ SQ_CHAN_W = 0x03,
+ SRC2_NEG_bit = 1 << 12,
+ SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13,
+ SQ_ALU_WORD1_OP3__ALU_INST_shift = 13,
+ SQ_OP3_INST_MUL_LIT = 0x0c,
+ SQ_OP3_INST_MUL_LIT_M2 = 0x0d,
+ SQ_OP3_INST_MUL_LIT_M4 = 0x0e,
+ SQ_OP3_INST_MUL_LIT_D2 = 0x0f,
+ SQ_OP3_INST_MULADD = 0x10,
+ SQ_OP3_INST_MULADD_M2 = 0x11,
+ SQ_OP3_INST_MULADD_M4 = 0x12,
+ SQ_OP3_INST_MULADD_D2 = 0x13,
+ SQ_OP3_INST_MULADD_IEEE = 0x14,
+ SQ_OP3_INST_MULADD_IEEE_M2 = 0x15,
+ SQ_OP3_INST_MULADD_IEEE_M4 = 0x16,
+ SQ_OP3_INST_MULADD_IEEE_D2 = 0x17,
+ SQ_OP3_INST_CNDE = 0x18,
+ SQ_OP3_INST_CNDGT = 0x19,
+ SQ_OP3_INST_CNDGE = 0x1a,
+ SQ_OP3_INST_CNDE_INT = 0x1c,
+ SQ_OP3_INST_CNDGT_INT = 0x1d,
+ SQ_OP3_INST_CNDGE_INT = 0x1e,
+ SQ_TEX_WORD2 = 0x00008dfc,
+ OFFSET_X_mask = 0x1f << 0,
+ OFFSET_X_shift = 0,
+ OFFSET_Y_mask = 0x1f << 5,
+ OFFSET_Y_shift = 5,
+ OFFSET_Z_mask = 0x1f << 10,
+ OFFSET_Z_shift = 10,
+ SAMPLER_ID_mask = 0x1f << 15,
+ SAMPLER_ID_shift = 15,
+ SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20,
+ SQ_TEX_WORD2__SRC_SEL_X_shift = 20,
+ SQ_SEL_X = 0x00,
+ SQ_SEL_Y = 0x01,
+ SQ_SEL_Z = 0x02,
+ SQ_SEL_W = 0x03,
+ SQ_SEL_0 = 0x04,
+ SQ_SEL_1 = 0x05,
+ SRC_SEL_Y_mask = 0x07 << 23,
+ SRC_SEL_Y_shift = 23,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SRC_SEL_Z_mask = 0x07 << 26,
+ SRC_SEL_Z_shift = 26,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SRC_SEL_W_mask = 0x07 << 29,
+ SRC_SEL_W_shift = 29,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc,
+ BURST_COUNT_mask = 0x0f << 17,
+ BURST_COUNT_shift = 17,
+ END_OF_PROGRAM_bit = 1 << 21,
+ VALID_PIXEL_MODE_bit = 1 << 22,
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23,
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 23,
+ SQ_CF_INST_MEM_STREAM0 = 0x20,
+ SQ_CF_INST_MEM_STREAM1 = 0x21,
+ SQ_CF_INST_MEM_STREAM2 = 0x22,
+ SQ_CF_INST_MEM_STREAM3 = 0x23,
+ SQ_CF_INST_MEM_SCRATCH = 0x24,
+ SQ_CF_INST_MEM_REDUCTION = 0x25,
+ SQ_CF_INST_MEM_RING = 0x26,
+ SQ_CF_INST_EXPORT = 0x27,
+ SQ_CF_INST_EXPORT_DONE = 0x28,
+ WHOLE_QUAD_MODE_bit = 1 << 30,
+ BARRIER_bit = 1 << 31,
+ SQ_CF_ALU_WORD1 = 0x00008dfc,
+ KCACHE_MODE1_mask = 0x03 << 0,
+ KCACHE_MODE1_shift = 0,
+ SQ_CF_KCACHE_NOP = 0x00,
+ SQ_CF_KCACHE_LOCK_1 = 0x01,
+ SQ_CF_KCACHE_LOCK_2 = 0x02,
+ SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03,
+ KCACHE_ADDR0_mask = 0xff << 2,
+ KCACHE_ADDR0_shift = 2,
+ KCACHE_ADDR1_mask = 0xff << 10,
+ KCACHE_ADDR1_shift = 10,
+ SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18,
+ SQ_CF_ALU_WORD1__COUNT_shift = 18,
+ SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25,
+ SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26,
+ SQ_CF_ALU_WORD1__CF_INST_shift = 26,
+ SQ_CF_INST_ALU = 0x08,
+ SQ_CF_INST_ALU_PUSH_BEFORE = 0x09,
+ SQ_CF_INST_ALU_POP_AFTER = 0x0a,
+ SQ_CF_INST_ALU_POP2_AFTER = 0x0b,
+ SQ_CF_INST_ALU_CONTINUE = 0x0d,
+ SQ_CF_INST_ALU_BREAK = 0x0e,
+ SQ_CF_INST_ALU_ELSE_AFTER = 0x0f,
+/* WHOLE_QUAD_MODE_bit = 1 << 30, */
+/* BARRIER_bit = 1 << 31, */
+ SQ_TEX_WORD1 = 0x00008dfc,
+ SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0,
+ SQ_TEX_WORD1__DST_GPR_shift = 0,
+ SQ_TEX_WORD1__DST_REL_bit = 1 << 7,
+ SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_TEX_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_SEL_MASK = 0x07,
+ SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_TEX_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_TEX_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_TEX_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21,
+ SQ_TEX_WORD1__LOD_BIAS_shift = 21,
+ COORD_TYPE_X_bit = 1 << 28,
+ COORD_TYPE_Y_bit = 1 << 29,
+ COORD_TYPE_Z_bit = 1 << 30,
+ COORD_TYPE_W_bit = 1 << 31,
+ SQ_VTX_WORD0 = 0x00008dfc,
+ VTX_INST_mask = 0x1f << 0,
+ VTX_INST_shift = 0,
+ SQ_VTX_INST_FETCH = 0x00,
+ SQ_VTX_INST_SEMANTIC = 0x01,
+ FETCH_TYPE_mask = 0x03 << 5,
+ FETCH_TYPE_shift = 5,
+ SQ_VTX_FETCH_VERTEX_DATA = 0x00,
+ SQ_VTX_FETCH_INSTANCE_DATA = 0x01,
+ SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02,
+ FETCH_WHOLE_QUAD_bit = 1 << 7,
+ BUFFER_ID_mask = 0xff << 8,
+ BUFFER_ID_shift = 8,
+ SRC_GPR_mask = 0x7f << 16,
+ SRC_GPR_shift = 16,
+ SRC_REL_bit = 1 << 23,
+ SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24,
+ SQ_VTX_WORD0__SRC_SEL_X_shift = 24,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+ MEGA_FETCH_COUNT_mask = 0x3f << 26,
+ MEGA_FETCH_COUNT_shift = 26,
+ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc,
+ SEL_X_mask = 0x07 << 0,
+ SEL_X_shift = 0,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_Y_mask = 0x07 << 3,
+ SEL_Y_shift = 3,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_Z_mask = 0x07 << 6,
+ SEL_Z_shift = 6,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_W_mask = 0x07 << 9,
+ SEL_W_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_ALU_WORD1 = 0x00008dfc,
+ ENCODING_mask = 0x07 << 15,
+ ENCODING_shift = 15,
+ BANK_SWIZZLE_mask = 0x07 << 18,
+ BANK_SWIZZLE_shift = 18,
+ SQ_ALU_VEC_012 = 0x00,
+ SQ_ALU_VEC_021 = 0x01,
+ SQ_ALU_VEC_120 = 0x02,
+ SQ_ALU_VEC_102 = 0x03,
+ SQ_ALU_VEC_201 = 0x04,
+ SQ_ALU_VEC_210 = 0x05,
+ SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21,
+ SQ_ALU_WORD1__DST_GPR_shift = 21,
+ SQ_ALU_WORD1__DST_REL_bit = 1 << 28,
+ DST_CHAN_mask = 0x03 << 29,
+ DST_CHAN_shift = 29,
+ CHAN_X = 0x00,
+ CHAN_Y = 0x01,
+ CHAN_Z = 0x02,
+ CHAN_W = 0x03,
+ SQ_ALU_WORD1__CLAMP_bit = 1 << 31,
+ SQ_CF_ALU_WORD0 = 0x00008dfc,
+ SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0,
+ SQ_CF_ALU_WORD0__ADDR_shift = 0,
+ KCACHE_BANK0_mask = 0x0f << 22,
+ KCACHE_BANK0_shift = 22,
+ KCACHE_BANK1_mask = 0x0f << 26,
+ KCACHE_BANK1_shift = 26,
+ KCACHE_MODE0_mask = 0x03 << 30,
+ KCACHE_MODE0_shift = 30,
+/* SQ_CF_KCACHE_NOP = 0x00, */
+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */
+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */
+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */
+ SQ_VTX_WORD2 = 0x00008dfc,
+ SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0,
+ SQ_VTX_WORD2__OFFSET_shift = 0,
+ SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16,
+ SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16,
+ SQ_ENDIAN_NONE = 0x00,
+ SQ_ENDIAN_8IN16 = 0x01,
+ SQ_ENDIAN_8IN32 = 0x02,
+ CONST_BUF_NO_STRIDE_bit = 1 << 18,
+ MEGA_FETCH_bit = 1 << 19,
+ SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20,
+ SQ_ALU_WORD1_OP2_V2 = 0x00008dfc,
+ SRC0_ABS_bit = 1 << 0,
+ SRC1_ABS_bit = 1 << 1,
+ UPDATE_EXECUTE_MASK_bit = 1 << 2,
+ UPDATE_PRED_bit = 1 << 3,
+ WRITE_MASK_bit = 1 << 4,
+ SQ_ALU_WORD1_OP2_V2__OMOD_mask = 0x03 << 5,
+ SQ_ALU_WORD1_OP2_V2__OMOD_shift = 5,
+ SQ_ALU_OMOD_OFF = 0x00,
+ SQ_ALU_OMOD_M2 = 0x01,
+ SQ_ALU_OMOD_M4 = 0x02,
+ SQ_ALU_OMOD_D2 = 0x03,
+ SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7,
+ SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7,
+ SQ_OP2_INST_ADD = 0x00,
+ SQ_OP2_INST_MUL = 0x01,
+ SQ_OP2_INST_MUL_IEEE = 0x02,
+ SQ_OP2_INST_MAX = 0x03,
+ SQ_OP2_INST_MIN = 0x04,
+ SQ_OP2_INST_MAX_DX10 = 0x05,
+ SQ_OP2_INST_MIN_DX10 = 0x06,
+ SQ_OP2_INST_SETE = 0x08,
+ SQ_OP2_INST_SETGT = 0x09,
+ SQ_OP2_INST_SETGE = 0x0a,
+ SQ_OP2_INST_SETNE = 0x0b,
+ SQ_OP2_INST_SETE_DX10 = 0x0c,
+ SQ_OP2_INST_SETGT_DX10 = 0x0d,
+ SQ_OP2_INST_SETGE_DX10 = 0x0e,
+ SQ_OP2_INST_SETNE_DX10 = 0x0f,
+ SQ_OP2_INST_FRACT = 0x10,
+ SQ_OP2_INST_TRUNC = 0x11,
+ SQ_OP2_INST_CEIL = 0x12,
+ SQ_OP2_INST_RNDNE = 0x13,
+ SQ_OP2_INST_FLOOR = 0x14,
+ SQ_OP2_INST_MOVA = 0x15,
+ SQ_OP2_INST_MOVA_FLOOR = 0x16,
+ SQ_OP2_INST_MOVA_INT = 0x18,
+ SQ_OP2_INST_MOV = 0x19,
+ SQ_OP2_INST_NOP = 0x1a,
+ SQ_OP2_INST_PRED_SETGT_UINT = 0x1e,
+ SQ_OP2_INST_PRED_SETGE_UINT = 0x1f,
+ SQ_OP2_INST_PRED_SETE = 0x20,
+ SQ_OP2_INST_PRED_SETGT = 0x21,
+ SQ_OP2_INST_PRED_SETGE = 0x22,
+ SQ_OP2_INST_PRED_SETNE = 0x23,
+ SQ_OP2_INST_PRED_SET_INV = 0x24,
+ SQ_OP2_INST_PRED_SET_POP = 0x25,
+ SQ_OP2_INST_PRED_SET_CLR = 0x26,
+ SQ_OP2_INST_PRED_SET_RESTORE = 0x27,
+ SQ_OP2_INST_PRED_SETE_PUSH = 0x28,
+ SQ_OP2_INST_PRED_SETGT_PUSH = 0x29,
+ SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a,
+ SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b,
+ SQ_OP2_INST_KILLE = 0x2c,
+ SQ_OP2_INST_KILLGT = 0x2d,
+ SQ_OP2_INST_KILLGE = 0x2e,
+ SQ_OP2_INST_KILLNE = 0x2f,
+ SQ_OP2_INST_AND_INT = 0x30,
+ SQ_OP2_INST_OR_INT = 0x31,
+ SQ_OP2_INST_XOR_INT = 0x32,
+ SQ_OP2_INST_NOT_INT = 0x33,
+ SQ_OP2_INST_ADD_INT = 0x34,
+ SQ_OP2_INST_SUB_INT = 0x35,
+ SQ_OP2_INST_MAX_INT = 0x36,
+ SQ_OP2_INST_MIN_INT = 0x37,
+ SQ_OP2_INST_MAX_UINT = 0x38,
+ SQ_OP2_INST_MIN_UINT = 0x39,
+ SQ_OP2_INST_SETE_INT = 0x3a,
+ SQ_OP2_INST_SETGT_INT = 0x3b,
+ SQ_OP2_INST_SETGE_INT = 0x3c,
+ SQ_OP2_INST_SETNE_INT = 0x3d,
+ SQ_OP2_INST_SETGT_UINT = 0x3e,
+ SQ_OP2_INST_SETGE_UINT = 0x3f,
+ SQ_OP2_INST_KILLGT_UINT = 0x40,
+ SQ_OP2_INST_KILLGE_UINT = 0x41,
+ SQ_OP2_INST_PRED_SETE_INT = 0x42,
+ SQ_OP2_INST_PRED_SETGT_INT = 0x43,
+ SQ_OP2_INST_PRED_SETGE_INT = 0x44,
+ SQ_OP2_INST_PRED_SETNE_INT = 0x45,
+ SQ_OP2_INST_KILLE_INT = 0x46,
+ SQ_OP2_INST_KILLGT_INT = 0x47,
+ SQ_OP2_INST_KILLGE_INT = 0x48,
+ SQ_OP2_INST_KILLNE_INT = 0x49,
+ SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a,
+ SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b,
+ SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c,
+ SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d,
+ SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e,
+ SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f,
+ SQ_OP2_INST_DOT4 = 0x50,
+ SQ_OP2_INST_DOT4_IEEE = 0x51,
+ SQ_OP2_INST_CUBE = 0x52,
+ SQ_OP2_INST_MAX4 = 0x53,
+ SQ_OP2_INST_MOVA_GPR_INT = 0x60,
+ SQ_OP2_INST_EXP_IEEE = 0x61,
+ SQ_OP2_INST_LOG_CLAMPED = 0x62,
+ SQ_OP2_INST_LOG_IEEE = 0x63,
+ SQ_OP2_INST_RECIP_CLAMPED = 0x64,
+ SQ_OP2_INST_RECIP_FF = 0x65,
+ SQ_OP2_INST_RECIP_IEEE = 0x66,
+ SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67,
+ SQ_OP2_INST_RECIPSQRT_FF = 0x68,
+ SQ_OP2_INST_RECIPSQRT_IEEE = 0x69,
+ SQ_OP2_INST_SQRT_IEEE = 0x6a,
+ SQ_OP2_INST_FLT_TO_INT = 0x6b,
+ SQ_OP2_INST_INT_TO_FLT = 0x6c,
+ SQ_OP2_INST_UINT_TO_FLT = 0x6d,
+ SQ_OP2_INST_SIN = 0x6e,
+ SQ_OP2_INST_COS = 0x6f,
+ SQ_OP2_INST_ASHR_INT = 0x70,
+ SQ_OP2_INST_LSHR_INT = 0x71,
+ SQ_OP2_INST_LSHL_INT = 0x72,
+ SQ_OP2_INST_MULLO_INT = 0x73,
+ SQ_OP2_INST_MULHI_INT = 0x74,
+ SQ_OP2_INST_MULLO_UINT = 0x75,
+ SQ_OP2_INST_MULHI_UINT = 0x76,
+ SQ_OP2_INST_RECIP_INT = 0x77,
+ SQ_OP2_INST_RECIP_UINT = 0x78,
+ SQ_OP2_INST_FLT_TO_UINT = 0x79,
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc,
+ ARRAY_SIZE_mask = 0xfff << 0,
+ ARRAY_SIZE_shift = 0,
+ COMP_MASK_mask = 0x0f << 12,
+ COMP_MASK_shift = 12,
+ SQ_CF_WORD0 = 0x00008dfc,
+ SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc,
+ ARRAY_BASE_mask = 0x1fff << 0,
+ ARRAY_BASE_shift = 0,
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13,
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13,
+ SQ_EXPORT_PIXEL = 0x00,
+ SQ_EXPORT_POS = 0x01,
+ SQ_EXPORT_PARAM = 0x02,
+ X_UNUSED_FOR_SX_EXPORTS = 0x03,
+ RW_GPR_mask = 0x7f << 15,
+ RW_GPR_shift = 15,
+ RW_REL_bit = 1 << 22,
+ INDEX_GPR_mask = 0x7f << 23,
+ INDEX_GPR_shift = 23,
+ ELEM_SIZE_mask = 0x03 << 30,
+ ELEM_SIZE_shift = 30,
+ SQ_VTX_WORD1 = 0x00008dfc,
+ SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_VTX_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_VTX_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_VTX_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_VTX_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ USE_CONST_FIELDS_bit = 1 << 21,
+ SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22,
+ SQ_VTX_WORD1__DATA_FORMAT_shift = 22,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28,
+ SQ_NUM_FORMAT_NORM = 0x00,
+ SQ_NUM_FORMAT_INT = 0x01,
+ SQ_NUM_FORMAT_SCALED = 0x02,
+ SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30,
+ SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31,
+ SQ_ALU_WORD1_OP2 = 0x00008dfc,
+/* SRC0_ABS_bit = 1 << 0, */
+/* SRC1_ABS_bit = 1 << 1, */
+/* UPDATE_EXECUTE_MASK_bit = 1 << 2, */
+/* UPDATE_PRED_bit = 1 << 3, */
+/* WRITE_MASK_bit = 1 << 4, */
+ FOG_MERGE_bit = 1 << 5,
+ SQ_ALU_WORD1_OP2__OMOD_mask = 0x03 << 6,
+ SQ_ALU_WORD1_OP2__OMOD_shift = 6,
+/* SQ_ALU_OMOD_OFF = 0x00, */
+/* SQ_ALU_OMOD_M2 = 0x01, */
+/* SQ_ALU_OMOD_M4 = 0x02, */
+/* SQ_ALU_OMOD_D2 = 0x03, */
+ SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8,
+ SQ_ALU_WORD1_OP2__ALU_INST_shift = 8,
+/* SQ_OP2_INST_ADD = 0x00, */
+/* SQ_OP2_INST_MUL = 0x01, */
+/* SQ_OP2_INST_MUL_IEEE = 0x02, */
+/* SQ_OP2_INST_MAX = 0x03, */
+/* SQ_OP2_INST_MIN = 0x04, */
+/* SQ_OP2_INST_MAX_DX10 = 0x05, */
+/* SQ_OP2_INST_MIN_DX10 = 0x06, */
+/* SQ_OP2_INST_SETE = 0x08, */
+/* SQ_OP2_INST_SETGT = 0x09, */
+/* SQ_OP2_INST_SETGE = 0x0a, */
+/* SQ_OP2_INST_SETNE = 0x0b, */
+/* SQ_OP2_INST_SETE_DX10 = 0x0c, */
+/* SQ_OP2_INST_SETGT_DX10 = 0x0d, */
+/* SQ_OP2_INST_SETGE_DX10 = 0x0e, */
+/* SQ_OP2_INST_SETNE_DX10 = 0x0f, */
+/* SQ_OP2_INST_FRACT = 0x10, */
+/* SQ_OP2_INST_TRUNC = 0x11, */
+/* SQ_OP2_INST_CEIL = 0x12, */
+/* SQ_OP2_INST_RNDNE = 0x13, */
+/* SQ_OP2_INST_FLOOR = 0x14, */
+/* SQ_OP2_INST_MOVA = 0x15, */
+/* SQ_OP2_INST_MOVA_FLOOR = 0x16, */
+/* SQ_OP2_INST_MOVA_INT = 0x18, */
+/* SQ_OP2_INST_MOV = 0x19, */
+/* SQ_OP2_INST_NOP = 0x1a, */
+/* SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, */
+/* SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, */
+/* SQ_OP2_INST_PRED_SETE = 0x20, */
+/* SQ_OP2_INST_PRED_SETGT = 0x21, */
+/* SQ_OP2_INST_PRED_SETGE = 0x22, */
+/* SQ_OP2_INST_PRED_SETNE = 0x23, */
+/* SQ_OP2_INST_PRED_SET_INV = 0x24, */
+/* SQ_OP2_INST_PRED_SET_POP = 0x25, */
+/* SQ_OP2_INST_PRED_SET_CLR = 0x26, */
+/* SQ_OP2_INST_PRED_SET_RESTORE = 0x27, */
+/* SQ_OP2_INST_PRED_SETE_PUSH = 0x28, */
+/* SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, */
+/* SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, */
+/* SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, */
+/* SQ_OP2_INST_KILLE = 0x2c, */
+/* SQ_OP2_INST_KILLGT = 0x2d, */
+/* SQ_OP2_INST_KILLGE = 0x2e, */
+/* SQ_OP2_INST_KILLNE = 0x2f, */
+/* SQ_OP2_INST_AND_INT = 0x30, */
+/* SQ_OP2_INST_OR_INT = 0x31, */
+/* SQ_OP2_INST_XOR_INT = 0x32, */
+/* SQ_OP2_INST_NOT_INT = 0x33, */
+/* SQ_OP2_INST_ADD_INT = 0x34, */
+/* SQ_OP2_INST_SUB_INT = 0x35, */
+/* SQ_OP2_INST_MAX_INT = 0x36, */
+/* SQ_OP2_INST_MIN_INT = 0x37, */
+/* SQ_OP2_INST_MAX_UINT = 0x38, */
+/* SQ_OP2_INST_MIN_UINT = 0x39, */
+/* SQ_OP2_INST_SETE_INT = 0x3a, */
+/* SQ_OP2_INST_SETGT_INT = 0x3b, */
+/* SQ_OP2_INST_SETGE_INT = 0x3c, */
+/* SQ_OP2_INST_SETNE_INT = 0x3d, */
+/* SQ_OP2_INST_SETGT_UINT = 0x3e, */
+/* SQ_OP2_INST_SETGE_UINT = 0x3f, */
+/* SQ_OP2_INST_KILLGT_UINT = 0x40, */
+/* SQ_OP2_INST_KILLGE_UINT = 0x41, */
+/* SQ_OP2_INST_PRED_SETE_INT = 0x42, */
+/* SQ_OP2_INST_PRED_SETGT_INT = 0x43, */
+/* SQ_OP2_INST_PRED_SETGE_INT = 0x44, */
+/* SQ_OP2_INST_PRED_SETNE_INT = 0x45, */
+/* SQ_OP2_INST_KILLE_INT = 0x46, */
+/* SQ_OP2_INST_KILLGT_INT = 0x47, */
+/* SQ_OP2_INST_KILLGE_INT = 0x48, */
+/* SQ_OP2_INST_KILLNE_INT = 0x49, */
+/* SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, */
+/* SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, */
+/* SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, */
+/* SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, */
+/* SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, */
+/* SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, */
+/* SQ_OP2_INST_DOT4 = 0x50, */
+/* SQ_OP2_INST_DOT4_IEEE = 0x51, */
+/* SQ_OP2_INST_CUBE = 0x52, */
+/* SQ_OP2_INST_MAX4 = 0x53, */
+/* SQ_OP2_INST_MOVA_GPR_INT = 0x60, */
+/* SQ_OP2_INST_EXP_IEEE = 0x61, */
+/* SQ_OP2_INST_LOG_CLAMPED = 0x62, */
+/* SQ_OP2_INST_LOG_IEEE = 0x63, */
+/* SQ_OP2_INST_RECIP_CLAMPED = 0x64, */
+/* SQ_OP2_INST_RECIP_FF = 0x65, */
+/* SQ_OP2_INST_RECIP_IEEE = 0x66, */
+/* SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, */
+/* SQ_OP2_INST_RECIPSQRT_FF = 0x68, */
+/* SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, */
+/* SQ_OP2_INST_SQRT_IEEE = 0x6a, */
+/* SQ_OP2_INST_FLT_TO_INT = 0x6b, */
+/* SQ_OP2_INST_INT_TO_FLT = 0x6c, */
+/* SQ_OP2_INST_UINT_TO_FLT = 0x6d, */
+/* SQ_OP2_INST_SIN = 0x6e, */
+/* SQ_OP2_INST_COS = 0x6f, */
+/* SQ_OP2_INST_ASHR_INT = 0x70, */
+/* SQ_OP2_INST_LSHR_INT = 0x71, */
+/* SQ_OP2_INST_LSHL_INT = 0x72, */
+/* SQ_OP2_INST_MULLO_INT = 0x73, */
+/* SQ_OP2_INST_MULHI_INT = 0x74, */
+/* SQ_OP2_INST_MULLO_UINT = 0x75, */
+/* SQ_OP2_INST_MULHI_UINT = 0x76, */
+/* SQ_OP2_INST_RECIP_INT = 0x77, */
+/* SQ_OP2_INST_RECIP_UINT = 0x78, */
+/* SQ_OP2_INST_FLT_TO_UINT = 0x79, */
+ SQ_CF_WORD1 = 0x00008dfc,
+ POP_COUNT_mask = 0x07 << 0,
+ POP_COUNT_shift = 0,
+ CF_CONST_mask = 0x1f << 3,
+ CF_CONST_shift = 3,
+ COND_mask = 0x03 << 8,
+ COND_shift = 8,
+ SQ_CF_COND_ACTIVE = 0x00,
+ SQ_CF_COND_FALSE = 0x01,
+ SQ_CF_COND_BOOL = 0x02,
+ SQ_CF_COND_NOT_BOOL = 0x03,
+ SQ_CF_WORD1__COUNT_mask = 0x07 << 10,
+ SQ_CF_WORD1__COUNT_shift = 10,
+ CALL_COUNT_mask = 0x3f << 13,
+ CALL_COUNT_shift = 13,
+ COUNT_3_bit = 1 << 19,
+/* END_OF_PROGRAM_bit = 1 << 21, */
+/* VALID_PIXEL_MODE_bit = 1 << 22, */
+ SQ_CF_WORD1__CF_INST_mask = 0x7f << 23,
+ SQ_CF_WORD1__CF_INST_shift = 23,
+ SQ_CF_INST_NOP = 0x00,
+ SQ_CF_INST_TEX = 0x01,
+ SQ_CF_INST_VTX = 0x02,
+ SQ_CF_INST_VTX_TC = 0x03,
+ SQ_CF_INST_LOOP_START = 0x04,
+ SQ_CF_INST_LOOP_END = 0x05,
+ SQ_CF_INST_LOOP_START_DX10 = 0x06,
+ SQ_CF_INST_LOOP_START_NO_AL = 0x07,
+ SQ_CF_INST_LOOP_CONTINUE = 0x08,
+ SQ_CF_INST_LOOP_BREAK = 0x09,
+ SQ_CF_INST_JUMP = 0x0a,
+ SQ_CF_INST_PUSH = 0x0b,
+ SQ_CF_INST_PUSH_ELSE = 0x0c,
+ SQ_CF_INST_ELSE = 0x0d,
+ SQ_CF_INST_POP = 0x0e,
+ SQ_CF_INST_POP_JUMP = 0x0f,
+ SQ_CF_INST_POP_PUSH = 0x10,
+ SQ_CF_INST_POP_PUSH_ELSE = 0x11,
+ SQ_CF_INST_CALL = 0x12,
+ SQ_CF_INST_CALL_FS = 0x13,
+ SQ_CF_INST_RETURN = 0x14,
+ SQ_CF_INST_EMIT_VERTEX = 0x15,
+ SQ_CF_INST_EMIT_CUT_VERTEX = 0x16,
+ SQ_CF_INST_CUT_VERTEX = 0x17,
+ SQ_CF_INST_KILL = 0x18,
+/* WHOLE_QUAD_MODE_bit = 1 << 30, */
+/* BARRIER_bit = 1 << 31, */
+ SQ_VTX_WORD1_SEM = 0x00008dfc,
+ SEMANTIC_ID_mask = 0xff << 0,
+ SEMANTIC_ID_shift = 0,
+ SQ_TEX_WORD0 = 0x00008dfc,
+ TEX_INST_mask = 0x1f << 0,
+ TEX_INST_shift = 0,
+ SQ_TEX_INST_VTX_FETCH = 0x00,
+ SQ_TEX_INST_VTX_SEMANTIC = 0x01,
+ SQ_TEX_INST_LD = 0x03,
+ SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04,
+ SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05,
+ SQ_TEX_INST_GET_LOD = 0x06,
+ SQ_TEX_INST_GET_GRADIENTS_H = 0x07,
+ SQ_TEX_INST_GET_GRADIENTS_V = 0x08,
+ SQ_TEX_INST_GET_LERP = 0x09,
+ SQ_TEX_INST_RESERVED_10 = 0x0a,
+ SQ_TEX_INST_SET_GRADIENTS_H = 0x0b,
+ SQ_TEX_INST_SET_GRADIENTS_V = 0x0c,
+ SQ_TEX_INST_PASS = 0x0d,
+ X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS = 0x0e,
+ SQ_TEX_INST_SAMPLE = 0x10,
+ SQ_TEX_INST_SAMPLE_L = 0x11,
+ SQ_TEX_INST_SAMPLE_LB = 0x12,
+ SQ_TEX_INST_SAMPLE_LZ = 0x13,
+ SQ_TEX_INST_SAMPLE_G = 0x14,
+ SQ_TEX_INST_SAMPLE_G_L = 0x15,
+ SQ_TEX_INST_SAMPLE_G_LB = 0x16,
+ SQ_TEX_INST_SAMPLE_G_LZ = 0x17,
+ SQ_TEX_INST_SAMPLE_C = 0x18,
+ SQ_TEX_INST_SAMPLE_C_L = 0x19,
+ SQ_TEX_INST_SAMPLE_C_LB = 0x1a,
+ SQ_TEX_INST_SAMPLE_C_LZ = 0x1b,
+ SQ_TEX_INST_SAMPLE_C_G = 0x1c,
+ SQ_TEX_INST_SAMPLE_C_G_L = 0x1d,
+ SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e,
+ SQ_TEX_INST_SAMPLE_C_G_LZ = 0x1f,
+ BC_FRAC_MODE_bit = 1 << 5,
+/* FETCH_WHOLE_QUAD_bit = 1 << 7, */
+ RESOURCE_ID_mask = 0xff << 8,
+ RESOURCE_ID_shift = 8,
+/* SRC_GPR_mask = 0x7f << 16, */
+/* SRC_GPR_shift = 16, */
+/* SRC_REL_bit = 1 << 23, */
+ SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24,
+ SQ_VTX_WORD1_GPR = 0x00008dfc,
+ SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0,
+ SQ_VTX_WORD1_GPR__DST_GPR_shift = 0,
+ SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7,
+ SQ_ALU_WORD0 = 0x00008dfc,
+ SRC0_SEL_mask = 0x1ff << 0,
+ SRC0_SEL_shift = 0,
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+ SRC0_REL_bit = 1 << 9,
+ SRC0_CHAN_mask = 0x03 << 10,
+ SRC0_CHAN_shift = 10,
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ SRC0_NEG_bit = 1 << 12,
+ SRC1_SEL_mask = 0x1ff << 13,
+ SRC1_SEL_shift = 13,
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+ SRC1_REL_bit = 1 << 22,
+ SRC1_CHAN_mask = 0x03 << 23,
+ SRC1_CHAN_shift = 23,
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ SRC1_NEG_bit = 1 << 25,
+ INDEX_MODE_mask = 0x07 << 26,
+ INDEX_MODE_shift = 26,
+ SQ_INDEX_AR_X = 0x00,
+ SQ_INDEX_AR_Y = 0x01,
+ SQ_INDEX_AR_Z = 0x02,
+ SQ_INDEX_AR_W = 0x03,
+ SQ_INDEX_LOOP = 0x04,
+ PRED_SEL_mask = 0x03 << 29,
+ PRED_SEL_shift = 29,
+ SQ_PRED_SEL_OFF = 0x00,
+ SQ_PRED_SEL_ZERO = 0x02,
+ SQ_PRED_SEL_ONE = 0x03,
+ LAST_bit = 1 << 31,
+ SX_EXPORT_BUFFER_SIZES = 0x0000900c,
+ COLOR_BUFFER_SIZE_mask = 0xff << 0,
+ COLOR_BUFFER_SIZE_shift = 0,
+ POSITION_BUFFER_SIZE_mask = 0xff << 8,
+ POSITION_BUFFER_SIZE_shift = 8,
+ SMX_BUFFER_SIZE_mask = 0xff << 16,
+ SMX_BUFFER_SIZE_shift = 16,
+ SX_MEMORY_EXPORT_BASE = 0x00009010,
+ SX_MEMORY_EXPORT_SIZE = 0x00009014,
+ SPI_CONFIG_CNTL = 0x00009100,
+ GPR_WRITE_PRIORITY_mask = 0x1f << 0,
+ GPR_WRITE_PRIORITY_shift = 0,
+ X_PRIORITY_ORDER = 0x00,
+ X_PRIORITY_ORDER_VS = 0x01,
+ DISABLE_INTERP_1_bit = 1 << 5,
+ DEBUG_THREAD_TYPE_SEL_mask = 0x03 << 6,
+ DEBUG_THREAD_TYPE_SEL_shift = 6,
+ DEBUG_GROUP_SEL_mask = 0x1f << 8,
+ DEBUG_GROUP_SEL_shift = 8,
+ DEBUG_GRBM_OVERRIDE_bit = 1 << 13,
+ SPI_CONFIG_CNTL_1 = 0x0000913c,
+ VTX_DONE_DELAY_mask = 0x0f << 0,
+ VTX_DONE_DELAY_shift = 0,
+ X_DELAY_10_CLKS = 0x00,
+ X_DELAY_11_CLKS = 0x01,
+ X_DELAY_12_CLKS = 0x02,
+ X_DELAY_13_CLKS = 0x03,
+ X_DELAY_14_CLKS = 0x04,
+ X_DELAY_15_CLKS = 0x05,
+ X_DELAY_16_CLKS = 0x06,
+ X_DELAY_17_CLKS = 0x07,
+ X_DELAY_2_CLKS = 0x08,
+ X_DELAY_3_CLKS = 0x09,
+ X_DELAY_4_CLKS = 0x0a,
+ X_DELAY_5_CLKS = 0x0b,
+ X_DELAY_6_CLKS = 0x0c,
+ X_DELAY_7_CLKS = 0x0d,
+ X_DELAY_8_CLKS = 0x0e,
+ X_DELAY_9_CLKS = 0x0f,
+ INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4,
+ TD_FILTER4 = 0x00009400,
+ WEIGHT_1_mask = 0x7ff << 0,
+ WEIGHT_1_shift = 0,
+ WEIGHT_0_mask = 0x7ff << 11,
+ WEIGHT_0_shift = 11,
+ WEIGHT_PAIR_bit = 1 << 22,
+ PHASE_mask = 0x0f << 23,
+ PHASE_shift = 23,
+ DIRECTION_bit = 1 << 27,
+ TD_FILTER4_1 = 0x00009404,
+ TD_FILTER4_1_num = 35,
+/* WEIGHT_1_mask = 0x7ff << 0, */
+/* WEIGHT_1_shift = 0, */
+/* WEIGHT_0_mask = 0x7ff << 11, */
+/* WEIGHT_0_shift = 11, */
+ TD_CNTL = 0x00009490,
+ SYNC_PHASE_SH_mask = 0x03 << 0,
+ SYNC_PHASE_SH_shift = 0,
+ SYNC_PHASE_VC_SMX_mask = 0x03 << 4,
+ SYNC_PHASE_VC_SMX_shift = 4,
+ TD0_CNTL = 0x00009494,
+ TD0_CNTL_num = 4,
+ ID_OVERRIDE_mask = 0x03 << 28,
+ ID_OVERRIDE_shift = 28,
+ TD0_STATUS = 0x000094a4,
+ TD0_STATUS_num = 4,
+ BUSY_bit = 1 << 31,
+ TA_CNTL = 0x00009504,
+ GRADIENT_CREDIT_mask = 0x1f << 0,
+ GRADIENT_CREDIT_shift = 0,
+ WALKER_CREDIT_mask = 0x1f << 8,
+ WALKER_CREDIT_shift = 8,
+ ALIGNER_CREDIT_mask = 0x1f << 16,
+ ALIGNER_CREDIT_shift = 16,
+ TD_FIFO_CREDIT_mask = 0x3ff << 22,
+ TD_FIFO_CREDIT_shift = 22,
+ TA_CNTL_AUX = 0x00009508,
+ DISABLE_CUBE_WRAP_bit = 1 << 0,
+ SYNC_GRADIENT_bit = 1 << 24,
+ SYNC_WALKER_bit = 1 << 25,
+ SYNC_ALIGNER_bit = 1 << 26,
+ BILINEAR_PRECISION_bit = 1 << 31,
+ TA0_CNTL = 0x00009510,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA1_CNTL = 0x00009514,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA2_CNTL = 0x00009518,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA3_CNTL = 0x0000951c,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA0_STATUS = 0x00009520,
+ FG_PFIFO_EMPTYB_bit = 1 << 12,
+ FG_LFIFO_EMPTYB_bit = 1 << 13,
+ FG_SFIFO_EMPTYB_bit = 1 << 14,
+ FL_PFIFO_EMPTYB_bit = 1 << 16,
+ FL_LFIFO_EMPTYB_bit = 1 << 17,
+ FL_SFIFO_EMPTYB_bit = 1 << 18,
+ FA_PFIFO_EMPTYB_bit = 1 << 20,
+ FA_LFIFO_EMPTYB_bit = 1 << 21,
+ FA_SFIFO_EMPTYB_bit = 1 << 22,
+ IN_BUSY_bit = 1 << 24,
+ FG_BUSY_bit = 1 << 25,
+ FL_BUSY_bit = 1 << 27,
+ TA_BUSY_bit = 1 << 28,
+ FA_BUSY_bit = 1 << 29,
+ AL_BUSY_bit = 1 << 30,
+/* BUSY_bit = 1 << 31, */
+ TA1_STATUS = 0x00009524,
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
+/* IN_BUSY_bit = 1 << 24, */
+/* FG_BUSY_bit = 1 << 25, */
+/* FL_BUSY_bit = 1 << 27, */
+/* TA_BUSY_bit = 1 << 28, */
+/* FA_BUSY_bit = 1 << 29, */
+/* AL_BUSY_bit = 1 << 30, */
+/* BUSY_bit = 1 << 31, */
+ TA2_STATUS = 0x00009528,
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
+/* IN_BUSY_bit = 1 << 24, */
+/* FG_BUSY_bit = 1 << 25, */
+/* FL_BUSY_bit = 1 << 27, */
+/* TA_BUSY_bit = 1 << 28, */
+/* FA_BUSY_bit = 1 << 29, */
+/* AL_BUSY_bit = 1 << 30, */
+/* BUSY_bit = 1 << 31, */
+ TA3_STATUS = 0x0000952c,
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
+/* IN_BUSY_bit = 1 << 24, */
+/* FG_BUSY_bit = 1 << 25, */
+/* FL_BUSY_bit = 1 << 27, */
+/* TA_BUSY_bit = 1 << 28, */
+/* FA_BUSY_bit = 1 << 29, */
+/* AL_BUSY_bit = 1 << 30, */
+/* BUSY_bit = 1 << 31, */
+ TC_STATUS = 0x00009600,
+ TC_BUSY_bit = 1 << 0,
+ TC_INVALIDATE = 0x00009604,
+ START_bit = 1 << 0,
+ TC_CNTL = 0x00009608,
+ FORCE_HIT_bit = 1 << 0,
+ FORCE_MISS_bit = 1 << 1,
+ L2_SIZE_mask = 0x0f << 5,
+ L2_SIZE_shift = 5,
+ _256K = 0x00,
+ _224K = 0x01,
+ _192K = 0x02,
+ _160K = 0x03,
+ _128K = 0x04,
+ _96K = 0x05,
+ _64K = 0x06,
+ _32K = 0x07,
+ L2_DISABLE_LATE_HIT_bit = 1 << 9,
+ DISABLE_VERT_PERF_bit = 1 << 10,
+ DISABLE_INVAL_BUSY_bit = 1 << 11,
+ DISABLE_INVAL_SAME_SURFACE_bit = 1 << 12,
+ PARTITION_MODE_mask = 0x03 << 13,
+ PARTITION_MODE_shift = 13,
+ X_VERTEX = 0x00,
+ MISS_ARB_MODE_bit = 1 << 15,
+ HIT_ARB_MODE_bit = 1 << 16,
+ DISABLE_WRITE_DELAY_bit = 1 << 17,
+ HIT_FIFO_DEPTH_bit = 1 << 18,
+ VC_CNTL = 0x00009700,
+ L2_INVALIDATE_bit = 1 << 0,
+ RESERVED_bit = 1 << 1,
+ CC_FORCE_MISS_bit = 1 << 2,
+ MI_CHAN_SEL_mask = 0x03 << 3,
+ MI_CHAN_SEL_shift = 3,
+ X_MC0_USES_CH_0_1 = 0x00,
+ X_MC0_USES_CH_0_3 = 0x01,
+ X_VC_MC0_IS_ACTIVE = 0x02,
+ X_VC_MC1_IS_DISABLED = 0x03,
+ MI_STEER_DISABLE_bit = 1 << 5,
+ MI_CREDIT_CTR_mask = 0x0f << 6,
+ MI_CREDIT_CTR_shift = 6,
+ MI_CREDIT_WE_bit = 1 << 10,
+ MI_REQ_STALL_THLD_mask = 0x07 << 11,
+ MI_REQ_STALL_THLD_shift = 11,
+ X_LATENCY_EXCEEDS_399_CLOCKS = 0x00,
+ X_LATENCY_EXCEEDS_415_CLOCKS = 0x01,
+ X_LATENCY_EXCEEDS_431_CLOCKS = 0x02,
+ X_LATENCY_EXCEEDS_447_CLOCKS = 0x03,
+ X_LATENCY_EXCEEDS_463_CLOCKS = 0x04,
+ X_LATENCY_EXCEEDS_479_CLOCKS = 0x05,
+ X_LATENCY_EXCEEDS_495_CLOCKS = 0x06,
+ X_LATENCY_EXCEEDS_511_CLOCKS = 0x07,
+ VC_CNTL__MI_TIMESTAMP_RES_mask = 0x1f << 14,
+ VC_CNTL__MI_TIMESTAMP_RES_shift = 14,
+ X_1X_SYSTEM_CLOCK = 0x00,
+ X_2X_SYSTEM_CLOCK = 0x01,
+ X_4X_SYSTEM_CLOCK = 0x02,
+ X_8X_SYSTEM_CLOCK = 0x03,
+ X_16X_SYSTEM_CLOCK = 0x04,
+ X_32X_SYSTEM_CLOCK = 0x05,
+ X_64X_SYSTEM_CLOCK = 0x06,
+ X_128X_SYSTEM_CLOCK = 0x07,
+ X_256X_SYSTEM_CLOCK = 0x08,
+ X_512X_SYSTEM_CLOCK = 0x09,
+ X_1024X_SYSTEM_CLOCK = 0x0a,
+ X_2048X_SYSTEM_CLOCK = 0x0b,
+ X_4092X_SYSTEM_CLOCK = 0x0c,
+ X_8192X_SYSTEM_CLOCK = 0x0d,
+ X_16384X_SYSTEM_CLOCK = 0x0e,
+ X_32768X_SYSTEM_CLOCK = 0x0f,
+ VC_CNTL_STATUS = 0x00009704,
+ RP_BUSY_bit = 1 << 0,
+ RG_BUSY_bit = 1 << 1,
+ VC_BUSY_bit = 1 << 2,
+ CLAMP_DETECT_bit = 1 << 3,
+ VC_CONFIG = 0x00009718,
+ WRITE_DIS_bit = 1 << 0,
+ GPR_DATA_PHASE_ADJ_mask = 0x07 << 1,
+ GPR_DATA_PHASE_ADJ_shift = 1,
+ X_LATENCY_BASE_0_CYCLES = 0x00,
+ X_LATENCY_BASE_1_CYCLES = 0x01,
+ X_LATENCY_BASE_2_CYCLES = 0x02,
+ X_LATENCY_BASE_3_CYCLES = 0x03,
+ TD_SIMD_SYNC_ADJ_mask = 0x07 << 4,
+ TD_SIMD_SYNC_ADJ_shift = 4,
+ X_0_CYCLES_DELAY = 0x00,
+ X_1_CYCLES_DELAY = 0x01,
+ X_2_CYCLES_DELAY = 0x02,
+ X_3_CYCLES_DELAY = 0x03,
+ X_4_CYCLES_DELAY = 0x04,
+ X_5_CYCLES_DELAY = 0x05,
+ X_6_CYCLES_DELAY = 0x06,
+ X_7_CYCLES_DELAY = 0x07,
+ SMX_DC_CTL0 = 0x0000a020,
+ WR_GATHER_STREAM0_bit = 1 << 0,
+ WR_GATHER_STREAM1_bit = 1 << 1,
+ WR_GATHER_STREAM2_bit = 1 << 2,
+ WR_GATHER_STREAM3_bit = 1 << 3,
+ WR_GATHER_SCRATCH_bit = 1 << 4,
+ WR_GATHER_REDUC_BUF_bit = 1 << 5,
+ WR_GATHER_RING_BUF_bit = 1 << 6,
+ WR_GATHER_F_BUF_bit = 1 << 7,
+ DISABLE_CACHES_bit = 1 << 8,
+ AUTO_FLUSH_INVAL_EN_bit = 1 << 10,
+ AUTO_FLUSH_EN_bit = 1 << 11,
+ AUTO_FLUSH_CNT_mask = 0xffff << 12,
+ AUTO_FLUSH_CNT_shift = 12,
+ MC_RD_STALL_FACTOR_mask = 0x03 << 28,
+ MC_RD_STALL_FACTOR_shift = 28,
+ MC_WR_STALL_FACTOR_mask = 0x03 << 30,
+ MC_WR_STALL_FACTOR_shift = 30,
+ SMX_DC_CTL1 = 0x0000a024,
+ OP_FIFO_SKID_mask = 0x7f << 0,
+ OP_FIFO_SKID_shift = 0,
+ CACHE_LINE_SIZE_bit = 1 << 8,
+ MULTI_FLUSH_MODE_bit = 1 << 9,
+ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask = 0x0f << 10,
+ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift = 10,
+ DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit = 1 << 16,
+ DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit = 1 << 17,
+ DISABLE_FLUSH_ES_ALSO_INVALS_bit = 1 << 18,
+ DISABLE_FLUSH_GS_ALSO_INVALS_bit = 1 << 19,
+ SMX_DC_CTL2 = 0x0000a028,
+ INVALIDATE_CACHES_bit = 1 << 0,
+ CACHES_INVALID_bit = 1 << 1,
+ CACHES_DIRTY_bit = 1 << 2,
+ FLUSH_ALL_bit = 1 << 4,
+ FLUSH_GS_THREADS_bit = 1 << 8,
+ FLUSH_ES_THREADS_bit = 1 << 9,
+ SMX_DC_MC_INTF_CTL = 0x0000a02c,
+ MC_RD_REQ_CRED_mask = 0xff << 0,
+ MC_RD_REQ_CRED_shift = 0,
+ MC_WR_REQ_CRED_mask = 0xff << 16,
+ MC_WR_REQ_CRED_shift = 16,
+ TD_PS_SAMPLER0_BORDER_RED = 0x0000a400,
+ TD_PS_SAMPLER0_BORDER_RED_num = 18,
+ TD_PS_SAMPLER0_BORDER_RED_offset = 16,
+ TD_PS_SAMPLER0_BORDER_GREEN = 0x0000a404,
+ TD_PS_SAMPLER0_BORDER_GREEN_num = 18,
+ TD_PS_SAMPLER0_BORDER_GREEN_offset = 16,
+ TD_PS_SAMPLER0_BORDER_BLUE = 0x0000a408,
+ TD_PS_SAMPLER0_BORDER_BLUE_num = 18,
+ TD_PS_SAMPLER0_BORDER_BLUE_offset = 16,
+ TD_PS_SAMPLER0_BORDER_ALPHA = 0x0000a40c,
+ TD_PS_SAMPLER0_BORDER_ALPHA_num = 18,
+ TD_PS_SAMPLER0_BORDER_ALPHA_offset = 16,
+ TD_VS_SAMPLER0_BORDER_RED = 0x0000a600,
+ TD_VS_SAMPLER0_BORDER_RED_num = 18,
+ TD_VS_SAMPLER0_BORDER_RED_offset = 16,
+ TD_VS_SAMPLER0_BORDER_GREEN = 0x0000a604,
+ TD_VS_SAMPLER0_BORDER_GREEN_num = 18,
+ TD_VS_SAMPLER0_BORDER_GREEN_offset = 16,
+ TD_VS_SAMPLER0_BORDER_BLUE = 0x0000a608,
+ TD_VS_SAMPLER0_BORDER_BLUE_num = 18,
+ TD_VS_SAMPLER0_BORDER_BLUE_offset = 16,
+ TD_VS_SAMPLER0_BORDER_ALPHA = 0x0000a60c,
+ TD_VS_SAMPLER0_BORDER_ALPHA_num = 18,
+ TD_VS_SAMPLER0_BORDER_ALPHA_offset = 16,
+ TD_GS_SAMPLER0_BORDER_RED = 0x0000a800,
+ TD_GS_SAMPLER0_BORDER_RED_num = 18,
+ TD_GS_SAMPLER0_BORDER_RED_offset = 16,
+ TD_GS_SAMPLER0_BORDER_GREEN = 0x0000a804,
+ TD_GS_SAMPLER0_BORDER_GREEN_num = 18,
+ TD_GS_SAMPLER0_BORDER_GREEN_offset = 16,
+ TD_GS_SAMPLER0_BORDER_BLUE = 0x0000a808,
+ TD_GS_SAMPLER0_BORDER_BLUE_num = 18,
+ TD_GS_SAMPLER0_BORDER_BLUE_offset = 16,
+ TD_GS_SAMPLER0_BORDER_ALPHA = 0x0000a80c,
+ TD_GS_SAMPLER0_BORDER_ALPHA_num = 18,
+ TD_GS_SAMPLER0_BORDER_ALPHA_offset = 16,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL = 0x0000aa00,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL_num = 18,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_mask = 0x07 << 0,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_shift = 0,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask = 0x07 << 3,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift = 3,
+ DB_DEPTH_SIZE = 0x00028000,
+ PITCH_TILE_MAX_mask = 0x3ff << 0,
+ PITCH_TILE_MAX_shift = 0,
+ SLICE_TILE_MAX_mask = 0xfffff << 10,
+ SLICE_TILE_MAX_shift = 10,
+ DB_DEPTH_VIEW = 0x00028004,
+ SLICE_START_mask = 0x7ff << 0,
+ SLICE_START_shift = 0,
+ SLICE_MAX_mask = 0x7ff << 13,
+ SLICE_MAX_shift = 13,
+ DB_DEPTH_BASE = 0x0002800c,
+ DB_DEPTH_INFO = 0x00028010,
+ DB_DEPTH_INFO__FORMAT_mask = 0x07 << 0,
+ DB_DEPTH_INFO__FORMAT_shift = 0,
+ DEPTH_INVALID = 0x00,
+ DEPTH_16 = 0x01,
+ DEPTH_X8_24 = 0x02,
+ DEPTH_8_24 = 0x03,
+ DEPTH_X8_24_FLOAT = 0x04,
+ DEPTH_8_24_FLOAT = 0x05,
+ DEPTH_32_FLOAT = 0x06,
+ DEPTH_X24_8_32_FLOAT = 0x07,
+ DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3,
+ DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15,
+ DB_DEPTH_INFO__ARRAY_MODE_shift = 15,
+ ARRAY_1D_TILED_THIN1 = 0x02,
+ ARRAY_2D_TILED_THIN1 = 0x04,
+ TILE_SURFACE_ENABLE_bit = 1 << 25,
+ TILE_COMPACT_bit = 1 << 26,
+ ZRANGE_PRECISION_bit = 1 << 31,
+ DB_HTILE_DATA_BASE = 0x00028014,
+ DB_STENCIL_CLEAR = 0x00028028,
+ DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0,
+ DB_STENCIL_CLEAR__CLEAR_shift = 0,
+ MIN_mask = 0xff << 16,
+ MIN_shift = 16,
+ DB_DEPTH_CLEAR = 0x0002802c,
+ PA_SC_SCREEN_SCISSOR_TL = 0x00028030,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16,
+ PA_SC_SCREEN_SCISSOR_BR = 0x00028034,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16,
+ CB_COLOR0_BASE = 0x00028040,
+ CB_COLOR0_BASE_num = 8,
+ CB_COLOR0_SIZE = 0x00028060,
+ CB_COLOR0_SIZE_num = 8,
+/* PITCH_TILE_MAX_mask = 0x3ff << 0, */
+/* PITCH_TILE_MAX_shift = 0, */
+/* SLICE_TILE_MAX_mask = 0xfffff << 10, */
+/* SLICE_TILE_MAX_shift = 10, */
+ CB_COLOR0_VIEW = 0x00028080,
+ CB_COLOR0_VIEW_num = 8,
+/* SLICE_START_mask = 0x7ff << 0, */
+/* SLICE_START_shift = 0, */
+/* SLICE_MAX_mask = 0x7ff << 13, */
+/* SLICE_MAX_shift = 13, */
+ CB_COLOR0_INFO = 0x000280a0,
+ CB_COLOR0_INFO_num = 8,
+ ENDIAN_mask = 0x03 << 0,
+ ENDIAN_shift = 0,
+ ENDIAN_NONE = 0x00,
+ ENDIAN_8IN16 = 0x01,
+ ENDIAN_8IN32 = 0x02,
+ ENDIAN_8IN64 = 0x03,
+ CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2,
+ CB_COLOR0_INFO__FORMAT_shift = 2,
+ COLOR_INVALID = 0x00,
+ COLOR_8 = 0x01,
+ COLOR_4_4 = 0x02,
+ COLOR_3_3_2 = 0x03,
+ COLOR_16 = 0x05,
+ COLOR_16_FLOAT = 0x06,
+ COLOR_8_8 = 0x07,
+ COLOR_5_6_5 = 0x08,
+ COLOR_6_5_5 = 0x09,
+ COLOR_1_5_5_5 = 0x0a,
+ COLOR_4_4_4_4 = 0x0b,
+ COLOR_5_5_5_1 = 0x0c,
+ COLOR_32 = 0x0d,
+ COLOR_32_FLOAT = 0x0e,
+ COLOR_16_16 = 0x0f,
+ COLOR_16_16_FLOAT = 0x10,
+ COLOR_8_24 = 0x11,
+ COLOR_8_24_FLOAT = 0x12,
+ COLOR_24_8 = 0x13,
+ COLOR_24_8_FLOAT = 0x14,
+ COLOR_10_11_11 = 0x15,
+ COLOR_10_11_11_FLOAT = 0x16,
+ COLOR_11_11_10 = 0x17,
+ COLOR_11_11_10_FLOAT = 0x18,
+ COLOR_2_10_10_10 = 0x19,
+ COLOR_8_8_8_8 = 0x1a,
+ COLOR_10_10_10_2 = 0x1b,
+ COLOR_X24_8_32_FLOAT = 0x1c,
+ COLOR_32_32 = 0x1d,
+ COLOR_32_32_FLOAT = 0x1e,
+ COLOR_16_16_16_16 = 0x1f,
+ COLOR_16_16_16_16_FLOAT = 0x20,
+ COLOR_32_32_32_32 = 0x22,
+ COLOR_32_32_32_32_FLOAT = 0x23,
+ CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8,
+ CB_COLOR0_INFO__ARRAY_MODE_shift = 8,
+ ARRAY_LINEAR_GENERAL = 0x00,
+ ARRAY_LINEAR_ALIGNED = 0x01,
+/* ARRAY_1D_TILED_THIN1 = 0x02, */
+/* ARRAY_2D_TILED_THIN1 = 0x04, */
+ NUMBER_TYPE_mask = 0x07 << 12,
+ NUMBER_TYPE_shift = 12,
+ NUMBER_UNORM = 0x00,
+ NUMBER_SNORM = 0x01,
+ NUMBER_USCALED = 0x02,
+ NUMBER_SSCALED = 0x03,
+ NUMBER_UINT = 0x04,
+ NUMBER_SINT = 0x05,
+ NUMBER_SRGB = 0x06,
+ NUMBER_FLOAT = 0x07,
+ CB_COLOR0_INFO__READ_SIZE_bit = 1 << 15,
+ COMP_SWAP_mask = 0x03 << 16,
+ COMP_SWAP_shift = 16,
+ SWAP_STD = 0x00,
+ SWAP_ALT = 0x01,
+ SWAP_STD_REV = 0x02,
+ SWAP_ALT_REV = 0x03,
+ CB_COLOR0_INFO__TILE_MODE_mask = 0x03 << 18,
+ CB_COLOR0_INFO__TILE_MODE_shift = 18,
+ TILE_DISABLE = 0x00,
+ TILE_CLEAR_ENABLE = 0x01,
+ TILE_FRAG_ENABLE = 0x02,
+ BLEND_CLAMP_bit = 1 << 20,
+ CLEAR_COLOR_bit = 1 << 21,
+ BLEND_BYPASS_bit = 1 << 22,
+ BLEND_FLOAT32_bit = 1 << 23,
+ SIMPLE_FLOAT_bit = 1 << 24,
+ CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 25,
+/* TILE_COMPACT_bit = 1 << 26, */
+ SOURCE_FORMAT_bit = 1 << 27,
+ CB_COLOR0_TILE = 0x000280c0,
+ CB_COLOR0_TILE_num = 8,
+ CB_COLOR0_FRAG = 0x000280e0,
+ CB_COLOR0_FRAG_num = 8,
+ CB_COLOR0_MASK = 0x00028100,
+ CB_COLOR0_MASK_num = 8,
+ CMASK_BLOCK_MAX_mask = 0xfff << 0,
+ CMASK_BLOCK_MAX_shift = 0,
+ FMASK_TILE_MAX_mask = 0xfffff << 12,
+ FMASK_TILE_MAX_shift = 12,
+ CB_CLEAR_RED = 0x00028120,
+ CB_CLEAR_GREEN = 0x00028124,
+ CB_CLEAR_BLUE = 0x00028128,
+ CB_CLEAR_ALPHA = 0x0002812c,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0,
+ PA_SC_WINDOW_OFFSET = 0x00028200,
+ WINDOW_X_OFFSET_mask = 0x7fff << 0,
+ WINDOW_X_OFFSET_shift = 0,
+ WINDOW_Y_OFFSET_mask = 0x7fff << 16,
+ WINDOW_Y_OFFSET_shift = 16,
+ PA_SC_WINDOW_SCISSOR_TL = 0x00028204,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16,
+ WINDOW_OFFSET_DISABLE_bit = 1 << 31,
+ PA_SC_WINDOW_SCISSOR_BR = 0x00028208,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16,
+ PA_SC_CLIPRECT_RULE = 0x0002820c,
+ CLIP_RULE_mask = 0xffff << 0,
+ CLIP_RULE_shift = 0,
+ PA_SC_CLIPRECT_0_TL = 0x00028210,
+ PA_SC_CLIPRECT_0_TL_num = 4,
+ PA_SC_CLIPRECT_0_TL_offset = 8,
+ PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_CLIPRECT_0_TL__TL_X_shift = 0,
+ PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16,
+ PA_SC_CLIPRECT_0_BR = 0x00028214,
+ PA_SC_CLIPRECT_0_BR_num = 4,
+ PA_SC_CLIPRECT_0_BR_offset = 8,
+ PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_CLIPRECT_0_BR__BR_X_shift = 0,
+ PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16,
+ CB_TARGET_MASK = 0x00028238,
+ TARGET0_ENABLE_mask = 0x0f << 0,
+ TARGET0_ENABLE_shift = 0,
+ TARGET1_ENABLE_mask = 0x0f << 4,
+ TARGET1_ENABLE_shift = 4,
+ TARGET2_ENABLE_mask = 0x0f << 8,
+ TARGET2_ENABLE_shift = 8,
+ TARGET3_ENABLE_mask = 0x0f << 12,
+ TARGET3_ENABLE_shift = 12,
+ TARGET4_ENABLE_mask = 0x0f << 16,
+ TARGET4_ENABLE_shift = 16,
+ TARGET5_ENABLE_mask = 0x0f << 20,
+ TARGET5_ENABLE_shift = 20,
+ TARGET6_ENABLE_mask = 0x0f << 24,
+ TARGET6_ENABLE_shift = 24,
+ TARGET7_ENABLE_mask = 0x0f << 28,
+ TARGET7_ENABLE_shift = 28,
+ CB_SHADER_MASK = 0x0002823c,
+ OUTPUT0_ENABLE_mask = 0x0f << 0,
+ OUTPUT0_ENABLE_shift = 0,
+ OUTPUT1_ENABLE_mask = 0x0f << 4,
+ OUTPUT1_ENABLE_shift = 4,
+ OUTPUT2_ENABLE_mask = 0x0f << 8,
+ OUTPUT2_ENABLE_shift = 8,
+ OUTPUT3_ENABLE_mask = 0x0f << 12,
+ OUTPUT3_ENABLE_shift = 12,
+ OUTPUT4_ENABLE_mask = 0x0f << 16,
+ OUTPUT4_ENABLE_shift = 16,
+ OUTPUT5_ENABLE_mask = 0x0f << 20,
+ OUTPUT5_ENABLE_shift = 20,
+ OUTPUT6_ENABLE_mask = 0x0f << 24,
+ OUTPUT6_ENABLE_shift = 24,
+ OUTPUT7_ENABLE_mask = 0x0f << 28,
+ OUTPUT7_ENABLE_shift = 28,
+ PA_SC_GENERIC_SCISSOR_TL = 0x00028240,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
+ PA_SC_GENERIC_SCISSOR_BR = 0x00028244,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16,
+ PA_SC_VPORT_SCISSOR_0_TL = 0x00028250,
+ PA_SC_VPORT_SCISSOR_0_TL_num = 16,
+ PA_SC_VPORT_SCISSOR_0_TL_offset = 8,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
+ PA_SC_VPORT_SCISSOR_0_BR = 0x00028254,
+ PA_SC_VPORT_SCISSOR_0_BR_num = 16,
+ PA_SC_VPORT_SCISSOR_0_BR_offset = 8,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16,
+ PA_SC_VPORT_ZMIN_0 = 0x000282d0,
+ PA_SC_VPORT_ZMIN_0_num = 16,
+ PA_SC_VPORT_ZMIN_0_offset = 8,
+ PA_SC_VPORT_ZMAX_0 = 0x000282d4,
+ PA_SC_VPORT_ZMAX_0_num = 16,
+ PA_SC_VPORT_ZMAX_0_offset = 8,
+ SX_MISC = 0x00028350,
+ MULTIPASS_bit = 1 << 0,
+ SQ_VTX_SEMANTIC_0 = 0x00028380,
+ SQ_VTX_SEMANTIC_0_num = 32,
+/* SEMANTIC_ID_mask = 0xff << 0, */
+/* SEMANTIC_ID_shift = 0, */
+ VGT_MAX_VTX_INDX = 0x00028400,
+ VGT_MIN_VTX_INDX = 0x00028404,
+ VGT_INDX_OFFSET = 0x00028408,
+ VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c,
+ SX_ALPHA_TEST_CONTROL = 0x00028410,
+ ALPHA_FUNC_mask = 0x07 << 0,
+ ALPHA_FUNC_shift = 0,
+ REF_NEVER = 0x00,
+ REF_LESS = 0x01,
+ REF_EQUAL = 0x02,
+ REF_LEQUAL = 0x03,
+ REF_GREATER = 0x04,
+ REF_NOTEQUAL = 0x05,
+ REF_GEQUAL = 0x06,
+ REF_ALWAYS = 0x07,
+ ALPHA_TEST_ENABLE_bit = 1 << 3,
+ ALPHA_TEST_BYPASS_bit = 1 << 8,
+ CB_BLEND_RED = 0x00028414,
+ CB_BLEND_GREEN = 0x00028418,
+ CB_BLEND_BLUE = 0x0002841c,
+ CB_BLEND_ALPHA = 0x00028420,
+ CB_FOG_RED = 0x00028424,
+ CB_FOG_GREEN = 0x00028428,
+ CB_FOG_BLUE = 0x0002842c,
+ DB_STENCILREFMASK = 0x00028430,
+ STENCILREF_mask = 0xff << 0,
+ STENCILREF_shift = 0,
+ STENCILMASK_mask = 0xff << 8,
+ STENCILMASK_shift = 8,
+ STENCILWRITEMASK_mask = 0xff << 16,
+ STENCILWRITEMASK_shift = 16,
+ DB_STENCILREFMASK_BF = 0x00028434,
+ STENCILREF_BF_mask = 0xff << 0,
+ STENCILREF_BF_shift = 0,
+ STENCILMASK_BF_mask = 0xff << 8,
+ STENCILMASK_BF_shift = 8,
+ STENCILWRITEMASK_BF_mask = 0xff << 16,
+ STENCILWRITEMASK_BF_shift = 16,
+ SX_ALPHA_REF = 0x00028438,
+ PA_CL_VPORT_XSCALE_0 = 0x0002843c,
+ PA_CL_VPORT_XSCALE_0_num = 16,
+ PA_CL_VPORT_XSCALE_0_offset = 24,
+ PA_CL_VPORT_XOFFSET_0 = 0x00028440,
+ PA_CL_VPORT_XOFFSET_0_num = 16,
+ PA_CL_VPORT_XOFFSET_0_offset = 24,
+ PA_CL_VPORT_YSCALE_0 = 0x00028444,
+ PA_CL_VPORT_YSCALE_0_num = 16,
+ PA_CL_VPORT_YSCALE_0_offset = 24,
+ PA_CL_VPORT_YOFFSET_0 = 0x00028448,
+ PA_CL_VPORT_YOFFSET_0_num = 16,
+ PA_CL_VPORT_YOFFSET_0_offset = 24,
+ PA_CL_VPORT_ZSCALE_0 = 0x0002844c,
+ PA_CL_VPORT_ZSCALE_0_num = 16,
+ PA_CL_VPORT_ZSCALE_0_offset = 24,
+ PA_CL_VPORT_ZOFFSET_0 = 0x00028450,
+ PA_CL_VPORT_ZOFFSET_0_num = 16,
+ PA_CL_VPORT_ZOFFSET_0_offset = 24,
+ SPI_VS_OUT_ID_0 = 0x00028614,
+ SPI_VS_OUT_ID_0_num = 10,
+ SEMANTIC_0_mask = 0xff << 0,
+ SEMANTIC_0_shift = 0,
+ SEMANTIC_1_mask = 0xff << 8,
+ SEMANTIC_1_shift = 8,
+ SEMANTIC_2_mask = 0xff << 16,
+ SEMANTIC_2_shift = 16,
+ SEMANTIC_3_mask = 0xff << 24,
+ SEMANTIC_3_shift = 24,
+ SPI_PS_INPUT_CNTL_0 = 0x00028644,
+ SPI_PS_INPUT_CNTL_0_num = 32,
+ SEMANTIC_mask = 0xff << 0,
+ SEMANTIC_shift = 0,
+ DEFAULT_VAL_mask = 0x03 << 8,
+ DEFAULT_VAL_shift = 8,
+ X_0_0F = 0x00,
+ FLAT_SHADE_bit = 1 << 10,
+ SEL_CENTROID_bit = 1 << 11,
+ SEL_LINEAR_bit = 1 << 12,
+ CYL_WRAP_mask = 0x0f << 13,
+ CYL_WRAP_shift = 13,
+ PT_SPRITE_TEX_bit = 1 << 17,
+ SEL_SAMPLE_bit = 1 << 18,
+ SPI_VS_OUT_CONFIG = 0x000286c4,
+ VS_PER_COMPONENT_bit = 1 << 0,
+ VS_EXPORT_COUNT_mask = 0x1f << 1,
+ VS_EXPORT_COUNT_shift = 1,
+ VS_EXPORTS_FOG_bit = 1 << 8,
+ VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9,
+ VS_OUT_FOG_VEC_ADDR_shift = 9,
+ SPI_PS_IN_CONTROL_0 = 0x000286cc,
+ NUM_INTERP_mask = 0x3f << 0,
+ NUM_INTERP_shift = 0,
+ POSITION_ENA_bit = 1 << 8,
+ POSITION_CENTROID_bit = 1 << 9,
+ POSITION_ADDR_mask = 0x1f << 10,
+ POSITION_ADDR_shift = 10,
+ PARAM_GEN_mask = 0x0f << 15,
+ PARAM_GEN_shift = 15,
+ PARAM_GEN_ADDR_mask = 0x7f << 19,
+ PARAM_GEN_ADDR_shift = 19,
+ BARYC_SAMPLE_CNTL_mask = 0x03 << 26,
+ BARYC_SAMPLE_CNTL_shift = 26,
+ CENTROIDS_ONLY = 0x00,
+ CENTERS_ONLY = 0x01,
+ CENTROIDS_AND_CENTERS = 0x02,
+ UNDEF = 0x03,
+ PERSP_GRADIENT_ENA_bit = 1 << 28,
+ LINEAR_GRADIENT_ENA_bit = 1 << 29,
+ POSITION_SAMPLE_bit = 1 << 30,
+ BARYC_AT_SAMPLE_ENA_bit = 1 << 31,
+ SPI_PS_IN_CONTROL_1 = 0x000286d0,
+ GEN_INDEX_PIX_bit = 1 << 0,
+ GEN_INDEX_PIX_ADDR_mask = 0x7f << 1,
+ GEN_INDEX_PIX_ADDR_shift = 1,
+ FRONT_FACE_ENA_bit = 1 << 8,
+ FRONT_FACE_CHAN_mask = 0x03 << 9,
+ FRONT_FACE_CHAN_shift = 9,
+ FRONT_FACE_ALL_BITS_bit = 1 << 11,
+ FRONT_FACE_ADDR_mask = 0x1f << 12,
+ FRONT_FACE_ADDR_shift = 12,
+ FOG_ADDR_mask = 0x7f << 17,
+ FOG_ADDR_shift = 17,
+ FIXED_PT_POSITION_ENA_bit = 1 << 24,
+ FIXED_PT_POSITION_ADDR_mask = 0x1f << 25,
+ FIXED_PT_POSITION_ADDR_shift = 25,
+ SPI_INTERP_CONTROL_0 = 0x000286d4,
+ FLAT_SHADE_ENA_bit = 1 << 0,
+ PNT_SPRITE_ENA_bit = 1 << 1,
+ PNT_SPRITE_OVRD_X_mask = 0x07 << 2,
+ PNT_SPRITE_OVRD_X_shift = 2,
+ SPI_PNT_SPRITE_SEL_0 = 0x00,
+ SPI_PNT_SPRITE_SEL_1 = 0x01,
+ SPI_PNT_SPRITE_SEL_S = 0x02,
+ SPI_PNT_SPRITE_SEL_T = 0x03,
+ SPI_PNT_SPRITE_SEL_NONE = 0x04,
+ PNT_SPRITE_OVRD_Y_mask = 0x07 << 5,
+ PNT_SPRITE_OVRD_Y_shift = 5,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_OVRD_Z_mask = 0x07 << 8,
+ PNT_SPRITE_OVRD_Z_shift = 8,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_OVRD_W_mask = 0x07 << 11,
+ PNT_SPRITE_OVRD_W_shift = 11,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_TOP_1_bit = 1 << 14,
+ SPI_INPUT_Z = 0x000286d8,
+ PROVIDE_Z_TO_SPI_bit = 1 << 0,
+ SPI_FOG_CNTL = 0x000286dc,
+ PASS_FOG_THROUGH_PS_bit = 1 << 0,
+ PIXEL_FOG_FUNC_mask = 0x03 << 1,
+ PIXEL_FOG_FUNC_shift = 1,
+ SPI_FOG_NONE = 0x00,
+ SPI_FOG_EXP = 0x01,
+ SPI_FOG_EXP2 = 0x02,
+ SPI_FOG_LINEAR = 0x03,
+ PIXEL_FOG_SRC_SEL_bit = 1 << 3,
+ VS_FOG_CLAMP_DISABLE_bit = 1 << 4,
+ SPI_FOG_FUNC_SCALE = 0x000286e0,
+ SPI_FOG_FUNC_BIAS = 0x000286e4,
+ CB_BLEND0_CONTROL = 0x00028780,
+ CB_BLEND0_CONTROL_num = 8,
+ COLOR_SRCBLEND_mask = 0x1f << 0,
+ COLOR_SRCBLEND_shift = 0,
+ COLOR_COMB_FCN_mask = 0x07 << 5,
+ COLOR_COMB_FCN_shift = 5,
+ COLOR_DESTBLEND_mask = 0x1f << 8,
+ COLOR_DESTBLEND_shift = 8,
+ OPACITY_WEIGHT_bit = 1 << 13,
+ ALPHA_SRCBLEND_mask = 0x1f << 16,
+ ALPHA_SRCBLEND_shift = 16,
+ ALPHA_COMB_FCN_mask = 0x07 << 21,
+ ALPHA_COMB_FCN_shift = 21,
+ ALPHA_DESTBLEND_mask = 0x1f << 24,
+ ALPHA_DESTBLEND_shift = 24,
+ SEPARATE_ALPHA_BLEND_bit = 1 << 29,
+ VGT_DMA_BASE_HI = 0x000287e4,
+ VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0,
+ VGT_DMA_BASE_HI__BASE_ADDR_shift = 0,
+ VGT_DMA_BASE = 0x000287e8,
+ VGT_DRAW_INITIATOR = 0x000287f0,
+ SOURCE_SELECT_mask = 0x03 << 0,
+ SOURCE_SELECT_shift = 0,
+ DI_SRC_SEL_DMA = 0x00,
+ DI_SRC_SEL_IMMEDIATE = 0x01,
+ DI_SRC_SEL_AUTO_INDEX = 0x02,
+ DI_SRC_SEL_RESERVED = 0x03,
+ MAJOR_MODE_mask = 0x03 << 2,
+ MAJOR_MODE_shift = 2,
+ DI_MAJOR_MODE_0 = 0x00,
+ DI_MAJOR_MODE_1 = 0x01,
+ SPRITE_EN_bit = 1 << 4,
+ NOT_EOP_bit = 1 << 5,
+ USE_OPAQUE_bit = 1 << 6,
+ VGT_IMMED_DATA = 0x000287f4,
+ VGT_EVENT_ADDRESS_REG = 0x000287f8,
+ ADDRESS_LOW_mask = 0xfffffff << 0,
+ ADDRESS_LOW_shift = 0,
+ DB_DEPTH_CONTROL = 0x00028800,
+ STENCIL_ENABLE_bit = 1 << 0,
+ Z_ENABLE_bit = 1 << 1,
+ Z_WRITE_ENABLE_bit = 1 << 2,
+ ZFUNC_mask = 0x07 << 4,
+ ZFUNC_shift = 4,
+ FRAG_NEVER = 0x00,
+ FRAG_LESS = 0x01,
+ FRAG_EQUAL = 0x02,
+ FRAG_LEQUAL = 0x03,
+ FRAG_GREATER = 0x04,
+ FRAG_NOTEQUAL = 0x05,
+ FRAG_GEQUAL = 0x06,
+ FRAG_ALWAYS = 0x07,
+ BACKFACE_ENABLE_bit = 1 << 7,
+ STENCILFUNC_mask = 0x07 << 8,
+ STENCILFUNC_shift = 8,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ STENCILFAIL_mask = 0x07 << 11,
+ STENCILFAIL_shift = 11,
+ STENCIL_KEEP = 0x00,
+ STENCIL_ZERO = 0x01,
+ STENCIL_REPLACE = 0x02,
+ STENCIL_INCR_CLAMP = 0x03,
+ STENCIL_DECR_CLAMP = 0x04,
+ STENCIL_INVERT = 0x05,
+ STENCIL_INCR_WRAP = 0x06,
+ STENCIL_DECR_WRAP = 0x07,
+ STENCILZPASS_mask = 0x07 << 14,
+ STENCILZPASS_shift = 14,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZFAIL_mask = 0x07 << 17,
+ STENCILZFAIL_shift = 17,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILFUNC_BF_mask = 0x07 << 20,
+ STENCILFUNC_BF_shift = 20,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ STENCILFAIL_BF_mask = 0x07 << 23,
+ STENCILFAIL_BF_shift = 23,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZPASS_BF_mask = 0x07 << 26,
+ STENCILZPASS_BF_shift = 26,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZFAIL_BF_mask = 0x07 << 29,
+ STENCILZFAIL_BF_shift = 29,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ CB_BLEND_CONTROL = 0x00028804,
+/* COLOR_SRCBLEND_mask = 0x1f << 0, */
+/* COLOR_SRCBLEND_shift = 0, */
+ BLEND_ZERO = 0x00,
+ BLEND_ONE = 0x01,
+ BLEND_SRC_COLOR = 0x02,
+ BLEND_ONE_MINUS_SRC_COLOR = 0x03,
+ BLEND_SRC_ALPHA = 0x04,
+ BLEND_ONE_MINUS_SRC_ALPHA = 0x05,
+ BLEND_DST_ALPHA = 0x06,
+ BLEND_ONE_MINUS_DST_ALPHA = 0x07,
+ BLEND_DST_COLOR = 0x08,
+ BLEND_ONE_MINUS_DST_COLOR = 0x09,
+ BLEND_SRC_ALPHA_SATURATE = 0x0a,
+ BLEND_BOTH_SRC_ALPHA = 0x0b,
+ BLEND_BOTH_INV_SRC_ALPHA = 0x0c,
+ BLEND_CONSTANT_COLOR = 0x0d,
+ BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e,
+ BLEND_SRC1_COLOR = 0x0f,
+ BLEND_INV_SRC1_COLOR = 0x10,
+ BLEND_SRC1_ALPHA = 0x11,
+ BLEND_INV_SRC1_ALPHA = 0x12,
+ BLEND_CONSTANT_ALPHA = 0x13,
+ BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14,
+/* COLOR_COMB_FCN_mask = 0x07 << 5, */
+/* COLOR_COMB_FCN_shift = 5, */
+ COMB_DST_PLUS_SRC = 0x00,
+ COMB_SRC_MINUS_DST = 0x01,
+ COMB_MIN_DST_SRC = 0x02,
+ COMB_MAX_DST_SRC = 0x03,
+ COMB_DST_MINUS_SRC = 0x04,
+/* COLOR_DESTBLEND_mask = 0x1f << 8, */
+/* COLOR_DESTBLEND_shift = 8, */
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+/* OPACITY_WEIGHT_bit = 1 << 13, */
+/* ALPHA_SRCBLEND_mask = 0x1f << 16, */
+/* ALPHA_SRCBLEND_shift = 16, */
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+/* ALPHA_COMB_FCN_mask = 0x07 << 21, */
+/* ALPHA_COMB_FCN_shift = 21, */
+/* COMB_DST_PLUS_SRC = 0x00, */
+/* COMB_SRC_MINUS_DST = 0x01, */
+/* COMB_MIN_DST_SRC = 0x02, */
+/* COMB_MAX_DST_SRC = 0x03, */
+/* COMB_DST_MINUS_SRC = 0x04, */
+/* ALPHA_DESTBLEND_mask = 0x1f << 24, */
+/* ALPHA_DESTBLEND_shift = 24, */
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+/* SEPARATE_ALPHA_BLEND_bit = 1 << 29, */
+ CB_COLOR_CONTROL = 0x00028808,
+ FOG_ENABLE_bit = 1 << 0,
+ MULTIWRITE_ENABLE_bit = 1 << 1,
+ DITHER_ENABLE_bit = 1 << 2,
+ DEGAMMA_ENABLE_bit = 1 << 3,
+ SPECIAL_OP_mask = 0x07 << 4,
+ SPECIAL_OP_shift = 4,
+ SPECIAL_NORMAL = 0x00,
+ SPECIAL_DISABLE = 0x01,
+ SPECIAL_FAST_CLEAR = 0x02,
+ SPECIAL_FORCE_CLEAR = 0x03,
+ SPECIAL_EXPAND_COLOR = 0x04,
+ SPECIAL_EXPAND_TEXTURE = 0x05,
+ SPECIAL_EXPAND_SAMPLES = 0x06,
+ SPECIAL_RESOLVE_BOX = 0x07,
+ PER_MRT_BLEND_bit = 1 << 7,
+ TARGET_BLEND_ENABLE_mask = 0xff << 8,
+ TARGET_BLEND_ENABLE_shift = 8,
+ ROP3_mask = 0xff << 16,
+ ROP3_shift = 16,
+ DB_SHADER_CONTROL = 0x0002880c,
+ Z_EXPORT_ENABLE_bit = 1 << 0,
+ STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1,
+ Z_ORDER_mask = 0x03 << 4,
+ Z_ORDER_shift = 4,
+ LATE_Z = 0x00,
+ EARLY_Z_THEN_LATE_Z = 0x01,
+ RE_Z = 0x02,
+ EARLY_Z_THEN_RE_Z = 0x03,
+ KILL_ENABLE_bit = 1 << 6,
+ COVERAGE_TO_MASK_ENABLE_bit = 1 << 7,
+ MASK_EXPORT_ENABLE_bit = 1 << 8,
+ DUAL_EXPORT_ENABLE_bit = 1 << 9,
+ EXEC_ON_HIER_FAIL_bit = 1 << 10,
+ EXEC_ON_NOOP_bit = 1 << 11,
+ PA_CL_CLIP_CNTL = 0x00028810,
+ UCP_ENA_0_bit = 1 << 0,
+ UCP_ENA_1_bit = 1 << 1,
+ UCP_ENA_2_bit = 1 << 2,
+ UCP_ENA_3_bit = 1 << 3,
+ UCP_ENA_4_bit = 1 << 4,
+ UCP_ENA_5_bit = 1 << 5,
+ PS_UCP_Y_SCALE_NEG_bit = 1 << 13,
+ PS_UCP_MODE_mask = 0x03 << 14,
+ PS_UCP_MODE_shift = 14,
+ CLIP_DISABLE_bit = 1 << 16,
+ UCP_CULL_ONLY_ENA_bit = 1 << 17,
+ BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18,
+ DX_CLIP_SPACE_DEF_bit = 1 << 19,
+ DIS_CLIP_ERR_DETECT_bit = 1 << 20,
+ VTX_KILL_OR_bit = 1 << 21,
+ DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24,
+ VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25,
+ ZCLIP_NEAR_DISABLE_bit = 1 << 26,
+ ZCLIP_FAR_DISABLE_bit = 1 << 27,
+ PA_SU_SC_MODE_CNTL = 0x00028814,
+ CULL_FRONT_bit = 1 << 0,
+ CULL_BACK_bit = 1 << 1,
+ FACE_bit = 1 << 2,
+ POLY_MODE_mask = 0x03 << 3,
+ POLY_MODE_shift = 3,
+ X_DISABLE_POLY_MODE = 0x00,
+ X_DUAL_MODE = 0x01,
+ POLYMODE_FRONT_PTYPE_mask = 0x07 << 5,
+ POLYMODE_FRONT_PTYPE_shift = 5,
+ X_DRAW_POINTS = 0x00,
+ X_DRAW_LINES = 0x01,
+ X_DRAW_TRIANGLES = 0x02,
+ POLYMODE_BACK_PTYPE_mask = 0x07 << 8,
+ POLYMODE_BACK_PTYPE_shift = 8,
+/* X_DRAW_POINTS = 0x00, */
+/* X_DRAW_LINES = 0x01, */
+/* X_DRAW_TRIANGLES = 0x02, */
+ POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11,
+ POLY_OFFSET_BACK_ENABLE_bit = 1 << 12,
+ POLY_OFFSET_PARA_ENABLE_bit = 1 << 13,
+ VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16,
+ PROVOKING_VTX_LAST_bit = 1 << 19,
+ PERSP_CORR_DIS_bit = 1 << 20,
+ MULTI_PRIM_IB_ENA_bit = 1 << 21,
+ PA_CL_VTE_CNTL = 0x00028818,
+ VPORT_X_SCALE_ENA_bit = 1 << 0,
+ VPORT_X_OFFSET_ENA_bit = 1 << 1,
+ VPORT_Y_SCALE_ENA_bit = 1 << 2,
+ VPORT_Y_OFFSET_ENA_bit = 1 << 3,
+ VPORT_Z_SCALE_ENA_bit = 1 << 4,
+ VPORT_Z_OFFSET_ENA_bit = 1 << 5,
+ VTX_XY_FMT_bit = 1 << 8,
+ VTX_Z_FMT_bit = 1 << 9,
+ VTX_W0_FMT_bit = 1 << 10,
+ PERFCOUNTER_REF_bit = 1 << 11,
+ PA_CL_VS_OUT_CNTL = 0x0002881c,
+ CLIP_DIST_ENA_0_bit = 1 << 0,
+ CLIP_DIST_ENA_1_bit = 1 << 1,
+ CLIP_DIST_ENA_2_bit = 1 << 2,
+ CLIP_DIST_ENA_3_bit = 1 << 3,
+ CLIP_DIST_ENA_4_bit = 1 << 4,
+ CLIP_DIST_ENA_5_bit = 1 << 5,
+ CLIP_DIST_ENA_6_bit = 1 << 6,
+ CLIP_DIST_ENA_7_bit = 1 << 7,
+ CULL_DIST_ENA_0_bit = 1 << 8,
+ CULL_DIST_ENA_1_bit = 1 << 9,
+ CULL_DIST_ENA_2_bit = 1 << 10,
+ CULL_DIST_ENA_3_bit = 1 << 11,
+ CULL_DIST_ENA_4_bit = 1 << 12,
+ CULL_DIST_ENA_5_bit = 1 << 13,
+ CULL_DIST_ENA_6_bit = 1 << 14,
+ CULL_DIST_ENA_7_bit = 1 << 15,
+ USE_VTX_POINT_SIZE_bit = 1 << 16,
+ USE_VTX_EDGE_FLAG_bit = 1 << 17,
+ USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18,
+ USE_VTX_VIEWPORT_INDX_bit = 1 << 19,
+ USE_VTX_KILL_FLAG_bit = 1 << 20,
+ VS_OUT_MISC_VEC_ENA_bit = 1 << 21,
+ VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22,
+ VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23,
+ PA_CL_NANINF_CNTL = 0x00028820,
+ VTE_XY_INF_DISCARD_bit = 1 << 0,
+ VTE_Z_INF_DISCARD_bit = 1 << 1,
+ VTE_W_INF_DISCARD_bit = 1 << 2,
+ VTE_0XNANINF_IS_0_bit = 1 << 3,
+ VTE_XY_NAN_RETAIN_bit = 1 << 4,
+ VTE_Z_NAN_RETAIN_bit = 1 << 5,
+ VTE_W_NAN_RETAIN_bit = 1 << 6,
+ VTE_W_RECIP_NAN_IS_0_bit = 1 << 7,
+ VS_XY_NAN_TO_INF_bit = 1 << 8,
+ VS_XY_INF_RETAIN_bit = 1 << 9,
+ VS_Z_NAN_TO_INF_bit = 1 << 10,
+ VS_Z_INF_RETAIN_bit = 1 << 11,
+ VS_W_NAN_TO_INF_bit = 1 << 12,
+ VS_W_INF_RETAIN_bit = 1 << 13,
+ VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14,
+ VTE_NO_OUTPUT_NEG_0_bit = 1 << 20,
+ SQ_PGM_START_PS = 0x00028840,
+ SQ_PGM_RESOURCES_PS = 0x00028850,
+ NUM_GPRS_mask = 0xff << 0,
+ NUM_GPRS_shift = 0,
+ STACK_SIZE_mask = 0xff << 8,
+ STACK_SIZE_shift = 8,
+ SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit = 1 << 21,
+ FETCH_CACHE_LINES_mask = 0x07 << 24,
+ FETCH_CACHE_LINES_shift = 24,
+ UNCACHED_FIRST_INST_bit = 1 << 28,
+ CLAMP_CONSTS_bit = 1 << 31,
+ SQ_PGM_EXPORTS_PS = 0x00028854,
+ EXPORT_MODE_mask = 0x1f << 0,
+ EXPORT_MODE_shift = 0,
+ SQ_PGM_START_VS = 0x00028858,
+ SQ_PGM_RESOURCES_VS = 0x00028868,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit = 1 << 21,
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+/* FETCH_CACHE_LINES_shift = 24, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_START_GS = 0x0002886c,
+ SQ_PGM_RESOURCES_GS = 0x0002887c,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit = 1 << 21,
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+/* FETCH_CACHE_LINES_shift = 24, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_START_ES = 0x00028880,
+ SQ_PGM_RESOURCES_ES = 0x00028890,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit = 1 << 21,
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+/* FETCH_CACHE_LINES_shift = 24, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_START_FS = 0x00028894,
+ SQ_PGM_RESOURCES_FS = 0x000288a4,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit = 1 << 21,
+ SQ_ESGS_RING_ITEMSIZE = 0x000288a8,
+ ITEMSIZE_mask = 0x7fff << 0,
+ ITEMSIZE_shift = 0,
+ SQ_GSVS_RING_ITEMSIZE = 0x000288ac,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_ESTMP_RING_ITEMSIZE = 0x000288b0,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GSTMP_RING_ITEMSIZE = 0x000288b4,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_VSTMP_RING_ITEMSIZE = 0x000288b8,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_PSTMP_RING_ITEMSIZE = 0x000288bc,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_FBUF_RING_ITEMSIZE = 0x000288c0,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_REDUC_RING_ITEMSIZE = 0x000288c4,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE = 0x000288c8,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_PGM_CF_OFFSET_PS = 0x000288cc,
+ PGM_CF_OFFSET_mask = 0xfffff << 0,
+ PGM_CF_OFFSET_shift = 0,
+ SQ_PGM_CF_OFFSET_VS = 0x000288d0,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_PGM_CF_OFFSET_GS = 0x000288d4,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_PGM_CF_OFFSET_ES = 0x000288d8,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_PGM_CF_OFFSET_FS = 0x000288dc,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_VTX_SEMANTIC_CLEAR = 0x000288e0,
+ SQ_ALU_CONST_CACHE_PS_0 = 0x00028940,
+ SQ_ALU_CONST_CACHE_PS_0_num = 16,
+ SQ_ALU_CONST_CACHE_VS_0 = 0x00028980,
+ SQ_ALU_CONST_CACHE_VS_0_num = 16,
+ SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0,
+ SQ_ALU_CONST_CACHE_GS_0_num = 16,
+ PA_SU_POINT_SIZE = 0x00028a00,
+ PA_SU_POINT_SIZE__HEIGHT_mask = 0xffff << 0,
+ PA_SU_POINT_SIZE__HEIGHT_shift = 0,
+ PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16,
+ PA_SU_POINT_SIZE__WIDTH_shift = 16,
+ PA_SU_POINT_MINMAX = 0x00028a04,
+ MIN_SIZE_mask = 0xffff << 0,
+ MIN_SIZE_shift = 0,
+ MAX_SIZE_mask = 0xffff << 16,
+ MAX_SIZE_shift = 16,
+ PA_SU_LINE_CNTL = 0x00028a08,
+ PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0,
+ PA_SU_LINE_CNTL__WIDTH_shift = 0,
+ PA_SC_LINE_STIPPLE = 0x00028a0c,
+ LINE_PATTERN_mask = 0xffff << 0,
+ LINE_PATTERN_shift = 0,
+ REPEAT_COUNT_mask = 0xff << 16,
+ REPEAT_COUNT_shift = 16,
+ PATTERN_BIT_ORDER_bit = 1 << 28,
+ AUTO_RESET_CNTL_mask = 0x03 << 29,
+ AUTO_RESET_CNTL_shift = 29,
+ VGT_OUTPUT_PATH_CNTL = 0x00028a10,
+ PATH_SELECT_mask = 0x03 << 0,
+ PATH_SELECT_shift = 0,
+ VGT_OUTPATH_VTX_REUSE = 0x00,
+ VGT_OUTPATH_TESS_EN = 0x01,
+ VGT_OUTPATH_PASSTHRU = 0x02,
+ VGT_OUTPATH_GS_BLOCK = 0x03,
+ VGT_HOS_CNTL = 0x00028a14,
+ TESS_MODE_mask = 0x03 << 0,
+ TESS_MODE_shift = 0,
+ VGT_HOS_MAX_TESS_LEVEL = 0x00028a18,
+ VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c,
+ VGT_HOS_REUSE_DEPTH = 0x00028a20,
+ REUSE_DEPTH_mask = 0xff << 0,
+ REUSE_DEPTH_shift = 0,
+ VGT_GROUP_PRIM_TYPE = 0x00028a24,
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0,
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0,
+ VGT_GRP_3D_POINT = 0x00,
+ VGT_GRP_3D_LINE = 0x01,
+ VGT_GRP_3D_TRI = 0x02,
+ VGT_GRP_3D_RECT = 0x03,
+ VGT_GRP_3D_QUAD = 0x04,
+ VGT_GRP_2D_COPY_RECT_V0 = 0x05,
+ VGT_GRP_2D_COPY_RECT_V1 = 0x06,
+ VGT_GRP_2D_COPY_RECT_V2 = 0x07,
+ VGT_GRP_2D_COPY_RECT_V3 = 0x08,
+ VGT_GRP_2D_FILL_RECT = 0x09,
+ VGT_GRP_2D_LINE = 0x0a,
+ VGT_GRP_2D_TRI = 0x0b,
+ VGT_GRP_PRIM_INDEX_LINE = 0x0c,
+ VGT_GRP_PRIM_INDEX_TRI = 0x0d,
+ VGT_GRP_PRIM_INDEX_QUAD = 0x0e,
+ VGT_GRP_3D_LINE_ADJ = 0x0f,
+ VGT_GRP_3D_TRI_ADJ = 0x10,
+ RETAIN_ORDER_bit = 1 << 14,
+ RETAIN_QUADS_bit = 1 << 15,
+ PRIM_ORDER_mask = 0x07 << 16,
+ PRIM_ORDER_shift = 16,
+ VGT_GRP_LIST = 0x00,
+ VGT_GRP_STRIP = 0x01,
+ VGT_GRP_FAN = 0x02,
+ VGT_GRP_LOOP = 0x03,
+ VGT_GRP_POLYGON = 0x04,
+ VGT_GROUP_FIRST_DECR = 0x00028a28,
+ FIRST_DECR_mask = 0x0f << 0,
+ FIRST_DECR_shift = 0,
+ VGT_GROUP_DECR = 0x00028a2c,
+ DECR_mask = 0x0f << 0,
+ DECR_shift = 0,
+ VGT_GROUP_VECT_0_CNTL = 0x00028a30,
+ COMP_X_EN_bit = 1 << 0,
+ COMP_Y_EN_bit = 1 << 1,
+ COMP_Z_EN_bit = 1 << 2,
+ COMP_W_EN_bit = 1 << 3,
+ VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8,
+ SHIFT_mask = 0xff << 16,
+ SHIFT_shift = 16,
+ VGT_GROUP_VECT_1_CNTL = 0x00028a34,
+/* COMP_X_EN_bit = 1 << 0, */
+/* COMP_Y_EN_bit = 1 << 1, */
+/* COMP_Z_EN_bit = 1 << 2, */
+/* COMP_W_EN_bit = 1 << 3, */
+ VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8,
+/* SHIFT_mask = 0xff << 16, */
+/* SHIFT_shift = 16, */
+ VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38,
+ X_CONV_mask = 0x0f << 0,
+ X_CONV_shift = 0,
+ VGT_GRP_INDEX_16 = 0x00,
+ VGT_GRP_INDEX_32 = 0x01,
+ VGT_GRP_UINT_16 = 0x02,
+ VGT_GRP_UINT_32 = 0x03,
+ VGT_GRP_SINT_16 = 0x04,
+ VGT_GRP_SINT_32 = 0x05,
+ VGT_GRP_FLOAT_32 = 0x06,
+ VGT_GRP_AUTO_PRIM = 0x07,
+ VGT_GRP_FIX_1_23_TO_FLOAT = 0x08,
+ X_OFFSET_mask = 0x0f << 4,
+ X_OFFSET_shift = 4,
+ Y_CONV_mask = 0x0f << 8,
+ Y_CONV_shift = 8,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ Y_OFFSET_mask = 0x0f << 12,
+ Y_OFFSET_shift = 12,
+ Z_CONV_mask = 0x0f << 16,
+ Z_CONV_shift = 16,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ Z_OFFSET_mask = 0x0f << 20,
+ Z_OFFSET_shift = 20,
+ W_CONV_mask = 0x0f << 24,
+ W_CONV_shift = 24,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ W_OFFSET_mask = 0x0f << 28,
+ W_OFFSET_shift = 28,
+ VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c,
+/* X_CONV_mask = 0x0f << 0, */
+/* X_CONV_shift = 0, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* X_OFFSET_mask = 0x0f << 4, */
+/* X_OFFSET_shift = 4, */
+/* Y_CONV_mask = 0x0f << 8, */
+/* Y_CONV_shift = 8, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* Y_OFFSET_mask = 0x0f << 12, */
+/* Y_OFFSET_shift = 12, */
+/* Z_CONV_mask = 0x0f << 16, */
+/* Z_CONV_shift = 16, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* Z_OFFSET_mask = 0x0f << 20, */
+/* Z_OFFSET_shift = 20, */
+/* W_CONV_mask = 0x0f << 24, */
+/* W_CONV_shift = 24, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* W_OFFSET_mask = 0x0f << 28, */
+/* W_OFFSET_shift = 28, */
+ VGT_GS_MODE = 0x00028a40,
+ MODE_mask = 0x03 << 0,
+ MODE_shift = 0,
+ GS_OFF = 0x00,
+ GS_SCENARIO_A = 0x01,
+ GS_SCENARIO_B = 0x02,
+ GS_SCENARIO_G = 0x03,
+ ES_PASSTHRU_bit = 1 << 2,
+ CUT_MODE_mask = 0x03 << 3,
+ CUT_MODE_shift = 3,
+ GS_CUT_1024 = 0x00,
+ GS_CUT_512 = 0x01,
+ GS_CUT_256 = 0x02,
+ GS_CUT_128 = 0x03,
+ PA_SC_MPASS_PS_CNTL = 0x00028a48,
+ MPASS_PIX_VEC_PER_PASS_mask = 0xfffff << 0,
+ MPASS_PIX_VEC_PER_PASS_shift = 0,
+ MPASS_PS_ENA_bit = 1 << 31,
+ PA_SC_MODE_CNTL = 0x00028a4c,
+ MSAA_ENABLE_bit = 1 << 0,
+ CLIPRECT_ENABLE_bit = 1 << 1,
+ LINE_STIPPLE_ENABLE_bit = 1 << 2,
+ MULTI_CHIP_PRIM_DISCARD_ENAB_bit = 1 << 3,
+ WALK_ORDER_ENABLE_bit = 1 << 4,
+ HALVE_DETAIL_SAMPLE_PERF_bit = 1 << 5,
+ WALK_SIZE_bit = 1 << 6,
+ WALK_ALIGNMENT_bit = 1 << 7,
+ WALK_ALIGN8_PRIM_FITS_ST_bit = 1 << 8,
+ TILE_COVER_NO_SCISSOR_bit = 1 << 9,
+ KILL_PIX_POST_HI_Z_bit = 1 << 10,
+ KILL_PIX_POST_DETAIL_MASK_bit = 1 << 11,
+ MULTI_CHIP_SUPERTILE_ENABLE_bit = 1 << 12,
+ TILE_COVER_DISABLE_bit = 1 << 13,
+ FORCE_EOV_CNTDWN_ENABLE_bit = 1 << 14,
+ FORCE_EOV_TILE_ENABLE_bit = 1 << 15,
+ FORCE_EOV_REZ_ENABLE_bit = 1 << 16,
+ PS_ITER_SAMPLE_bit = 1 << 17,
+ VGT_ENHANCE = 0x00028a50,
+ VGT_ENHANCE__MI_TIMESTAMP_RES_mask = 0x03 << 0,
+ VGT_ENHANCE__MI_TIMESTAMP_RES_shift = 0,
+ X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32 = 0x00,
+ X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16 = 0x01,
+ X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8 = 0x02,
+ X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4 = 0x03,
+ MISC_mask = 0x3fffffff << 2,
+ MISC_shift = 2,
+ VGT_GS_OUT_PRIM_TYPE = 0x00028a6c,
+ OUTPRIM_TYPE_mask = 0x3f << 0,
+ OUTPRIM_TYPE_shift = 0,
+ POINTLIST = 0x00,
+ LINESTRIP = 0x01,
+ TRISTRIP = 0x02,
+ VGT_DMA_SIZE = 0x00028a74,
+ VGT_DMA_INDEX_TYPE = 0x00028a7c,
+/* INDEX_TYPE_mask = 0x03 << 0, */
+/* INDEX_TYPE_shift = 0, */
+ VGT_INDEX_16 = 0x00,
+ VGT_INDEX_32 = 0x01,
+ SWAP_MODE_mask = 0x03 << 2,
+ SWAP_MODE_shift = 2,
+ VGT_DMA_SWAP_NONE = 0x00,
+ VGT_DMA_SWAP_16_BIT = 0x01,
+ VGT_DMA_SWAP_32_BIT = 0x02,
+ VGT_DMA_SWAP_WORD = 0x03,
+ VGT_PRIMITIVEID_EN = 0x00028a84,
+ PRIMITIVEID_EN_bit = 1 << 0,
+ VGT_DMA_NUM_INSTANCES = 0x00028a88,
+ VGT_EVENT_INITIATOR = 0x00028a90,
+ EVENT_TYPE_mask = 0x3f << 0,
+ EVENT_TYPE_shift = 0,
+ CACHE_FLUSH_TS = 0x04,
+ CONTEXT_DONE = 0x05,
+ CACHE_FLUSH = 0x06,
+ VIZQUERY_START = 0x07,
+ VIZQUERY_END = 0x08,
+ SC_WAIT_WC = 0x09,
+ MPASS_PS_CP_REFETCH = 0x0a,
+ MPASS_PS_RST_START = 0x0b,
+ MPASS_PS_INCR_START = 0x0c,
+ RST_PIX_CNT = 0x0d,
+ RST_VTX_CNT = 0x0e,
+ VS_PARTIAL_FLUSH = 0x0f,
+ PS_PARTIAL_FLUSH = 0x10,
+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x14,
+ ZPASS_DONE = 0x15,
+ CACHE_FLUSH_AND_INV_EVENT = 0x16,
+ PERFCOUNTER_START = 0x17,
+ PERFCOUNTER_STOP = 0x18,
+ PIPELINESTAT_START = 0x19,
+ PIPELINESTAT_STOP = 0x1a,
+ PERFCOUNTER_SAMPLE = 0x1b,
+ FLUSH_ES_OUTPUT = 0x1c,
+ FLUSH_GS_OUTPUT = 0x1d,
+ SAMPLE_PIPELINESTAT = 0x1e,
+ SO_VGTSTREAMOUT_FLUSH = 0x1f,
+ SAMPLE_STREAMOUTSTATS = 0x20,
+ RESET_VTX_CNT = 0x21,
+ BLOCK_CONTEXT_DONE = 0x22,
+ CR_CONTEXT_DONE = 0x23,
+ VGT_FLUSH = 0x24,
+ CR_DONE_TS = 0x25,
+ SQ_NON_EVENT = 0x26,
+ SC_SEND_DB_VPZ = 0x27,
+ BOTTOM_OF_PIPE_TS = 0x28,
+ DB_CACHE_FLUSH_AND_INV = 0x2a,
+ ADDRESS_HI_mask = 0xff << 19,
+ ADDRESS_HI_shift = 19,
+ EXTENDED_EVENT_bit = 1 << 27,
+ VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94,
+ RESET_EN_bit = 1 << 0,
+ VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0,
+ VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4,
+ VGT_STRMOUT_EN = 0x00028ab0,
+ STREAMOUT_bit = 1 << 0,
+ VGT_REUSE_OFF = 0x00028ab4,
+ REUSE_OFF_bit = 1 << 0,
+ VGT_VTX_CNT_EN = 0x00028ab8,
+ VTX_CNT_EN_bit = 1 << 0,
+ VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0,
+ VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4,
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8,
+ VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc,
+ VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0,
+ VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4,
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8,
+ VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec,
+ VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0,
+ VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4,
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8,
+ VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc,
+ VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00,
+ VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04,
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08,
+ VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c,
+ VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10,
+ VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14,
+ VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18,
+ VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c,
+ VGT_STRMOUT_BUFFER_EN = 0x00028b20,
+ BUFFER_0_EN_bit = 1 << 0,
+ BUFFER_1_EN_bit = 1 << 1,
+ BUFFER_2_EN_bit = 1 << 2,
+ BUFFER_3_EN_bit = 1 << 3,
+ VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28,
+ VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c,
+ VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30,
+ VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44,
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48,
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c,
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50,
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0,
+ PA_SC_LINE_CNTL = 0x00028c00,
+ BRES_CNTL_mask = 0xff << 0,
+ BRES_CNTL_shift = 0,
+ USE_BRES_CNTL_bit = 1 << 8,
+ EXPAND_LINE_WIDTH_bit = 1 << 9,
+ LAST_PIXEL_bit = 1 << 10,
+ PA_SC_AA_CONFIG = 0x00028c04,
+ MSAA_NUM_SAMPLES_mask = 0x03 << 0,
+ MSAA_NUM_SAMPLES_shift = 0,
+ AA_MASK_CENTROID_DTMN_bit = 1 << 4,
+ MAX_SAMPLE_DIST_mask = 0x0f << 13,
+ MAX_SAMPLE_DIST_shift = 13,
+ PA_SU_VTX_CNTL = 0x00028c08,
+ PIX_CENTER_bit = 1 << 0,
+ PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1,
+ PA_SU_VTX_CNTL__ROUND_MODE_shift = 1,
+ X_TRUNCATE = 0x00,
+ X_ROUND = 0x01,
+ X_ROUND_TO_EVEN = 0x02,
+ X_ROUND_TO_ODD = 0x03,
+ QUANT_MODE_mask = 0x07 << 3,
+ QUANT_MODE_shift = 3,
+ X_1_16TH = 0x00,
+ X_1_8TH = 0x01,
+ X_1_4TH = 0x02,
+ X_1_2 = 0x03,
+ X_1 = 0x04,
+ X_1_256TH = 0x05,
+ PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c,
+ PA_CL_GB_VERT_DISC_ADJ = 0x00028c10,
+ PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14,
+ PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18,
+ PA_SC_AA_SAMPLE_LOCS_MCTX = 0x00028c1c,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX = 0x00028c20,
+/* S4_X_mask = 0x0f << 0, */
+/* S4_X_shift = 0, */
+/* S4_Y_mask = 0x0f << 4, */
+/* S4_Y_shift = 4, */
+/* S5_X_mask = 0x0f << 8, */
+/* S5_X_shift = 8, */
+/* S5_Y_mask = 0x0f << 12, */
+/* S5_Y_shift = 12, */
+/* S6_X_mask = 0x0f << 16, */
+/* S6_X_shift = 16, */
+/* S6_Y_mask = 0x0f << 20, */
+/* S6_Y_shift = 20, */
+/* S7_X_mask = 0x0f << 24, */
+/* S7_X_shift = 24, */
+/* S7_Y_mask = 0x0f << 28, */
+/* S7_Y_shift = 28, */
+ CB_CLRCMP_CONTROL = 0x00028c30,
+ CLRCMP_FCN_SRC_mask = 0x07 << 0,
+ CLRCMP_FCN_SRC_shift = 0,
+ CLRCMP_DRAW_ALWAYS = 0x00,
+ CLRCMP_DRAW_NEVER = 0x01,
+ CLRCMP_DRAW_ON_NEQ = 0x04,
+ CLRCMP_DRAW_ON_EQ = 0x05,
+ CLRCMP_FCN_DST_mask = 0x07 << 8,
+ CLRCMP_FCN_DST_shift = 8,
+/* CLRCMP_DRAW_ALWAYS = 0x00, */
+/* CLRCMP_DRAW_NEVER = 0x01, */
+/* CLRCMP_DRAW_ON_NEQ = 0x04, */
+/* CLRCMP_DRAW_ON_EQ = 0x05, */
+ CLRCMP_FCN_SEL_mask = 0x03 << 24,
+ CLRCMP_FCN_SEL_shift = 24,
+ CLRCMP_SEL_DST = 0x00,
+ CLRCMP_SEL_SRC = 0x01,
+ CLRCMP_SEL_AND = 0x02,
+ CB_CLRCMP_SRC = 0x00028c34,
+ CB_CLRCMP_DST = 0x00028c38,
+ CB_CLRCMP_MSK = 0x00028c3c,
+ PA_SC_AA_MASK = 0x00028c48,
+ VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58,
+ VTX_REUSE_DEPTH_mask = 0xff << 0,
+ VTX_REUSE_DEPTH_shift = 0,
+ VGT_OUT_DEALLOC_CNTL = 0x00028c5c,
+ DEALLOC_DIST_mask = 0x7f << 0,
+ DEALLOC_DIST_shift = 0,
+ DB_RENDER_CONTROL = 0x00028d0c,
+ DEPTH_CLEAR_ENABLE_bit = 1 << 0,
+ STENCIL_CLEAR_ENABLE_bit = 1 << 1,
+ DEPTH_COPY_bit = 1 << 2,
+ STENCIL_COPY_bit = 1 << 3,
+ RESUMMARIZE_ENABLE_bit = 1 << 4,
+ STENCIL_COMPRESS_DISABLE_bit = 1 << 5,
+ DEPTH_COMPRESS_DISABLE_bit = 1 << 6,
+ COPY_CENTROID_bit = 1 << 7,
+ COPY_SAMPLE_mask = 0x07 << 8,
+ COPY_SAMPLE_shift = 8,
+ ZPASS_INCREMENT_DISABLE_bit = 1 << 11,
+ DB_RENDER_OVERRIDE = 0x00028d10,
+ FORCE_HIZ_ENABLE_mask = 0x03 << 0,
+ FORCE_HIZ_ENABLE_shift = 0,
+ FORCE_OFF = 0x00,
+ FORCE_ENABLE = 0x01,
+ FORCE_DISABLE = 0x02,
+ FORCE_RESERVED = 0x03,
+ FORCE_HIS_ENABLE0_mask = 0x03 << 2,
+ FORCE_HIS_ENABLE0_shift = 2,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_HIS_ENABLE1_mask = 0x03 << 4,
+ FORCE_HIS_ENABLE1_shift = 4,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_SHADER_Z_ORDER_bit = 1 << 6,
+ FAST_Z_DISABLE_bit = 1 << 7,
+ FAST_STENCIL_DISABLE_bit = 1 << 8,
+ NOOP_CULL_DISABLE_bit = 1 << 9,
+ FORCE_COLOR_KILL_bit = 1 << 10,
+ FORCE_Z_READ_bit = 1 << 11,
+ FORCE_STENCIL_READ_bit = 1 << 12,
+ FORCE_FULL_Z_RANGE_mask = 0x03 << 13,
+ FORCE_FULL_Z_RANGE_shift = 13,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_QC_SMASK_CONFLICT_bit = 1 << 15,
+ DISABLE_VIEWPORT_CLAMP_bit = 1 << 16,
+ IGNORE_SC_ZRANGE_bit = 1 << 17,
+ DB_HTILE_SURFACE = 0x00028d24,
+ HTILE_WIDTH_bit = 1 << 0,
+ HTILE_HEIGHT_bit = 1 << 1,
+ LINEAR_bit = 1 << 2,
+ FULL_CACHE_bit = 1 << 3,
+ HTILE_USES_PRELOAD_WIN_bit = 1 << 4,
+ PRELOAD_bit = 1 << 5,
+ PREFETCH_WIDTH_mask = 0x3f << 6,
+ PREFETCH_WIDTH_shift = 6,
+ PREFETCH_HEIGHT_mask = 0x3f << 12,
+ PREFETCH_HEIGHT_shift = 12,
+ DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c,
+ COMPAREFUNC1_mask = 0x07 << 0,
+ COMPAREFUNC1_shift = 0,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ COMPAREVALUE1_mask = 0xff << 4,
+ COMPAREVALUE1_shift = 4,
+ COMPAREMASK1_mask = 0xff << 12,
+ COMPAREMASK1_shift = 12,
+ ENABLE1_bit = 1 << 24,
+ DB_PRELOAD_CONTROL = 0x00028d30,
+ START_X_mask = 0xff << 0,
+ START_X_shift = 0,
+ START_Y_mask = 0xff << 8,
+ START_Y_shift = 8,
+ MAX_X_mask = 0xff << 16,
+ MAX_X_shift = 16,
+ MAX_Y_mask = 0xff << 24,
+ MAX_Y_shift = 24,
+ DB_PREFETCH_LIMIT = 0x00028d34,
+ DEPTH_HEIGHT_TILE_MAX_mask = 0x3ff << 0,
+ DEPTH_HEIGHT_TILE_MAX_shift = 0,
+ PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028df8,
+ POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0,
+ POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0,
+ POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8,
+ PA_SU_POLY_OFFSET_CLAMP = 0x00028dfc,
+ PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028e00,
+ PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028e04,
+ PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028e08,
+ PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028e0c,
+ PA_CL_POINT_X_RAD = 0x00028e10,
+ PA_CL_POINT_Y_RAD = 0x00028e14,
+ PA_CL_POINT_SIZE = 0x00028e18,
+ PA_CL_POINT_CULL_RAD = 0x00028e1c,
+ PA_CL_UCP_0_X = 0x00028e20,
+ PA_CL_UCP_0_X_num = 6,
+ PA_CL_UCP_0_X_offset = 16,
+ PA_CL_UCP_0_Y = 0x00028e24,
+ PA_CL_UCP_0_Y_num = 6,
+ PA_CL_UCP_0_Y_offset = 16,
+ PA_CL_UCP_0_Z = 0x00028e28,
+ PA_CL_UCP_0_Z_num = 6,
+ PA_CL_UCP_0_Z_offset = 16,
+ SQ_ALU_CONSTANT0_0 = 0x00030000,
+ SQ_ALU_CONSTANT1_0 = 0x00030004,
+ SQ_ALU_CONSTANT2_0 = 0x00030008,
+ SQ_ALU_CONSTANT3_0 = 0x0003000c,
+ SQ_VTX_CONSTANT_WORD0_0 = 0x00038000,
+ SQ_TEX_RESOURCE_WORD0_0 = 0x00038000,
+ DIM_mask = 0x07 << 0,
+ DIM_shift = 0,
+ SQ_TEX_DIM_1D = 0x00,
+ SQ_TEX_DIM_2D = 0x01,
+ SQ_TEX_DIM_3D = 0x02,
+ SQ_TEX_DIM_CUBEMAP = 0x03,
+ SQ_TEX_DIM_1D_ARRAY = 0x04,
+ SQ_TEX_DIM_2D_ARRAY = 0x05,
+ SQ_TEX_DIM_2D_MSAA = 0x06,
+ SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask = 0x0f << 3,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift = 3,
+ TILE_TYPE_bit = 1 << 7,
+ PITCH_mask = 0x7ff << 8,
+ PITCH_shift = 8,
+ TEX_WIDTH_mask = 0x1fff << 19,
+ TEX_WIDTH_shift = 19,
+ SQ_VTX_CONSTANT_WORD1_0 = 0x00038004,
+ SQ_TEX_RESOURCE_WORD1_0 = 0x00038004,
+ TEX_HEIGHT_mask = 0x1fff << 0,
+ TEX_HEIGHT_shift = 0,
+ TEX_DEPTH_mask = 0x1fff << 13,
+ TEX_DEPTH_shift = 13,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask = 0x3f << 26,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift = 26,
+ SQ_VTX_CONSTANT_WORD2_0 = 0x00038008,
+ BASE_ADDRESS_HI_mask = 0xff << 0,
+ BASE_ADDRESS_HI_shift = 0,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8,
+ SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28,
+ SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29,
+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30,
+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ SQ_TEX_RESOURCE_WORD2_0 = 0x00038008,
+ SQ_VTX_CONSTANT_WORD3_0 = 0x0003800c,
+ MEM_REQUEST_SIZE_mask = 0x03 << 0,
+ MEM_REQUEST_SIZE_shift = 0,
+ SQ_TEX_RESOURCE_WORD3_0 = 0x0003800c,
+ SQ_TEX_RESOURCE_WORD4_0 = 0x00038010,
+ FORMAT_COMP_X_mask = 0x03 << 0,
+ FORMAT_COMP_X_shift = 0,
+ SQ_FORMAT_COMP_UNSIGNED = 0x00,
+ SQ_FORMAT_COMP_SIGNED = 0x01,
+ SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02,
+ FORMAT_COMP_Y_mask = 0x03 << 2,
+ FORMAT_COMP_Y_shift = 2,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ FORMAT_COMP_Z_mask = 0x03 << 4,
+ FORMAT_COMP_Z_shift = 4,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ FORMAT_COMP_W_mask = 0x03 << 6,
+ FORMAT_COMP_W_shift = 6,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8,
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10,
+ SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ REQUEST_SIZE_mask = 0x03 << 14,
+ REQUEST_SIZE_shift = 14,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ BASE_LEVEL_mask = 0x0f << 28,
+ BASE_LEVEL_shift = 28,
+ SQ_TEX_RESOURCE_WORD5_0 = 0x00038014,
+ LAST_LEVEL_mask = 0x0f << 0,
+ LAST_LEVEL_shift = 0,
+ BASE_ARRAY_mask = 0x1fff << 4,
+ BASE_ARRAY_shift = 4,
+ LAST_ARRAY_mask = 0x1fff << 17,
+ LAST_ARRAY_shift = 17,
+ SQ_TEX_RESOURCE_WORD6_0 = 0x00038018,
+ MPEG_CLAMP_mask = 0x03 << 0,
+ MPEG_CLAMP_shift = 0,
+ SQ_TEX_MPEG_CLAMP_OFF = 0x00,
+ SQ_TEX_MPEG_9 = 0x01,
+ SQ_TEX_MPEG_10 = 0x02,
+ PERF_MODULATION_mask = 0x07 << 5,
+ PERF_MODULATION_shift = 5,
+ INTERLACED_bit = 1 << 8,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_mask = 0x03 << 30,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift = 30,
+ SQ_TEX_VTX_INVALID_TEXTURE = 0x00,
+ SQ_TEX_VTX_INVALID_BUFFER = 0x01,
+ SQ_TEX_VTX_VALID_TEXTURE = 0x02,
+ SQ_TEX_VTX_VALID_BUFFER = 0x03,
+ SQ_VTX_CONSTANT_WORD6_0 = 0x00038018,
+ SQ_VTX_CONSTANT_WORD6_0__TYPE_mask = 0x03 << 30,
+ SQ_VTX_CONSTANT_WORD6_0__TYPE_shift = 30,
+/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */
+/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */
+/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */
+/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */
+ SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0,
+ SQ_TEX_WRAP = 0x00,
+ SQ_TEX_MIRROR = 0x01,
+ SQ_TEX_CLAMP_LAST_TEXEL = 0x02,
+ SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03,
+ SQ_TEX_CLAMP_HALF_BORDER = 0x04,
+ SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05,
+ SQ_TEX_CLAMP_BORDER = 0x06,
+ SQ_TEX_MIRROR_ONCE_BORDER = 0x07,
+ CLAMP_Y_mask = 0x07 << 3,
+ CLAMP_Y_shift = 3,
+/* SQ_TEX_WRAP = 0x00, */
+/* SQ_TEX_MIRROR = 0x01, */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+ CLAMP_Z_mask = 0x07 << 6,
+ CLAMP_Z_shift = 6,
+/* SQ_TEX_WRAP = 0x00, */
+/* SQ_TEX_MIRROR = 0x01, */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+ XY_MAG_FILTER_mask = 0x07 << 9,
+ XY_MAG_FILTER_shift = 9,
+ SQ_TEX_XY_FILTER_POINT = 0x00,
+ SQ_TEX_XY_FILTER_BILINEAR = 0x01,
+ SQ_TEX_XY_FILTER_BICUBIC = 0x02,
+ XY_MIN_FILTER_mask = 0x07 << 12,
+ XY_MIN_FILTER_shift = 12,
+/* SQ_TEX_XY_FILTER_POINT = 0x00, */
+/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */
+/* SQ_TEX_XY_FILTER_BICUBIC = 0x02, */
+ Z_FILTER_mask = 0x03 << 15,
+ Z_FILTER_shift = 15,
+ SQ_TEX_Z_FILTER_NONE = 0x00,
+ SQ_TEX_Z_FILTER_POINT = 0x01,
+ SQ_TEX_Z_FILTER_LINEAR = 0x02,
+ MIP_FILTER_mask = 0x03 << 17,
+ MIP_FILTER_shift = 17,
+/* SQ_TEX_Z_FILTER_NONE = 0x00, */
+/* SQ_TEX_Z_FILTER_POINT = 0x01, */
+/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */
+ BORDER_COLOR_TYPE_mask = 0x03 << 22,
+ BORDER_COLOR_TYPE_shift = 22,
+ SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00,
+ SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01,
+ SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02,
+ SQ_TEX_BORDER_COLOR_REGISTER = 0x03,
+ POINT_SAMPLING_CLAMP_bit = 1 << 24,
+ TEX_ARRAY_OVERRIDE_bit = 1 << 25,
+ DEPTH_COMPARE_FUNCTION_mask = 0x07 << 26,
+ DEPTH_COMPARE_FUNCTION_shift = 26,
+ SQ_TEX_DEPTH_COMPARE_NEVER = 0x00,
+ SQ_TEX_DEPTH_COMPARE_LESS = 0x01,
+ SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02,
+ SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03,
+ SQ_TEX_DEPTH_COMPARE_GREATER = 0x04,
+ SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05,
+ SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06,
+ SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07,
+ CHROMA_KEY_mask = 0x03 << 29,
+ CHROMA_KEY_shift = 29,
+ SQ_TEX_CHROMA_KEY_DISABLED = 0x00,
+ SQ_TEX_CHROMA_KEY_KILL = 0x01,
+ SQ_TEX_CHROMA_KEY_BLEND = 0x02,
+ LOD_USES_MINOR_AXIS_bit = 1 << 31,
+ SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004,
+ MIN_LOD_mask = 0x3ff << 0,
+ MIN_LOD_shift = 0,
+ MAX_LOD_mask = 0x3ff << 10,
+ MAX_LOD_shift = 10,
+ SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask = 0xfff << 20,
+ SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift = 20,
+ SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008,
+ LOD_BIAS_SEC_mask = 0xfff << 0,
+ LOD_BIAS_SEC_shift = 0,
+ MC_COORD_TRUNCATE_bit = 1 << 12,
+ SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 13,
+ HIGH_PRECISION_FILTER_bit = 1 << 14,
+ PERF_MIP_mask = 0x07 << 15,
+ PERF_MIP_shift = 15,
+ PERF_Z_mask = 0x03 << 18,
+ PERF_Z_shift = 18,
+ FETCH_4_bit = 1 << 26,
+ SAMPLE_IS_PCF_bit = 1 << 27,
+ SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31,
+ SQ_VTX_BASE_VTX_LOC = 0x0003cff0,
+ SQ_VTX_START_INST_LOC = 0x0003cff4,
+ SQ_LOOP_CONST_DX10_0 = 0x0003e200,
+ SQ_LOOP_CONST_0 = 0x0003e200,
+ SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0,
+ SQ_LOOP_CONST_0__COUNT_shift = 0,
+ INIT_mask = 0xfff << 12,
+ INIT_shift = 12,
+ INC_mask = 0xff << 24,
+ INC_shift = 24,
+ SQ_BOOL_CONST_0 = 0x0003e380,
+ SQ_BOOL_CONST_0_num = 3,
+
+} ;
+
+#endif /* _AUTOREGS */
+
diff --git a/r600/r600_reg_r6xx.h b/r600/r600_reg_r6xx.h
new file mode 100644
index 0000000..f7702c4
--- /dev/null
+++ b/r600/r600_reg_r6xx.h
@@ -0,0 +1,492 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_R6xx_H_
+#define _R600_REG_R6xx_H_
+
+/*
+ * Registers for R6xx chips that are not documented yet
+ */
+
+enum {
+
+ MM_INDEX = 0x0000,
+ MM_DATA = 0x0004,
+
+ SRBM_STATUS = 0x0e50,
+ RLC_RQ_PENDING_bit = 1 << 3,
+ RCU_RQ_PENDING_bit = 1 << 4,
+ GRBM_RQ_PENDING_bit = 1 << 5,
+ HI_RQ_PENDING_bit = 1 << 6,
+ IO_EXTERN_SIGNAL_bit = 1 << 7,
+ VMC_BUSY_bit = 1 << 8,
+ MCB_BUSY_bit = 1 << 9,
+ MCDZ_BUSY_bit = 1 << 10,
+ MCDY_BUSY_bit = 1 << 11,
+ MCDX_BUSY_bit = 1 << 12,
+ MCDW_BUSY_bit = 1 << 13,
+ SEM_BUSY_bit = 1 << 14,
+ SRBM_STATUS__RLC_BUSY_bit = 1 << 15,
+ PDMA_BUSY_bit = 1 << 16,
+ IH_BUSY_bit = 1 << 17,
+ CSC_BUSY_bit = 1 << 20,
+ CMC7_BUSY_bit = 1 << 21,
+ CMC6_BUSY_bit = 1 << 22,
+ CMC5_BUSY_bit = 1 << 23,
+ CMC4_BUSY_bit = 1 << 24,
+ CMC3_BUSY_bit = 1 << 25,
+ CMC2_BUSY_bit = 1 << 26,
+ CMC1_BUSY_bit = 1 << 27,
+ CMC0_BUSY_bit = 1 << 28,
+ BIF_BUSY_bit = 1 << 29,
+ IDCT_BUSY_bit = 1 << 30,
+
+ SRBM_READ_ERROR = 0x0e98,
+ READ_ADDRESS_mask = 0xffff << 2,
+ READ_ADDRESS_shift = 2,
+ READ_REQUESTER_HI_bit = 1 << 24,
+ READ_REQUESTER_GRBM_bit = 1 << 25,
+ READ_REQUESTER_RCU_bit = 1 << 26,
+ READ_REQUESTER_RLC_bit = 1 << 27,
+ READ_ERROR_bit = 1 << 31,
+
+ SRBM_INT_STATUS = 0x0ea4,
+ RDERR_INT_STAT_bit = 1 << 0,
+ GFX_CNTX_SWITCH_INT_STAT_bit = 1 << 1,
+ SRBM_INT_ACK = 0x0ea8,
+ RDERR_INT_ACK_bit = 1 << 0,
+ GFX_CNTX_SWITCH_INT_ACK_bit = 1 << 1,
+
+ R6XX_MC_VM_FB_LOCATION = 0x2180,
+
+ VENDOR_DEVICE_ID = 0x4000,
+
+ D1GRPH_PRIMARY_SURFACE_ADDRESS = 0x6110,
+ D1GRPH_PITCH = 0x6120,
+ D1GRPH_Y_END = 0x6138,
+
+ GRBM_STATUS = 0x8010,
+ CMDFIFO_AVAIL_mask = 0x1f << 0,
+ CMDFIFO_AVAIL_shift = 0,
+ SRBM_RQ_PENDING_bit = 1 << 5,
+ CP_RQ_PENDING_bit = 1 << 6,
+ CF_RQ_PENDING_bit = 1 << 7,
+ PF_RQ_PENDING_bit = 1 << 8,
+ GRBM_EE_BUSY_bit = 1 << 10,
+ GRBM_STATUS__VC_BUSY_bit = 1 << 11,
+ DB03_CLEAN_bit = 1 << 12,
+ CB03_CLEAN_bit = 1 << 13,
+ VGT_BUSY_NO_DMA_bit = 1 << 16,
+ GRBM_STATUS__VGT_BUSY_bit = 1 << 17,
+ TA03_BUSY_bit = 1 << 18,
+ GRBM_STATUS__TC_BUSY_bit = 1 << 19,
+ SX_BUSY_bit = 1 << 20,
+ SH_BUSY_bit = 1 << 21,
+ SPI03_BUSY_bit = 1 << 22,
+ SMX_BUSY_bit = 1 << 23,
+ SC_BUSY_bit = 1 << 24,
+ PA_BUSY_bit = 1 << 25,
+ DB03_BUSY_bit = 1 << 26,
+ CR_BUSY_bit = 1 << 27,
+ CP_COHERENCY_BUSY_bit = 1 << 28,
+ GRBM_STATUS__CP_BUSY_bit = 1 << 29,
+ CB03_BUSY_bit = 1 << 30,
+ GUI_ACTIVE_bit = 1 << 31,
+ GRBM_STATUS2 = 0x8014,
+ CR_CLEAN_bit = 1 << 0,
+ SMX_CLEAN_bit = 1 << 1,
+ SPI0_BUSY_bit = 1 << 8,
+ SPI1_BUSY_bit = 1 << 9,
+ SPI2_BUSY_bit = 1 << 10,
+ SPI3_BUSY_bit = 1 << 11,
+ TA0_BUSY_bit = 1 << 12,
+ TA1_BUSY_bit = 1 << 13,
+ TA2_BUSY_bit = 1 << 14,
+ TA3_BUSY_bit = 1 << 15,
+ DB0_BUSY_bit = 1 << 16,
+ DB1_BUSY_bit = 1 << 17,
+ DB2_BUSY_bit = 1 << 18,
+ DB3_BUSY_bit = 1 << 19,
+ CB0_BUSY_bit = 1 << 20,
+ CB1_BUSY_bit = 1 << 21,
+ CB2_BUSY_bit = 1 << 22,
+ CB3_BUSY_bit = 1 << 23,
+ GRBM_SOFT_RESET = 0x8020,
+ SOFT_RESET_CP_bit = 1 << 0,
+ SOFT_RESET_CB_bit = 1 << 1,
+ SOFT_RESET_CR_bit = 1 << 2,
+ SOFT_RESET_DB_bit = 1 << 3,
+ SOFT_RESET_PA_bit = 1 << 5,
+ SOFT_RESET_SC_bit = 1 << 6,
+ SOFT_RESET_SMX_bit = 1 << 7,
+ SOFT_RESET_SPI_bit = 1 << 8,
+ SOFT_RESET_SH_bit = 1 << 9,
+ SOFT_RESET_SX_bit = 1 << 10,
+ SOFT_RESET_TC_bit = 1 << 11,
+ SOFT_RESET_TA_bit = 1 << 12,
+ SOFT_RESET_VC_bit = 1 << 13,
+ SOFT_RESET_VGT_bit = 1 << 14,
+ SOFT_RESET_GRBM_GCA_bit = 1 << 15,
+
+ WAIT_UNTIL = 0x8040,
+ WAIT_CP_DMA_IDLE_bit = 1 << 8,
+ WAIT_CMDFIFO_bit = 1 << 10,
+ WAIT_2D_IDLE_bit = 1 << 14,
+ WAIT_3D_IDLE_bit = 1 << 15,
+ WAIT_2D_IDLECLEAN_bit = 1 << 16,
+ WAIT_3D_IDLECLEAN_bit = 1 << 17,
+ WAIT_EXTERN_SIG_bit = 1 << 19,
+ CMDFIFO_ENTRIES_mask = 0x1f << 20,
+ CMDFIFO_ENTRIES_shift = 20,
+
+ GRBM_READ_ERROR = 0x8058,
+/* READ_ADDRESS_mask = 0xffff << 2, */
+/* READ_ADDRESS_shift = 2, */
+ READ_REQUESTER_SRBM_bit = 1 << 28,
+ READ_REQUESTER_CP_bit = 1 << 29,
+ READ_REQUESTER_WU_POLL_bit = 1 << 30,
+/* READ_ERROR_bit = 1 << 31, */
+
+ SCRATCH_REG0 = 0x8500,
+ SCRATCH_REG1 = 0x8504,
+ SCRATCH_REG2 = 0x8508,
+ SCRATCH_REG3 = 0x850c,
+ SCRATCH_REG4 = 0x8510,
+ SCRATCH_REG5 = 0x8514,
+ SCRATCH_REG6 = 0x8518,
+ SCRATCH_REG7 = 0x851c,
+ SCRATCH_UMSK = 0x8540,
+ SCRATCH_ADDR = 0x8544,
+
+ CP_COHER_CNTL = 0x85f0,
+ DEST_BASE_0_ENA_bit = 1 << 0,
+ DEST_BASE_1_ENA_bit = 1 << 1,
+ SO0_DEST_BASE_ENA_bit = 1 << 2,
+ SO1_DEST_BASE_ENA_bit = 1 << 3,
+ SO2_DEST_BASE_ENA_bit = 1 << 4,
+ SO3_DEST_BASE_ENA_bit = 1 << 5,
+ CB0_DEST_BASE_ENA_bit = 1 << 6,
+ CB1_DEST_BASE_ENA_bit = 1 << 7,
+ CB2_DEST_BASE_ENA_bit = 1 << 8,
+ CB3_DEST_BASE_ENA_bit = 1 << 9,
+ CB4_DEST_BASE_ENA_bit = 1 << 10,
+ CB5_DEST_BASE_ENA_bit = 1 << 11,
+ CB6_DEST_BASE_ENA_bit = 1 << 12,
+ CB7_DEST_BASE_ENA_bit = 1 << 13,
+ DB_DEST_BASE_ENA_bit = 1 << 14,
+ CR_DEST_BASE_ENA_bit = 1 << 15,
+ TC_ACTION_ENA_bit = 1 << 23,
+ VC_ACTION_ENA_bit = 1 << 24,
+ CB_ACTION_ENA_bit = 1 << 25,
+ DB_ACTION_ENA_bit = 1 << 26,
+ SH_ACTION_ENA_bit = 1 << 27,
+ SMX_ACTION_ENA_bit = 1 << 28,
+ CR0_ACTION_ENA_bit = 1 << 29,
+ CR1_ACTION_ENA_bit = 1 << 30,
+ CR2_ACTION_ENA_bit = 1 << 31,
+ CP_COHER_SIZE = 0x85f4,
+ CP_COHER_BASE = 0x85f8,
+ CP_COHER_STATUS = 0x85fc,
+ MATCHING_GFX_CNTX_mask = 0xff << 0,
+ MATCHING_GFX_CNTX_shift = 0,
+ MATCHING_CR_CNTX_mask = 0xffff << 8,
+ MATCHING_CR_CNTX_shift = 8,
+ STATUS_bit = 1 << 31,
+
+ CP_STALLED_STAT1 = 0x8674,
+ RBIU_TO_DMA_NOT_RDY_TO_RCV_bit = 1 << 0,
+ RBIU_TO_IBS_NOT_RDY_TO_RCV_bit = 1 << 1,
+ RBIU_TO_SEM_NOT_RDY_TO_RCV_bit = 1 << 2,
+ RBIU_TO_2DREGS_NOT_RDY_TO_RCV_bit = 1 << 3,
+ RBIU_TO_MEMWR_NOT_RDY_TO_RCV_bit = 1 << 4,
+ RBIU_TO_MEMRD_NOT_RDY_TO_RCV_bit = 1 << 5,
+ RBIU_TO_EOPD_NOT_RDY_TO_RCV_bit = 1 << 6,
+ RBIU_TO_RECT_NOT_RDY_TO_RCV_bit = 1 << 7,
+ RBIU_TO_STRMO_NOT_RDY_TO_RCV_bit = 1 << 8,
+ RBIU_TO_PSTAT_NOT_RDY_TO_RCV_bit = 1 << 9,
+ MIU_WAITING_ON_RDREQ_FREE_bit = 1 << 16,
+ MIU_WAITING_ON_WRREQ_FREE_bit = 1 << 17,
+ MIU_NEEDS_AVAIL_WRREQ_PHASE_bit = 1 << 18,
+ RCIU_WAITING_ON_GRBM_FREE_bit = 1 << 24,
+ RCIU_WAITING_ON_VGT_FREE_bit = 1 << 25,
+ RCIU_STALLED_ON_ME_READ_bit = 1 << 26,
+ RCIU_STALLED_ON_DMA_READ_bit = 1 << 27,
+ RCIU_HALTED_BY_REG_VIOLATION_bit = 1 << 28,
+ CP_STALLED_STAT2 = 0x8678,
+ PFP_TO_CSF_NOT_RDY_TO_RCV_bit = 1 << 0,
+ PFP_TO_MEQ_NOT_RDY_TO_RCV_bit = 1 << 1,
+ PFP_TO_VGT_NOT_RDY_TO_RCV_bit = 1 << 2,
+ PFP_HALTED_BY_INSTR_VIOLATION_bit = 1 << 3,
+ MULTIPASS_IB_PENDING_IN_PFP_bit = 1 << 4,
+ ME_BRUSH_WC_NOT_RDY_TO_RCV_bit = 1 << 8,
+ ME_STALLED_ON_BRUSH_LOGIC_bit = 1 << 9,
+ CR_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 10,
+ GFX_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 11,
+ ME_RCIU_NOT_RDY_TO_RCV_bit = 1 << 12,
+ ME_TO_CONST_NOT_RDY_TO_RCV_bit = 1 << 13,
+ ME_WAITING_DATA_FROM_PFP_bit = 1 << 14,
+ ME_WAITING_ON_PARTIAL_FLUSH_bit = 1 << 15,
+ RECT_FIFO_NEEDS_CR_RECT_DONE_bit = 1 << 16,
+ RECT_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 17,
+ EOPD_FIFO_NEEDS_SC_EOP_DONE_bit = 1 << 18,
+ EOPD_FIFO_NEEDS_SMX_EOP_DONE_bit = 1 << 19,
+ EOPD_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 20,
+ EOPD_FIFO_NEEDS_SIGNAL_SEM_bit = 1 << 21,
+ SO_NUMPRIM_FIFO_NEEDS_SOADDR_bit = 1 << 22,
+ SO_NUMPRIM_FIFO_NEEDS_NUMPRIM_bit = 1 << 23,
+ PIPE_STATS_FIFO_NEEDS_SAMPLE_bit = 1 << 24,
+ SURF_SYNC_NEEDS_IDLE_CNTXS_bit = 1 << 30,
+ SURF_SYNC_NEEDS_ALL_CLEAN_bit = 1 << 31,
+ CP_BUSY_STAT = 0x867c,
+ REG_BUS_FIFO_BUSY_bit = 1 << 0,
+ RING_FETCHING_DATA_bit = 1 << 1,
+ INDR1_FETCHING_DATA_bit = 1 << 2,
+ INDR2_FETCHING_DATA_bit = 1 << 3,
+ STATE_FETCHING_DATA_bit = 1 << 4,
+ PRED_FETCHING_DATA_bit = 1 << 5,
+ COHER_CNTR_NEQ_ZERO_bit = 1 << 6,
+ PFP_PARSING_PACKETS_bit = 1 << 7,
+ ME_PARSING_PACKETS_bit = 1 << 8,
+ RCIU_PFP_BUSY_bit = 1 << 9,
+ RCIU_ME_BUSY_bit = 1 << 10,
+ OUTSTANDING_READ_TAGS_bit = 1 << 11,
+ SEM_CMDFIFO_NOT_EMPTY_bit = 1 << 12,
+ SEM_FAILED_AND_HOLDING_bit = 1 << 13,
+ SEM_POLLING_FOR_PASS_bit = 1 << 14,
+ _3D_BUSY_bit = 1 << 15,
+ _2D_BUSY_bit = 1 << 16,
+ CP_STAT = 0x8680,
+ CSF_RING_BUSY_bit = 1 << 0,
+ CSF_WPTR_POLL_BUSY_bit = 1 << 1,
+ CSF_INDIRECT1_BUSY_bit = 1 << 2,
+ CSF_INDIRECT2_BUSY_bit = 1 << 3,
+ CSF_STATE_BUSY_bit = 1 << 4,
+ CSF_PREDICATE_BUSY_bit = 1 << 5,
+ CSF_BUSY_bit = 1 << 6,
+ MIU_RDREQ_BUSY_bit = 1 << 7,
+ MIU_WRREQ_BUSY_bit = 1 << 8,
+ ROQ_RING_BUSY_bit = 1 << 9,
+ ROQ_INDIRECT1_BUSY_bit = 1 << 10,
+ ROQ_INDIRECT2_BUSY_bit = 1 << 11,
+ ROQ_STATE_BUSY_bit = 1 << 12,
+ ROQ_PREDICATE_BUSY_bit = 1 << 13,
+ ROQ_ALIGN_BUSY_bit = 1 << 14,
+ PFP_BUSY_bit = 1 << 15,
+ MEQ_BUSY_bit = 1 << 16,
+ ME_BUSY_bit = 1 << 17,
+ QUERY_BUSY_bit = 1 << 18,
+ SEMAPHORE_BUSY_bit = 1 << 19,
+ INTERRUPT_BUSY_bit = 1 << 20,
+ SURFACE_SYNC_BUSY_bit = 1 << 21,
+ DMA_BUSY_bit = 1 << 22,
+ RCIU_BUSY_bit = 1 << 23,
+ CP_STAT__CP_BUSY_bit = 1 << 31,
+
+ CP_ME_CNTL = 0x86d8,
+ ME_STATMUX_mask = 0xff << 0,
+ ME_STATMUX_shift = 0,
+ ME_HALT_bit = 1 << 28,
+ CP_ME_STATUS = 0x86dc,
+
+ CP_RB_RPTR = 0x8700,
+ RB_RPTR_mask = 0xfffff << 0,
+ RB_RPTR_shift = 0,
+ CP_RB_WPTR_DELAY = 0x8704,
+ PRE_WRITE_TIMER_mask = 0xfffffff << 0,
+ PRE_WRITE_TIMER_shift = 0,
+ PRE_WRITE_LIMIT_mask = 0x0f << 28,
+ PRE_WRITE_LIMIT_shift = 28,
+
+ CP_ROQ_RB_STAT = 0x8780,
+ ROQ_RPTR_PRIMARY_mask = 0x3ff << 0,
+ ROQ_RPTR_PRIMARY_shift = 0,
+ ROQ_WPTR_PRIMARY_mask = 0x3ff << 16,
+ ROQ_WPTR_PRIMARY_shift = 16,
+ CP_ROQ_IB1_STAT = 0x8784,
+ ROQ_RPTR_INDIRECT1_mask = 0x3ff << 0,
+ ROQ_RPTR_INDIRECT1_shift = 0,
+ ROQ_WPTR_INDIRECT1_mask = 0x3ff << 16,
+ ROQ_WPTR_INDIRECT1_shift = 16,
+ CP_ROQ_IB2_STAT = 0x8788,
+ ROQ_RPTR_INDIRECT2_mask = 0x3ff << 0,
+ ROQ_RPTR_INDIRECT2_shift = 0,
+ ROQ_WPTR_INDIRECT2_mask = 0x3ff << 16,
+ ROQ_WPTR_INDIRECT2_shift = 16,
+
+ CP_MEQ_STAT = 0x8794,
+ MEQ_RPTR_mask = 0x3ff << 0,
+ MEQ_RPTR_shift = 0,
+ MEQ_WPTR_mask = 0x3ff << 16,
+ MEQ_WPTR_shift = 16,
+
+ CC_GC_SHADER_PIPE_CONFIG = 0x8950,
+ INACTIVE_QD_PIPES_mask = 0xff << 8,
+ INACTIVE_QD_PIPES_shift = 8,
+ R6XX_MAX_QD_PIPES = 8,
+ INACTIVE_SIMDS_mask = 0xff << 16,
+ INACTIVE_SIMDS_shift = 16,
+ R6XX_MAX_SIMDS = 8,
+ GC_USER_SHADER_PIPE_CONFIG = 0x8954,
+
+ VC_ENHANCE = 0x9714,
+ DB_DEBUG = 0x9830,
+ PREZ_MUST_WAIT_FOR_POSTZ_DONE = 1 << 31,
+
+ DB_WATERMARKS = 0x00009838,
+ DEPTH_FREE_mask = 0x1f << 0,
+ DEPTH_FREE_shift = 0,
+ DEPTH_FLUSH_mask = 0x3f << 5,
+ DEPTH_FLUSH_shift = 5,
+ FORCE_SUMMARIZE_mask = 0x0f << 11,
+ FORCE_SUMMARIZE_shift = 11,
+ DEPTH_PENDING_FREE_mask = 0x1f << 15,
+ DEPTH_PENDING_FREE_shift = 15,
+ DEPTH_CACHELINE_FREE_mask = 0x1f << 20,
+ DEPTH_CACHELINE_FREE_shift = 20,
+ EARLY_Z_PANIC_DISABLE_bit = 1 << 25,
+ LATE_Z_PANIC_DISABLE_bit = 1 << 26,
+ RE_Z_PANIC_DISABLE_bit = 1 << 27,
+ DB_EXTRA_DEBUG_mask = 0x0f << 28,
+ DB_EXTRA_DEBUG_shift = 28,
+
+ CP_RB_BASE = 0xc100,
+ CP_RB_CNTL = 0xc104,
+ RB_BUFSZ_mask = 0x3f << 0,
+ CP_RB_WPTR = 0xc114,
+ RB_WPTR_mask = 0xfffff << 0,
+ RB_WPTR_shift = 0,
+ CP_RB_RPTR_WR = 0xc108,
+ RB_RPTR_WR_mask = 0xfffff << 0,
+ RB_RPTR_WR_shift = 0,
+
+ CP_INT_STATUS = 0xc128,
+ DISABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 0,
+ ENABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 1,
+ SEM_SIGNAL_INT_STAT_bit = 1 << 18,
+ CNTX_BUSY_INT_STAT_bit = 1 << 19,
+ CNTX_EMPTY_INT_STAT_bit = 1 << 20,
+ WAITMEM_SEM_INT_STAT_bit = 1 << 21,
+ PRIV_INSTR_INT_STAT_bit = 1 << 22,
+ PRIV_REG_INT_STAT_bit = 1 << 23,
+ OPCODE_ERROR_INT_STAT_bit = 1 << 24,
+ SCRATCH_INT_STAT_bit = 1 << 25,
+ TIME_STAMP_INT_STAT_bit = 1 << 26,
+ RESERVED_BIT_ERROR_INT_STAT_bit = 1 << 27,
+ DMA_INT_STAT_bit = 1 << 28,
+ IB2_INT_STAT_bit = 1 << 29,
+ IB1_INT_STAT_bit = 1 << 30,
+ RB_INT_STAT_bit = 1 << 31,
+
+// SX_ALPHA_TEST_CONTROL = 0x00028410,
+ ALPHA_FUNC__REF_NEVER = 0,
+ ALPHA_FUNC__REF_ALWAYS = 7,
+// DB_SHADER_CONTROL = 0x0002880c,
+ Z_ORDER__EARLY_Z_THEN_LATE_Z = 2,
+// PA_SU_SC_MODE_CNTL = 0x00028814,
+// POLY_MODE_mask = 0x03 << 3,
+ POLY_MODE__TRIANGLES = 0, POLY_MODE__DUAL_MODE,
+// POLYMODE_FRONT_PTYPE_mask = 0x07 << 5,
+ POLYMODE_PTYPE__POINTS = 0, POLYMODE_PTYPE__LINES, POLYMODE_PTYPE__TRIANGLES,
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1_M = 0x00028c20,
+ DB_SRESULTS_COMPARE_STATE0 = 0x00028d28, /* See autoregs: DB_SRESULTS_COMPARE_STATE1 */
+// DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c,
+ DB_ALPHA_TO_MASK = 0x00028d44,
+ ALPHA_TO_MASK_ENABLE = 1 << 0,
+ ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET0_shift = 8,
+ ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET1_shift = 10,
+ ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET2_shift = 12,
+ ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET3_shift = 14,
+
+// SQ_VTX_CONSTANT_WORD2_0 = 0x00038008,
+// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20,
+ FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2,
+ FMT_16=5, FMT_16_FLOAT, FMT_8_8,
+ FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4,
+ FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16,
+ FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8,
+ FMT_24_8_FLOAT, FMT_10_11_11, FMT_10_11_11_FLOAT, FMT_11_11_10,
+ FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, FMT_10_10_10_2,
+ FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16,
+ FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT,
+ FMT_1 = 37, FMT_GB_GR=39,
+ FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP,
+ FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32,
+ FMT_32_32_32_FLOAT=48,
+
+// High level register file lengths
+ SQ_ALU_CONSTANT = SQ_ALU_CONSTANT0_0, /* 256 PS, 256 VS */
+ SQ_ALU_CONSTANT_ps_num = 256,
+ SQ_ALU_CONSTANT_vs_num = 256,
+ SQ_ALU_CONSTANT_all_num = 512,
+ SQ_ALU_CONSTANT_offset = 16,
+ SQ_ALU_CONSTANT_ps = 0,
+ SQ_ALU_CONSTANT_vs = SQ_ALU_CONSTANT_ps + SQ_ALU_CONSTANT_ps_num,
+ SQ_TEX_RESOURCE = SQ_TEX_RESOURCE_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */
+ SQ_TEX_RESOURCE_ps_num = 160,
+ SQ_TEX_RESOURCE_vs_num = 160,
+ SQ_TEX_RESOURCE_fs_num = 16,
+ SQ_TEX_RESOURCE_gs_num = 160,
+ SQ_TEX_RESOURCE_all_num = 496,
+ SQ_TEX_RESOURCE_offset = 28,
+ SQ_TEX_RESOURCE_ps = 0,
+ SQ_TEX_RESOURCE_vs = SQ_TEX_RESOURCE_ps + SQ_TEX_RESOURCE_ps_num,
+ SQ_TEX_RESOURCE_fs = SQ_TEX_RESOURCE_vs + SQ_TEX_RESOURCE_vs_num,
+ SQ_TEX_RESOURCE_gs = SQ_TEX_RESOURCE_fs + SQ_TEX_RESOURCE_fs_num,
+ SQ_VTX_RESOURCE = SQ_VTX_CONSTANT_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */
+ SQ_VTX_RESOURCE_ps_num = 160,
+ SQ_VTX_RESOURCE_vs_num = 160,
+ SQ_VTX_RESOURCE_fs_num = 16,
+ SQ_VTX_RESOURCE_gs_num = 160,
+ SQ_VTX_RESOURCE_all_num = 496,
+ SQ_VTX_RESOURCE_offset = 28,
+ SQ_VTX_RESOURCE_ps = 0,
+ SQ_VTX_RESOURCE_vs = SQ_VTX_RESOURCE_ps + SQ_VTX_RESOURCE_ps_num,
+ SQ_VTX_RESOURCE_fs = SQ_VTX_RESOURCE_vs + SQ_VTX_RESOURCE_vs_num,
+ SQ_VTX_RESOURCE_gs = SQ_VTX_RESOURCE_fs + SQ_VTX_RESOURCE_fs_num,
+ SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0, /* 18 per PS, VS, GS */
+ SQ_TEX_SAMPLER_WORD_ps_num = 18,
+ SQ_TEX_SAMPLER_WORD_vs_num = 18,
+ SQ_TEX_SAMPLER_WORD_gs_num = 18,
+ SQ_TEX_SAMPLER_WORD_all_num = 54,
+ SQ_TEX_SAMPLER_WORD_offset = 12,
+ SQ_TEX_SAMPLER_WORD_ps = 0,
+ SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num,
+ SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num,
+ SQ_LOOP_CONST = SQ_LOOP_CONST_0, /* 32 per PS, VS, GS */
+ SQ_LOOP_CONST_ps_num = 32,
+ SQ_LOOP_CONST_vs_num = 32,
+ SQ_LOOP_CONST_gs_num = 32,
+ SQ_LOOP_CONST_all_num = 96,
+ SQ_LOOP_CONST_offset = 4,
+ SQ_LOOP_CONST_ps = 0,
+ SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num,
+ SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num,
+} ;
+
+
+#endif
diff --git a/r600/r600_reg_r7xx.h b/r600/r600_reg_r7xx.h
new file mode 100644
index 0000000..e5c01c8
--- /dev/null
+++ b/r600/r600_reg_r7xx.h
@@ -0,0 +1,149 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_R7xx_H_
+#define _R600_REG_R7xx_H_
+
+/*
+ * Register update for R7xx chips
+ */
+
+enum {
+
+ R7XX_MC_VM_FB_LOCATION = 0x00002024,
+
+// GRBM_STATUS = 0x00008010,
+ R7XX_TA_BUSY_bit = 1 << 14,
+
+ R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x00008d8c,
+ RING0_OFFSET_mask = 0xff << 0,
+ RING0_OFFSET_shift = 0,
+ ISOLATE_ES_ENABLE_bit = 1 << 12,
+ ISOLATE_GS_ENABLE_bit = 1 << 13,
+ VS_PC_LIMIT_ENABLE_bit = 1 << 14,
+
+// SQ_ALU_WORD0 = 0x00008dfc,
+// SRC0_SEL_mask = 0x1ff << 0,
+// SRC1_SEL_mask = 0x1ff << 13,
+ R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4,
+ R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5,
+ R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6,
+ R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7,
+// INDEX_MODE_mask = 0x07 << 26,
+ R7xx_SQ_INDEX_GLOBAL = 0x05,
+ R7xx_SQ_INDEX_GLOBAL_AR_X = 0x06,
+ R6xx_SQ_ALU_WORD1_OP2 = 0x00008dfc,
+ R7xx_SQ_ALU_WORD1_OP2_V2 = 0x00008dfc,
+ R6xx_FOG_MERGE_bit = 1 << 5,
+ R6xx_OMOD_mask = 0x03 << 6,
+ R7xx_OMOD_mask = 0x03 << 5,
+ R6xx_OMOD_shift = 6,
+ R7xx_OMOD_shift = 5,
+ R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8,
+ R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7,
+ R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift = 8,
+ R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7,
+ R7xx_SQ_OP2_INST_FREXP_64 = 0x07,
+ R7xx_SQ_OP2_INST_ADD_64 = 0x17,
+ R7xx_SQ_OP2_INST_MUL_64 = 0x1b,
+ R7xx_SQ_OP2_INST_FLT64_TO_FLT32 = 0x1c,
+ R7xx_SQ_OP2_INST_FLT32_TO_FLT64 = 0x1d,
+ R7xx_SQ_OP2_INST_LDEXP_64 = 0x7a,
+ R7xx_SQ_OP2_INST_FRACT_64 = 0x7b,
+ R7xx_SQ_OP2_INST_PRED_SETGT_64 = 0x7c,
+ R7xx_SQ_OP2_INST_PRED_SETE_64 = 0x7d,
+ R7xx_SQ_OP2_INST_PRED_SETGE_64 = 0x7e,
+// SQ_ALU_WORD1_OP3 = 0x00008dfc,
+// SRC2_SEL_mask = 0x1ff << 0,
+// R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4,
+// R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5,
+// R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6,
+// R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7,
+// SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13,
+ R7xx_SQ_OP3_INST_MULADD_64 = 0x08,
+ R7xx_SQ_OP3_INST_MULADD_64_M2 = 0x09,
+ R7xx_SQ_OP3_INST_MULADD_64_M4 = 0x0a,
+ R7xx_SQ_OP3_INST_MULADD_64_D2 = 0x0b,
+// SQ_CF_ALU_WORD1 = 0x00008dfc,
+ R6xx_USES_WATERFALL_bit = 1 << 25,
+ R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25,
+// SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc,
+// ARRAY_BASE_mask = 0x1fff << 0,
+// TYPE_mask = 0x03 << 13,
+// SQ_EXPORT_PARAM = 0x02,
+// X_UNUSED_FOR_SX_EXPORTS = 0x03,
+// ELEM_SIZE_mask = 0x03 << 30,
+// SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc,
+// SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23,
+ R7xx_SQ_CF_INST_MEM_EXPORT = 0x3a,
+// SQ_CF_WORD1 = 0x00008dfc,
+// SQ_CF_WORD1__COUNT_mask = 0x07 << 10,
+ R7xx_COUNT_3_bit = 1 << 19,
+// SQ_CF_WORD1__CF_INST_mask = 0x7f << 23,
+ R7xx_SQ_CF_INST_END_PROGRAM = 0x19,
+ R7xx_SQ_CF_INST_WAIT_ACK = 0x1a,
+ R7xx_SQ_CF_INST_TEX_ACK = 0x1b,
+ R7xx_SQ_CF_INST_VTX_ACK = 0x1c,
+ R7xx_SQ_CF_INST_VTX_TC_ACK = 0x1d,
+// SQ_VTX_WORD0 = 0x00008dfc,
+// VTX_INST_mask = 0x1f << 0,
+ R7xx_SQ_VTX_INST_MEM = 0x02,
+// SQ_VTX_WORD2 = 0x00008dfc,
+ R7xx_SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20,
+
+// SQ_TEX_WORD0 = 0x00008dfc,
+// TEX_INST_mask = 0x1f << 0,
+ R7xx_X_MEMORY_READ = 0x02,
+ R7xx_SQ_TEX_INST_KEEP_GRADIENTS = 0x0a,
+ R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1 = 0x0f,
+ R7xx_SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24,
+
+ R7xx_PA_SC_EDGERULE = 0x00028230,
+ R7xx_SPI_THREAD_GROUPING = 0x000286c8,
+ PS_GROUPING_mask = 0x1f << 0,
+ PS_GROUPING_shift = 0,
+ VS_GROUPING_mask = 0x1f << 8,
+ VS_GROUPING_shift = 8,
+ GS_GROUPING_mask = 0x1f << 16,
+ GS_GROUPING_shift = 16,
+ ES_GROUPING_mask = 0x1f << 24,
+ ES_GROUPING_shift = 24,
+ R7xx_CB_SHADER_CONTROL = 0x000287a0,
+ RT0_ENABLE_bit = 1 << 0,
+ RT1_ENABLE_bit = 1 << 1,
+ RT2_ENABLE_bit = 1 << 2,
+ RT3_ENABLE_bit = 1 << 3,
+ RT4_ENABLE_bit = 1 << 4,
+ RT5_ENABLE_bit = 1 << 5,
+ RT6_ENABLE_bit = 1 << 6,
+ RT7_ENABLE_bit = 1 << 7,
+// DB_ALPHA_TO_MASK = 0x00028d44,
+ R7xx_OFFSET_ROUND_bit = 1 << 16,
+// SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c,
+ R7xx_TRUNCATE_COORD_bit = 1 << 9,
+ R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10,
+
+} ;
+
+#endif /* _R600_REG_R7xx_H_ */
diff --git a/r600/r600_tex.c b/r600/r600_tex.c
new file mode 100644
index 0000000..d105b90
--- /dev/null
+++ b/r600/r600_tex.c
@@ -0,0 +1,440 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mipmap.h"
+#include "main/simple_list.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+
+#include "texmem.h"
+
+#include "r600_context.h"
+#include "r700_state.h"
+#include "radeon_mipmap_tree.h"
+#include "r600_tex.h"
+
+#include "xmlpool.h"
+
+
+static unsigned int translate_wrap_mode(GLenum wrapmode)
+{
+ switch(wrapmode) {
+ case GL_REPEAT: return SQ_TEX_WRAP;
+ case GL_CLAMP: return SQ_TEX_CLAMP_HALF_BORDER;
+ case GL_CLAMP_TO_EDGE: return SQ_TEX_CLAMP_LAST_TEXEL;
+ case GL_CLAMP_TO_BORDER: return SQ_TEX_CLAMP_BORDER;
+ case GL_MIRRORED_REPEAT: return SQ_TEX_MIRROR;
+ case GL_MIRROR_CLAMP_EXT: return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
+ case GL_MIRROR_CLAMP_TO_EDGE_EXT: return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
+ case GL_MIRROR_CLAMP_TO_BORDER_EXT: return SQ_TEX_MIRROR_ONCE_BORDER;
+ default:
+ radeon_error("bad wrap mode in %s", __FUNCTION__);
+ return 0;
+ }
+}
+
+
+/**
+ * Update the cached hardware registers based on the current texture wrap modes.
+ *
+ * \param t Texture object whose wrap modes are to be set
+ */
+static void r600UpdateTexWrap(radeonTexObjPtr t)
+{
+ struct gl_texture_object *tObj = &t->base;
+
+ SETfield(t->SQ_TEX_SAMPLER0, translate_wrap_mode(tObj->WrapS),
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift, SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
+
+ if (tObj->Target != GL_TEXTURE_1D) {
+ SETfield(t->SQ_TEX_SAMPLER0, translate_wrap_mode(tObj->WrapT),
+ CLAMP_Y_shift, CLAMP_Y_mask);
+
+ if (tObj->Target == GL_TEXTURE_3D)
+ SETfield(t->SQ_TEX_SAMPLER0, translate_wrap_mode(tObj->WrapR),
+ CLAMP_Z_shift, CLAMP_Z_mask);
+ }
+}
+
+static void r600SetTexDefaultState(radeonTexObjPtr t)
+{
+ /* Init text object to default states. */
+ t->SQ_TEX_RESOURCE0 = 0;
+ SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask);
+ SETfield(t->SQ_TEX_RESOURCE0, ARRAY_LINEAR_GENERAL,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+ CLEARbit(t->SQ_TEX_RESOURCE0, TILE_TYPE_bit);
+
+ t->SQ_TEX_RESOURCE1 = 0;
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ t->SQ_TEX_RESOURCE2 = 0;
+ t->SQ_TEX_RESOURCE3 = 0;
+
+ t->SQ_TEX_RESOURCE4 = 0;
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_NUM_FORMAT_NORM,
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift, SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask);
+ CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit);
+ CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_ENDIAN_NONE,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift, SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, 1, REQUEST_SIZE_shift, REQUEST_SIZE_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); /* mip-maps */
+
+ t->SQ_TEX_RESOURCE5 = 0;
+ t->SQ_TEX_RESOURCE6 = 0;
+
+ SETfield(t->SQ_TEX_RESOURCE6, SQ_TEX_VTX_VALID_TEXTURE,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
+
+ /* Initialize sampler registers */
+ t->SQ_TEX_SAMPLER0 = 0;
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, CLAMP_Y_shift, CLAMP_Y_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, CLAMP_Z_shift, CLAMP_Z_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT, XY_MAG_FILTER_shift, XY_MAG_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT, XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE, Z_FILTER_shift, Z_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE, MIP_FILTER_shift, MIP_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_TRANS_BLACK, BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask);
+
+ t->SQ_TEX_SAMPLER1 = 0;
+ SETfield(t->SQ_TEX_SAMPLER1, 0x3ff, MAX_LOD_shift, MAX_LOD_mask);
+
+ t->SQ_TEX_SAMPLER2 = 0;
+ SETbit(t->SQ_TEX_SAMPLER2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
+}
+
+
+#if 0
+static GLuint aniso_filter(GLfloat anisotropy)
+{
+ if (anisotropy >= 16.0) {
+ return R300_TX_MAX_ANISO_16_TO_1;
+ } else if (anisotropy >= 8.0) {
+ return R300_TX_MAX_ANISO_8_TO_1;
+ } else if (anisotropy >= 4.0) {
+ return R300_TX_MAX_ANISO_4_TO_1;
+ } else if (anisotropy >= 2.0) {
+ return R300_TX_MAX_ANISO_2_TO_1;
+ } else {
+ return R300_TX_MAX_ANISO_1_TO_1;
+ }
+ return 0;
+}
+#endif
+
+/**
+ * Set the texture magnification and minification modes.
+ *
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ * \param anisotropy Maximum anisotropy level
+ */
+static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
+{
+ /* Force revalidation to account for switches from/to mipmapping. */
+ t->validated = GL_FALSE;
+
+ /* Note that EXT_texture_filter_anisotropic is extremely vague about
+ * how anisotropic filtering interacts with the "normal" filter modes.
+ * When anisotropic filtering is enabled, we override min and mag
+ * filter settings completely. This includes driconf's settings.
+ */
+ if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
+ /*t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
+ | R300_TX_MIN_FILTER_ANISO
+ | R300_TX_MIN_FILTER_MIP_LINEAR
+ | aniso_filter(anisotropy);*/
+ radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "Using maximum anisotropy of %f\n", anisotropy);
+ return;
+ }
+
+ switch (minf) {
+ case GL_NEAREST:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+ XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_None,
+ MIP_FILTER_shift, MIP_FILTER_mask);
+ break;
+ case GL_LINEAR:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+ XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_None,
+ MIP_FILTER_shift, MIP_FILTER_mask);
+ break;
+ case GL_NEAREST_MIPMAP_NEAREST:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+ XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Point,
+ MIP_FILTER_shift, MIP_FILTER_mask);
+ break;
+ case GL_NEAREST_MIPMAP_LINEAR:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+ XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Linear,
+ MIP_FILTER_shift, MIP_FILTER_mask);
+ break;
+ case GL_LINEAR_MIPMAP_NEAREST:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+ XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Point,
+ MIP_FILTER_shift, MIP_FILTER_mask);
+ break;
+ case GL_LINEAR_MIPMAP_LINEAR:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+ XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Linear,
+ MIP_FILTER_shift, MIP_FILTER_mask);
+ break;
+ }
+
+ /* Note we don't have 3D mipmaps so only use the mag filter setting
+ * to set the 3D texture filter mode.
+ */
+ switch (magf) {
+ case GL_NEAREST:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+ XY_MAG_FILTER_shift, XY_MAG_FILTER_mask);
+ break;
+ case GL_LINEAR:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+ XY_MAG_FILTER_shift, XY_MAG_FILTER_mask);
+ break;
+ }
+}
+
+static void r600SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
+{
+ t->TD_PS_SAMPLER0_BORDER_ALPHA = *((uint32_t*)&(color[3]));
+ t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[2]));
+ t->TD_PS_SAMPLER0_BORDER_GREEN = *((uint32_t*)&(color[1]));
+ t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[0]));
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER,
+ BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask);
+}
+
+/**
+ * Changes variables and flags for a state update, which will happen at the
+ * next UpdateTextureState
+ */
+
+static void r600TexParameter(GLcontext * ctx, GLenum target,
+ struct gl_texture_object *texObj,
+ GLenum pname, const GLfloat * params)
+{
+ radeonTexObj* t = radeon_tex_obj(texObj);
+
+ radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_VERBOSE,
+ "%s( %s )\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr(pname));
+
+ switch (pname) {
+ case GL_TEXTURE_MIN_FILTER:
+ case GL_TEXTURE_MAG_FILTER:
+ case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+ r600SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
+ break;
+
+ case GL_TEXTURE_WRAP_S:
+ case GL_TEXTURE_WRAP_T:
+ case GL_TEXTURE_WRAP_R:
+ r600UpdateTexWrap(t);
+ break;
+
+ case GL_TEXTURE_BORDER_COLOR:
+ r600SetTexBorderColor(t, texObj->BorderColor);
+ break;
+
+ case GL_TEXTURE_BASE_LEVEL:
+ case GL_TEXTURE_MAX_LEVEL:
+ case GL_TEXTURE_MIN_LOD:
+ case GL_TEXTURE_MAX_LOD:
+ /* This isn't the most efficient solution but there doesn't appear to
+ * be a nice alternative. Since there's no LOD clamping,
+ * we just have to rely on loading the right subset of mipmap levels
+ * to simulate a clamped LOD.
+ */
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = 0;
+ t->validated = GL_FALSE;
+ }
+ break;
+
+ case GL_DEPTH_TEXTURE_MODE:
+ if (!texObj->Image[0][texObj->BaseLevel])
+ return;
+ if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat
+ == GL_DEPTH_COMPONENT) {
+ r600SetDepthTexMode(texObj);
+ break;
+ } else {
+ /* If the texture isn't a depth texture, changing this
+ * state won't cause any changes to the hardware.
+ * Don't force a flush of texture state.
+ */
+ return;
+ }
+
+ default:
+ return;
+ }
+}
+
+static void r600DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+ context_t* rmesa = R700_CONTEXT(ctx);
+ radeonTexObj* t = radeon_tex_obj(texObj);
+
+ radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL,
+ "%s( %p (target = %s) )\n", __FUNCTION__,
+ (void *)texObj,
+ _mesa_lookup_enum_by_nr(texObj->Target));
+
+ if (rmesa) {
+ int i;
+ radeon_firevertices(&rmesa->radeon);
+
+ for(i = 0; i < R700_MAX_TEXTURE_UNITS; ++i)
+ if (rmesa->hw.textures[i] == t)
+ rmesa->hw.textures[i] = 0;
+ }
+
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = 0;
+ }
+ _mesa_delete_texture_object(ctx, texObj);
+}
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+ * Fixup MaxAnisotropy according to user preference.
+ */
+static struct gl_texture_object *r600NewTextureObject(GLcontext * ctx,
+ GLuint name,
+ GLenum target)
+{
+ context_t* rmesa = R700_CONTEXT(ctx);
+ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
+
+
+ radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL,
+ "%s( %p (target = %s) )\n", __FUNCTION__,
+ t, _mesa_lookup_enum_by_nr(target));
+
+ _mesa_initialize_texture_object(&t->base, name, target);
+ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+
+ /* Initialize hardware state */
+ r600SetTexDefaultState(t);
+ r600UpdateTexWrap(t);
+ r600SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
+ r600SetTexBorderColor(t, t->base.BorderColor);
+
+ return &t->base;
+}
+
+void r600InitTextureFuncs(struct dd_function_table *functions)
+{
+ /* Note: we only plug in the functions we implement in the driver
+ * since _mesa_init_driver_functions() was already called.
+ */
+ functions->NewTextureImage = radeonNewTextureImage;
+ functions->FreeTexImageData = radeonFreeTexImageData;
+ functions->MapTexture = radeonMapTexture;
+ functions->UnmapTexture = radeonUnmapTexture;
+
+ functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
+ functions->TexImage1D = radeonTexImage1D;
+ functions->TexImage2D = radeonTexImage2D;
+ functions->TexImage3D = radeonTexImage3D;
+ functions->TexSubImage1D = radeonTexSubImage1D;
+ functions->TexSubImage2D = radeonTexSubImage2D;
+ functions->TexSubImage3D = radeonTexSubImage3D;
+ functions->GetTexImage = radeonGetTexImage;
+ functions->GetCompressedTexImage = radeonGetCompressedTexImage;
+ functions->NewTextureObject = r600NewTextureObject;
+ functions->DeleteTexture = r600DeleteTexture;
+ functions->IsTextureResident = driIsTextureResident;
+
+ functions->TexParameter = r600TexParameter;
+
+ functions->CompressedTexImage2D = radeonCompressedTexImage2D;
+ functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+
+ functions->GenerateMipmap = radeonGenerateMipmap;
+
+ driInitTextureFormats();
+}
diff --git a/r300/radeon_ioctl.h b/r600/r600_tex.h
index 3add775..fb0e1a0 100644
--- a/r300/radeon_ioctl.h
+++ b/r600/r600_tex.h
@@ -32,26 +32,32 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#ifndef __RADEON_IOCTL_H__
-#define __RADEON_IOCTL_H__
-
-#include "main/simple_list.h"
-#include "radeon_dri.h"
-#include "radeon_lock.h"
-
-#include "xf86drm.h"
-#include "drm.h"
-#if 0
-#include "r200context.h"
-#endif
-#include "radeon_drm.h"
-
-extern void radeonCopyBuffer(__DRIdrawablePrivate * drawable,
- const drm_clip_rect_t * rect);
-extern void radeonPageFlip(__DRIdrawablePrivate * drawable);
-extern void radeonFlush(GLcontext * ctx);
-extern void radeonFinish(GLcontext * ctx);
-extern void radeonWaitForIdleLocked(radeonContextPtr radeon);
-extern uint32_t radeonGetAge(radeonContextPtr radeon);
-
-#endif /* __RADEON_IOCTL_H__ */
+#ifndef __r600_TEX_H__
+#define __r600_TEX_H__
+
+/* TODO : review this after texture load code. */
+#define R700_BLIT_WIDTH_BYTES 1024
+/* The BASE_ADDRESS and MIP_ADDRESS fields are 256-byte-aligned */
+#define R700_TEXTURE_ALIGNMENT_MASK 0x255
+/* Texel pitch is 8 alignment. */
+#define R700_TEXEL_PITCH_ALIGNMENT_MASK 0x7
+
+#define R700_MAX_TEXTURE_UNITS 8 /* TODO : should be 16, lets make it work, review later */
+
+extern void r600SetDepthTexMode(struct gl_texture_object *tObj);
+
+extern void r600SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
+ __DRIdrawable *dPriv);
+
+extern void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
+ GLint format, __DRIdrawable *dPriv);
+
+extern void r600SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth,
+ GLuint pitch);
+
+extern GLboolean r600ValidateBuffers(GLcontext * ctx);
+
+extern void r600InitTextureFuncs(struct dd_function_table *functions);
+
+#endif /* __r600_TEX_H__ */
diff --git a/r600/r600_texstate.c b/r600/r600_texstate.c
new file mode 100644
index 0000000..bcb8d7c
--- /dev/null
+++ b/r600/r600_texstate.c
@@ -0,0 +1,960 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \todo Enable R300 texture tiling code?
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/texformat.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/enums.h"
+#include "main/simple_list.h"
+
+#include "r600_context.h"
+#include "r700_state.h"
+#include "radeon_mipmap_tree.h"
+#include "r600_tex.h"
+#include "r700_fragprog.h"
+#include "r700_vertprog.h"
+
+void r600UpdateTextureState(GLcontext * ctx);
+
+void r600UpdateTextureState(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ struct gl_texture_unit *texUnit;
+ struct radeon_tex_obj *t;
+ GLuint unit;
+
+ R600_STATECHANGE(context, tx);
+ R600_STATECHANGE(context, tx_smplr);
+ R600_STATECHANGE(context, tx_brdr_clr);
+
+ for (unit = 0; unit < R700_MAX_TEXTURE_UNITS; unit++) {
+ texUnit = &ctx->Texture.Unit[unit];
+ t = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
+ r700->textures[unit] = NULL;
+ if (texUnit->_ReallyEnabled) {
+ if (!t)
+ continue;
+ r700->textures[unit] = t;
+ }
+ }
+}
+
+static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_format)
+{
+ radeonTexObj *t = radeon_tex_obj(tObj);
+
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+
+ switch (mesa_format) /* This is mesa format. */
+ {
+ case MESA_FORMAT_RGBA8888:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGBA8888_REV:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB8888:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB8888_REV:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB888:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB565:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB565_REV:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB4444:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_4_4_4_4,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB4444_REV:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_4_4_4_4,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB1555:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_1_5_5_5,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB1555_REV:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_1_5_5_5,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_AL88:
+ case MESA_FORMAT_AL88_REV: /* TODO : Check this. */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB332:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_3_3_2,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_L8: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_I8: /* X, X, X, X */
+ case MESA_FORMAT_CI8:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ /* YUV422 TODO conversion */ /* X, Y, Z, ONE, G8R8_G8B8 */
+ /*
+ case MESA_FORMAT_YCBCR:
+ t->SQ_TEX_RESOURCE1.bitfields.DATA_FORMAT = ;
+ break;
+ */
+ /* VUY422 TODO conversion */ /* X, Y, Z, ONE, G8R8_G8B8 */
+ /*
+ case MESA_FORMAT_YCBCR_REV:
+ t->SQ_TEX_RESOURCE1.bitfields.DATA_FORMAT = ;
+ break;
+ */
+ case MESA_FORMAT_RGB_DXT1: /* not supported yet */
+
+ break;
+ case MESA_FORMAT_RGBA_DXT1: /* not supported yet */
+
+ break;
+ case MESA_FORMAT_RGBA_DXT3: /* not supported yet */
+
+ break;
+ case MESA_FORMAT_RGBA_DXT5: /* not supported yet */
+
+ break;
+ case MESA_FORMAT_RGBA_FLOAT32:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_32_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGBA_FLOAT16:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_16_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB_FLOAT32: /* X, Y, Z, ONE */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB_FLOAT16:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_Z16:
+ case MESA_FORMAT_Z24_S8:
+ case MESA_FORMAT_Z32:
+ switch (mesa_format) {
+ case MESA_FORMAT_Z16:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_16,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ break;
+ case MESA_FORMAT_Z24_S8:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_24_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ break;
+ case MESA_FORMAT_Z32:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_32,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ break;
+ };
+ switch (tObj->DepthMode) {
+ case GL_LUMINANCE: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case GL_INTENSITY: /* X, X, X, X */
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case GL_ALPHA: /* ZERO, ZERO, ZERO, X */
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ default:
+ return GL_FALSE;
+ }
+ break;
+ default:
+ /* Not supported format */
+ return GL_FALSE;
+ };
+
+ return GL_TRUE;
+}
+
+void r600SetDepthTexMode(struct gl_texture_object *tObj)
+{
+ radeonTexObjPtr t;
+
+ if (!tObj)
+ return;
+
+ t = radeon_tex_obj(tObj);
+
+ r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat);
+
+}
+
+/**
+ * Compute the cached hardware register values for the given texture object.
+ *
+ * \param rmesa Context pointer
+ * \param t the r300 texture object
+ */
+static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *texObj)
+{
+ radeonTexObj *t = radeon_tex_obj(texObj);
+ const struct gl_texture_image *firstImage;
+ int firstlevel = t->mt ? t->mt->firstLevel : 0;
+ GLuint uTexelPitch, row_align;
+
+ if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled &&
+ t->image_override &&
+ t->bo)
+ return;
+
+ firstImage = t->base.Image[0][firstlevel];
+
+ if (!t->image_override) {
+ if (!r600GetTexFormat(texObj, firstImage->TexFormat->MesaFormat)) {
+ radeon_error("unexpected texture format in %s\n",
+ __FUNCTION__);
+ return;
+ }
+ }
+
+ switch (texObj->Target) {
+ case GL_TEXTURE_1D:
+ SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_1D, DIM_shift, DIM_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, 0, TEX_DEPTH_shift, TEX_DEPTH_mask);
+ break;
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_RECTANGLE_NV:
+ SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, 0, TEX_DEPTH_shift, TEX_DEPTH_mask);
+ break;
+ case GL_TEXTURE_3D:
+ SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_3D, DIM_shift, DIM_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, firstImage->Depth - 1, // ???
+ TEX_DEPTH_shift, TEX_DEPTH_mask);
+ break;
+ case GL_TEXTURE_CUBE_MAP:
+ SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_CUBEMAP, DIM_shift, DIM_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, 0, TEX_DEPTH_shift, TEX_DEPTH_mask);
+ break;
+ default:
+ radeon_error("unexpected texture target type in %s\n", __FUNCTION__);
+ return;
+ }
+
+ row_align = rmesa->radeon.texture_row_align - 1;
+ uTexelPitch = ((firstImage->Width * t->mt->bpp + row_align) & ~row_align) / t->mt->bpp;
+ uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+ & ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+ /* min pitch is 8 */
+ if (uTexelPitch < 8)
+ uTexelPitch = 8;
+
+ SETfield(t->SQ_TEX_RESOURCE0, (uTexelPitch/8)-1, PITCH_shift, PITCH_mask);
+ SETfield(t->SQ_TEX_RESOURCE0, firstImage->Width - 1,
+ TEX_WIDTH_shift, TEX_WIDTH_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1,
+ TEX_HEIGHT_shift, TEX_HEIGHT_mask);
+
+ if ((t->mt->lastLevel - t->mt->firstLevel) > 0) {
+ t->SQ_TEX_RESOURCE3 = t->mt->levels[0].size / 256;
+ SETfield(t->SQ_TEX_RESOURCE4, t->mt->firstLevel, BASE_LEVEL_shift, BASE_LEVEL_mask);
+ SETfield(t->SQ_TEX_RESOURCE5, t->mt->lastLevel, LAST_LEVEL_shift, LAST_LEVEL_mask);
+ }
+}
+
+/**
+ * Ensure the given texture is ready for rendering.
+ *
+ * Mostly this means populating the texture object's mipmap tree.
+ */
+static GLboolean r600_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+ context_t *rmesa = R700_CONTEXT(ctx);
+ radeonTexObj *t = radeon_tex_obj(texObj);
+
+ if (!radeon_validate_texture_miptree(ctx, texObj))
+ return GL_FALSE;
+
+ /* Configure the hardware registers (more precisely, the cached version
+ * of the hardware registers). */
+ setup_hardware_state(rmesa, texObj);
+
+ t->validated = GL_TRUE;
+ return GL_TRUE;
+}
+
+/**
+ * Ensure all enabled and complete textures are uploaded along with any buffers being used.
+ */
+GLboolean r600ValidateBuffers(GLcontext * ctx)
+{
+ context_t *rmesa = R700_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb;
+ struct radeon_bo *pbo;
+ int i;
+ int ret;
+
+ radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ /* color buffer */
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rrb->bo, 0,
+ RADEON_GEM_DOMAIN_VRAM);
+ }
+
+ /* depth buffer */
+ rrb = radeon_get_depthbuffer(&rmesa->radeon);
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rrb->bo, 0,
+ RADEON_GEM_DOMAIN_VRAM);
+ }
+
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+ radeonTexObj *t;
+
+ if (!ctx->Texture.Unit[i]._ReallyEnabled)
+ continue;
+
+ if (!r600_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) {
+ radeon_warning("failed to validate texture for unit %d.\n", i);
+ }
+ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+ if (t->image_override && t->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ t->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+ else if (t->mt->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ t->mt->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+ }
+
+ pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(ctx);
+ if (pbo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+
+ pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(ctx);
+ if (pbo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+ return GL_TRUE;
+}
+
+void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth, GLuint pitch)
+{
+ context_t *rmesa = pDRICtx->driverPrivate;
+ struct gl_texture_object *tObj =
+ _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+ radeonTexObjPtr t = radeon_tex_obj(tObj);
+ uint32_t pitch_val, size;
+
+ if (!tObj)
+ return;
+
+ t->image_override = GL_TRUE;
+
+ if (!offset)
+ return;
+
+ size = pitch;//h * w * (depth / 8);
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+ t->bo = radeon_legacy_bo_alloc_fake(rmesa->radeon.radeonScreen->bom, size, offset);
+ t->override_offset = offset;
+ pitch_val = pitch;
+ switch (depth) {
+ case 32:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ pitch_val /= 4;
+ break;
+ case 24:
+ default:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ pitch_val /= 4;
+ break;
+ case 16:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ pitch_val /= 2;
+ break;
+ }
+
+ pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+ & ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+ /* min pitch is 8 */
+ if (pitch_val < 8)
+ pitch_val = 8;
+
+ SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1, PITCH_shift, PITCH_mask);
+}
+
+void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv)
+{
+ struct gl_texture_unit *texUnit;
+ struct gl_texture_object *texObj;
+ struct gl_texture_image *texImage;
+ struct radeon_renderbuffer *rb;
+ radeon_texture_image *rImage;
+ radeonContextPtr radeon;
+ context_t *rmesa;
+ struct radeon_framebuffer *rfb;
+ radeonTexObjPtr t;
+ uint32_t pitch_val;
+ uint32_t internalFormat, type, format;
+
+ type = GL_BGRA;
+ format = GL_UNSIGNED_BYTE;
+ internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4);
+
+ radeon = pDRICtx->driverPrivate;
+ rmesa = pDRICtx->driverPrivate;
+
+ rfb = dPriv->driverPrivate;
+ texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+ texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+ texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+
+ rImage = get_radeon_texture_image(texImage);
+ t = radeon_tex_obj(texObj);
+ if (t == NULL) {
+ return;
+ }
+
+ radeon_update_renderbuffers(pDRICtx, dPriv);
+ /* back & depth buffer are useless free them right away */
+ rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
+ }
+ rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
+ }
+ rb = rfb->color_rb[0];
+ if (rb->bo == NULL) {
+ /* Failed to BO for the buffer */
+ return;
+ }
+
+ _mesa_lock_texture(radeon->glCtx, texObj);
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+ if (rImage->bo) {
+ radeon_bo_unref(rImage->bo);
+ rImage->bo = NULL;
+ }
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = NULL;
+ }
+ if (rImage->mt) {
+ radeon_miptree_unreference(rImage->mt);
+ rImage->mt = NULL;
+ }
+ _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+ rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+ texImage->RowStride = rb->pitch / rb->cpp;
+ texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
+ internalFormat,
+ type, format, 0);
+ rImage->bo = rb->bo;
+ radeon_bo_ref(rImage->bo);
+ t->bo = rb->bo;
+ radeon_bo_ref(t->bo);
+ t->image_override = GL_TRUE;
+ t->override_offset = 0;
+ pitch_val = rb->pitch;
+ switch (rb->cpp) {
+ case 4:
+ if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) {
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ } else {
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ }
+ pitch_val /= 4;
+ break;
+ case 3:
+ default:
+ // FMT_8_8_8 ???
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ pitch_val /= 4;
+ break;
+ case 2:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ pitch_val /= 2;
+ break;
+ }
+
+ pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+ & ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+ /* min pitch is 8 */
+ if (pitch_val < 8)
+ pitch_val = 8;
+
+ SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1, PITCH_shift, PITCH_mask);
+ SETfield(t->SQ_TEX_RESOURCE0, rb->base.Width - 1,
+ TEX_WIDTH_shift, TEX_WIDTH_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, rb->base.Height - 1,
+ TEX_HEIGHT_shift, TEX_HEIGHT_mask);
+
+ t->validated = GL_TRUE;
+ _mesa_unlock_texture(radeon->glCtx, texObj);
+ return;
+}
+
+void r600SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{
+ r600SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
+}
diff --git a/r600/r700_assembler.c b/r600/r700_assembler.c
new file mode 100644
index 0000000..00eda54
--- /dev/null
+++ b/r600/r700_assembler.c
@@ -0,0 +1,4334 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/mtypes.h"
+#include "main/imports.h"
+
+#include "radeon_debug.h"
+#include "r600_context.h"
+
+#include "r700_assembler.h"
+
+BITS addrmode_PVSDST(PVSDST * pPVSDST)
+{
+ return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
+}
+
+void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
+{
+ pPVSDST->addrmode0 = addrmode & 1;
+ pPVSDST->addrmode1 = (addrmode >> 1) & 1;
+}
+
+void nomask_PVSDST(PVSDST * pPVSDST)
+{
+ pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
+}
+
+BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
+{
+ return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
+}
+
+void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
+{
+ pPVSSRC->addrmode0 = addrmode & 1;
+ pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
+}
+
+
+void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
+{
+ pPVSSRC->swizzlex =
+ pPVSSRC->swizzley =
+ pPVSSRC->swizzlez =
+ pPVSSRC->swizzlew = swz;
+}
+
+void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
+{
+ pPVSSRC->swizzlex = SQ_SEL_X;
+ pPVSSRC->swizzley = SQ_SEL_Y;
+ pPVSSRC->swizzlez = SQ_SEL_Z;
+ pPVSSRC->swizzlew = SQ_SEL_W;
+}
+
+void
+swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
+{
+ switch (x)
+ {
+ case SQ_SEL_X: x = pPVSSRC->swizzlex;
+ break;
+ case SQ_SEL_Y: x = pPVSSRC->swizzley;
+ break;
+ case SQ_SEL_Z: x = pPVSSRC->swizzlez;
+ break;
+ case SQ_SEL_W: x = pPVSSRC->swizzlew;
+ break;
+ default:;
+ }
+
+ switch (y)
+ {
+ case SQ_SEL_X: y = pPVSSRC->swizzlex;
+ break;
+ case SQ_SEL_Y: y = pPVSSRC->swizzley;
+ break;
+ case SQ_SEL_Z: y = pPVSSRC->swizzlez;
+ break;
+ case SQ_SEL_W: y = pPVSSRC->swizzlew;
+ break;
+ default:;
+ }
+
+ switch (z)
+ {
+ case SQ_SEL_X: z = pPVSSRC->swizzlex;
+ break;
+ case SQ_SEL_Y: z = pPVSSRC->swizzley;
+ break;
+ case SQ_SEL_Z: z = pPVSSRC->swizzlez;
+ break;
+ case SQ_SEL_W: z = pPVSSRC->swizzlew;
+ break;
+ default:;
+ }
+
+ switch (w)
+ {
+ case SQ_SEL_X: w = pPVSSRC->swizzlex;
+ break;
+ case SQ_SEL_Y: w = pPVSSRC->swizzley;
+ break;
+ case SQ_SEL_Z: w = pPVSSRC->swizzlez;
+ break;
+ case SQ_SEL_W: w = pPVSSRC->swizzlew;
+ break;
+ default:;
+ }
+
+ pPVSSRC->swizzlex = x;
+ pPVSSRC->swizzley = y;
+ pPVSSRC->swizzlez = z;
+ pPVSSRC->swizzlew = w;
+}
+
+void neg_PVSSRC(PVSSRC* pPVSSRC)
+{
+ pPVSSRC->negx = 1;
+ pPVSSRC->negy = 1;
+ pPVSSRC->negz = 1;
+ pPVSSRC->negw = 1;
+}
+
+void noneg_PVSSRC(PVSSRC* pPVSSRC)
+{
+ pPVSSRC->negx = 0;
+ pPVSSRC->negy = 0;
+ pPVSSRC->negz = 0;
+ pPVSSRC->negw = 0;
+}
+
+// negate argument (for SUB instead of ADD and alike)
+void flipneg_PVSSRC(PVSSRC* pPVSSRC)
+{
+ pPVSSRC->negx = !pPVSSRC->negx;
+ pPVSSRC->negy = !pPVSSRC->negy;
+ pPVSSRC->negz = !pPVSSRC->negz;
+ pPVSSRC->negw = !pPVSSRC->negw;
+}
+
+void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
+{
+ switch (c)
+ {
+ case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
+ case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
+ case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
+ case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
+ default:;
+ }
+}
+
+void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
+{
+ switch (c)
+ {
+ case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
+ case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
+ case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
+ case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
+ default:;
+ }
+}
+
+BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
+{
+ return (pOutVTXFmt0->point_size |
+ pOutVTXFmt0->edge_flag |
+ pOutVTXFmt0->rta_index |
+ pOutVTXFmt0->kill_flag |
+ pOutVTXFmt0->viewport_index);
+}
+
+BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
+{
+ return (pFPOutFmt->depth |
+ pFPOutFmt->stencil_ref |
+ pFPOutFmt->mask |
+ pFPOutFmt->coverage_to_mask);
+}
+
+GLboolean is_reduction_opcode(PVSDWORD* dest)
+{
+ if (dest->dst.op3 == 0)
+ {
+ if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
+ {
+ return GL_TRUE;
+ }
+ }
+ return GL_FALSE;
+}
+
+GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
+{
+ GLuint format = FMT_INVALID;
+ GLuint uiElemSize = 0;
+
+ switch (eType)
+ {
+ case GL_BYTE:
+ case GL_UNSIGNED_BYTE:
+ uiElemSize = 1;
+ switch(nChannels)
+ {
+ case 1:
+ format = FMT_8; break;
+ case 2:
+ format = FMT_8_8; break;
+ case 3:
+ format = FMT_8_8_8; break;
+ case 4:
+ format = FMT_8_8_8_8; break;
+ default:
+ break;
+ }
+ break;
+
+ case GL_UNSIGNED_SHORT:
+ case GL_SHORT:
+ uiElemSize = 2;
+ switch(nChannels)
+ {
+ case 1:
+ format = FMT_16; break;
+ case 2:
+ format = FMT_16_16; break;
+ case 3:
+ format = FMT_16_16_16; break;
+ case 4:
+ format = FMT_16_16_16_16; break;
+ default:
+ break;
+ }
+ break;
+
+ case GL_UNSIGNED_INT:
+ case GL_INT:
+ uiElemSize = 4;
+ switch(nChannels)
+ {
+ case 1:
+ format = FMT_32; break;
+ case 2:
+ format = FMT_32_32; break;
+ case 3:
+ format = FMT_32_32_32; break;
+ case 4:
+ format = FMT_32_32_32_32; break;
+ default:
+ break;
+ }
+ break;
+
+ case GL_FLOAT:
+ uiElemSize = 4;
+ switch(nChannels)
+ {
+ case 1:
+ format = FMT_32_FLOAT; break;
+ case 2:
+ format = FMT_32_32_FLOAT; break;
+ case 3:
+ format = FMT_32_32_32_FLOAT; break;
+ case 4:
+ format = FMT_32_32_32_32_FLOAT; break;
+ default:
+ break;
+ }
+ break;
+ case GL_DOUBLE:
+ uiElemSize = 8;
+ switch(nChannels)
+ {
+ case 1:
+ format = FMT_32_FLOAT; break;
+ case 2:
+ format = FMT_32_32_FLOAT; break;
+ case 3:
+ format = FMT_32_32_32_FLOAT; break;
+ case 4:
+ format = FMT_32_32_32_32_FLOAT; break;
+ default:
+ break;
+ }
+ break;
+ default:
+ ;
+ //GL_ASSERT_NO_CASE();
+ }
+
+ if(NULL != pClient_size)
+ {
+ *pClient_size = uiElemSize * nChannels;
+ }
+
+ return(format);
+}
+
+unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
+{
+ if(pAsm->D.dst.op3)
+ {
+ return 3;
+ }
+
+ switch (pAsm->D.dst.opcode)
+ {
+ case SQ_OP2_INST_ADD:
+ case SQ_OP2_INST_MUL:
+ case SQ_OP2_INST_MAX:
+ case SQ_OP2_INST_MIN:
+ //case SQ_OP2_INST_MAX_DX10:
+ //case SQ_OP2_INST_MIN_DX10:
+ case SQ_OP2_INST_SETGT:
+ case SQ_OP2_INST_SETGE:
+ case SQ_OP2_INST_PRED_SETE:
+ case SQ_OP2_INST_PRED_SETGT:
+ case SQ_OP2_INST_PRED_SETGE:
+ case SQ_OP2_INST_PRED_SETNE:
+ case SQ_OP2_INST_DOT4:
+ case SQ_OP2_INST_DOT4_IEEE:
+ case SQ_OP2_INST_CUBE:
+ return 2;
+
+ case SQ_OP2_INST_MOV:
+ case SQ_OP2_INST_FRACT:
+ case SQ_OP2_INST_FLOOR:
+ case SQ_OP2_INST_KILLGT:
+ case SQ_OP2_INST_EXP_IEEE:
+ case SQ_OP2_INST_LOG_CLAMPED:
+ case SQ_OP2_INST_LOG_IEEE:
+ case SQ_OP2_INST_RECIP_IEEE:
+ case SQ_OP2_INST_RECIPSQRT_IEEE:
+ case SQ_OP2_INST_FLT_TO_INT:
+ case SQ_OP2_INST_SIN:
+ case SQ_OP2_INST_COS:
+ return 1;
+
+ default: radeon_error(
+ "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
+ };
+
+ return 3;
+}
+
+int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
+{
+ GLuint i;
+
+ Init_R700_Shader(pShader);
+ pAsm->pR700Shader = pShader;
+ pAsm->currentShaderType = spt;
+
+ pAsm->cf_last_export_ptr = NULL;
+
+ pAsm->cf_current_export_clause_ptr = NULL;
+ pAsm->cf_current_alu_clause_ptr = NULL;
+ pAsm->cf_current_tex_clause_ptr = NULL;
+ pAsm->cf_current_vtx_clause_ptr = NULL;
+ pAsm->cf_current_cf_clause_ptr = NULL;
+
+ // No clause has been created yet
+ pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
+
+ pAsm->number_of_colorandz_exports = 0;
+ pAsm->number_of_exports = 0;
+ pAsm->number_of_export_opcodes = 0;
+
+
+ pAsm->D.bits = 0;
+ pAsm->S[0].bits = 0;
+ pAsm->S[1].bits = 0;
+ pAsm->S[2].bits = 0;
+
+ pAsm->uLastPosUpdate = 0;
+
+ *(BITS *) &pAsm->fp_stOutFmt0 = 0;
+
+ pAsm->uIIns = 0;
+ pAsm->uOIns = 0;
+ pAsm->number_used_registers = 0;
+ pAsm->uUsedConsts = 256;
+
+
+ // Fragment programs
+ pAsm->uBoolConsts = 0;
+ pAsm->uIntConsts = 0;
+ pAsm->uInsts = 0;
+ pAsm->uConsts = 0;
+
+ pAsm->FCSP = 0;
+ pAsm->fc_stack[0].type = FC_NONE;
+
+ pAsm->branch_depth = 0;
+ pAsm->max_branch_depth = 0;
+
+ pAsm->aArgSubst[0] =
+ pAsm->aArgSubst[1] =
+ pAsm->aArgSubst[2] =
+ pAsm->aArgSubst[3] = (-1);
+
+ pAsm->uOutputs = 0;
+
+ for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
+ {
+ pAsm->color_export_register_number[i] = (-1);
+ }
+
+
+ pAsm->depth_export_register_number = (-1);
+ pAsm->stencil_export_register_number = (-1);
+ pAsm->coverage_to_mask_export_register_number = (-1);
+ pAsm->mask_export_register_number = (-1);
+
+ pAsm->starting_export_register_number = 0;
+ pAsm->starting_vfetch_register_number = 0;
+ pAsm->starting_temp_register_number = 0;
+ pAsm->uFirstHelpReg = 0;
+
+
+ pAsm->input_position_is_used = GL_FALSE;
+ pAsm->input_normal_is_used = GL_FALSE;
+
+
+ for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
+ {
+ pAsm->input_color_is_used[ i ] = GL_FALSE;
+ }
+
+ for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
+ {
+ pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
+ }
+
+ for (i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
+ }
+
+ pAsm->number_of_inputs = 0;
+
+ pAsm->is_tex = GL_FALSE;
+ pAsm->need_tex_barrier = GL_FALSE;
+
+ return 0;
+}
+
+GLboolean IsTex(gl_inst_opcode Opcode)
+{
+ if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
+ {
+ return GL_TRUE;
+ }
+ return GL_FALSE;
+}
+
+GLboolean IsAlu(gl_inst_opcode Opcode)
+{
+ //TODO : more for fc and ex for higher spec.
+ if( IsTex(Opcode) )
+ {
+ return GL_FALSE;
+ }
+ return GL_TRUE;
+}
+
+int check_current_clause(r700_AssemblerBase* pAsm,
+ CF_CLAUSE_TYPE new_clause_type)
+{
+ if (pAsm->cf_current_clause_type != new_clause_type)
+ { //Close last open clause
+ switch (pAsm->cf_current_clause_type)
+ {
+ case CF_ALU_CLAUSE:
+ if ( pAsm->cf_current_alu_clause_ptr != NULL)
+ {
+ pAsm->cf_current_alu_clause_ptr = NULL;
+ }
+ break;
+ case CF_VTX_CLAUSE:
+ if ( pAsm->cf_current_vtx_clause_ptr != NULL)
+ {
+ pAsm->cf_current_vtx_clause_ptr = NULL;
+ }
+ break;
+ case CF_TEX_CLAUSE:
+ if ( pAsm->cf_current_tex_clause_ptr != NULL)
+ {
+ pAsm->cf_current_tex_clause_ptr = NULL;
+ }
+ break;
+ case CF_EXPORT_CLAUSE:
+ if ( pAsm->cf_current_export_clause_ptr != NULL)
+ {
+ pAsm->cf_current_export_clause_ptr = NULL;
+ }
+ break;
+ case CF_OTHER_CLAUSE:
+ if ( pAsm->cf_current_cf_clause_ptr != NULL)
+ {
+ pAsm->cf_current_cf_clause_ptr = NULL;
+ }
+ break;
+ case CF_EMPTY_CLAUSE:
+ break;
+ default:
+ radeon_error(
+ "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
+
+ // Create new clause
+ switch (new_clause_type)
+ {
+ case CF_ALU_CLAUSE:
+ pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
+ break;
+ case CF_VTX_CLAUSE:
+ pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
+ break;
+ case CF_TEX_CLAUSE:
+ pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
+ break;
+ case CF_EXPORT_CLAUSE:
+ {
+ R700ControlFlowSXClause* pR700ControlFlowSXClause
+ = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
+
+ // Add new export instruction to control flow program
+ if (pR700ControlFlowSXClause != 0)
+ {
+ pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
+ Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
+ AddCFInstruction( pAsm->pR700Shader,
+ (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
+ }
+ else
+ {
+ radeon_error(
+ "Error allocating new EXPORT CF instruction in check_current_clause. \n");
+ return GL_FALSE;
+ }
+ pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
+ }
+ break;
+ case CF_EMPTY_CLAUSE:
+ break;
+ case CF_OTHER_CLAUSE:
+ pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
+ break;
+ default:
+ radeon_error(
+ "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
+ R700VertexInstruction* vertex_instruction_ptr)
+{
+ if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
+ {
+ return GL_FALSE;
+ }
+
+ if( pAsm->cf_current_vtx_clause_ptr == NULL ||
+ ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
+ (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
+ ) )
+ {
+ // Create new Vfetch control flow instruction for this new clause
+ pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
+
+ if (pAsm->cf_current_vtx_clause_ptr != NULL)
+ {
+ Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
+ AddCFInstruction( pAsm->pR700Shader,
+ (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
+ }
+ else
+ {
+ radeon_error("Could not allocate a new VFetch CF instruction.\n");
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
+ }
+ else
+ {
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
+ }
+
+ AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
+
+ return GL_TRUE;
+}
+
+GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
+ R700TextureInstruction* tex_instruction_ptr)
+{
+ if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( pAsm->cf_current_tex_clause_ptr == NULL ||
+ ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
+ (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
+ ) )
+ {
+ // new tex cf instruction for this new clause
+ pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
+
+ if (pAsm->cf_current_tex_clause_ptr != NULL)
+ {
+ Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
+ AddCFInstruction( pAsm->pR700Shader,
+ (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
+ }
+ else
+ {
+ radeon_error("Could not allocate a new TEX CF instruction.\n");
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
+ }
+ else
+ {
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
+ }
+
+ // If this clause constains any TEX instruction that is dependent on a previous instruction,
+ // set the barrier bit
+ if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
+ {
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
+
+ if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
+ {
+ pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
+ tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
+ }
+
+ AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
+ GLuint gl_client_id,
+ GLuint destination_register,
+ GLuint number_of_elements,
+ GLenum dataElementType,
+ VTX_FETCH_METHOD* pFetchMethod)
+{
+ GLuint client_size_inbyte;
+ GLuint data_format;
+ GLuint mega_fetch_count;
+ GLuint is_mega_fetch_flag;
+
+ R700VertexGenericFetch* vfetch_instruction_ptr;
+ R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
+
+ if (assembled_vfetch_instruction_ptr == NULL)
+ {
+ vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
+ if (vfetch_instruction_ptr == NULL)
+ {
+ return GL_FALSE;
+ }
+ Init_R700VertexGenericFetch(vfetch_instruction_ptr);
+ }
+ else
+ {
+ vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
+ }
+
+ data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
+
+ if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
+ {
+ //TODO : mini fetch
+ }
+ else
+ {
+ mega_fetch_count = MEGA_FETCH_BYTES - 1;
+ is_mega_fetch_flag = 0x1;
+ pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
+ }
+
+ vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
+ vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
+ vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+
+ vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
+ vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
+ vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
+ vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
+ vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
+
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
+
+ vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
+
+ // Destination register
+ vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
+ vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
+
+ vfetch_instruction_ptr->m_Word2.f.offset = 0;
+ vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
+
+ vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
+
+ if (assembled_vfetch_instruction_ptr == NULL)
+ {
+ if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
+ {
+ return GL_FALSE;
+ }
+
+ if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
+ {
+ return GL_FALSE;
+ }
+ else
+ {
+ pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLuint gethelpr(r700_AssemblerBase* pAsm)
+{
+ GLuint r = pAsm->uHelpReg;
+ pAsm->uHelpReg++;
+ if (pAsm->uHelpReg > pAsm->number_used_registers)
+ {
+ pAsm->number_used_registers = pAsm->uHelpReg;
+ }
+ return r;
+}
+void resethelpr(r700_AssemblerBase* pAsm)
+{
+ pAsm->uHelpReg = pAsm->uFirstHelpReg;
+}
+
+void checkop_init(r700_AssemblerBase* pAsm)
+{
+ resethelpr(pAsm);
+ pAsm->aArgSubst[0] =
+ pAsm->aArgSubst[1] =
+ pAsm->aArgSubst[2] =
+ pAsm->aArgSubst[3] = -1;
+}
+
+GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
+{
+ GLuint tmp = gethelpr(pAsm);
+
+ //mov src to temp helper gpr.
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ nomask_PVSDST(&(pAsm->D.dst));
+
+ if( GL_FALSE == assemble_src(pAsm, src, 0) )
+ {
+ return GL_FALSE;
+ }
+
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->aArgSubst[1 + src] = tmp;
+
+ return GL_TRUE;
+}
+
+GLboolean checkop1(r700_AssemblerBase* pAsm)
+{
+ checkop_init(pAsm);
+ return GL_TRUE;
+}
+
+GLboolean checkop2(r700_AssemblerBase* pAsm)
+{
+ GLboolean bSrcConst[2];
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ checkop_init(pAsm);
+
+ if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
+ (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
+ (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
+ (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
+ {
+ bSrcConst[0] = GL_TRUE;
+ }
+ else
+ {
+ bSrcConst[0] = GL_FALSE;
+ }
+ if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
+ (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
+ (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
+ (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
+ {
+ bSrcConst[1] = GL_TRUE;
+ }
+ else
+ {
+ bSrcConst[1] = GL_FALSE;
+ }
+
+ if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
+ {
+ if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
+ {
+ if( GL_FALSE == mov_temp(pAsm, 1) )
+ {
+ return GL_FALSE;
+ }
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean checkop3(r700_AssemblerBase* pAsm)
+{
+ GLboolean bSrcConst[3];
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ checkop_init(pAsm);
+
+ if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
+ (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
+ (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
+ (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
+ {
+ bSrcConst[0] = GL_TRUE;
+ }
+ else
+ {
+ bSrcConst[0] = GL_FALSE;
+ }
+ if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
+ (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
+ (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
+ (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
+ {
+ bSrcConst[1] = GL_TRUE;
+ }
+ else
+ {
+ bSrcConst[1] = GL_FALSE;
+ }
+ if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
+ (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
+ (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
+ (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
+ {
+ bSrcConst[2] = GL_TRUE;
+ }
+ else
+ {
+ bSrcConst[2] = GL_FALSE;
+ }
+
+ if( (GL_TRUE == bSrcConst[0]) &&
+ (GL_TRUE == bSrcConst[1]) &&
+ (GL_TRUE == bSrcConst[2]) )
+ {
+ if( GL_FALSE == mov_temp(pAsm, 1) )
+ {
+ return GL_FALSE;
+ }
+ if( GL_FALSE == mov_temp(pAsm, 2) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+ }
+ else if( (GL_TRUE == bSrcConst[0]) &&
+ (GL_TRUE == bSrcConst[1]) )
+ {
+ if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
+ {
+ if( GL_FALSE == mov_temp(pAsm, 1) )
+ {
+ return 1;
+ }
+ }
+
+ return GL_TRUE;
+ }
+ else if ( (GL_TRUE == bSrcConst[0]) &&
+ (GL_TRUE == bSrcConst[2]) )
+ {
+ if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
+ {
+ if( GL_FALSE == mov_temp(pAsm, 2) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+ }
+ else if( (GL_TRUE == bSrcConst[1]) &&
+ (GL_TRUE == bSrcConst[2]) )
+ {
+ if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
+ {
+ if( GL_FALSE == mov_temp(pAsm, 2) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_src(r700_AssemblerBase *pAsm,
+ int src,
+ int fld)
+{
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ if (fld == -1)
+ {
+ fld = src;
+ }
+
+ if(pAsm->aArgSubst[1+src] >= 0)
+ {
+ setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+ pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
+ }
+ else
+ {
+ switch (pILInst->SrcReg[src].File)
+ {
+ case PROGRAM_TEMPORARY:
+ setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+ pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
+ break;
+ case PROGRAM_CONSTANT:
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_STATE_VAR:
+ if (1 == pILInst->SrcReg[src].RelAddr)
+ {
+ setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
+ }
+ else
+ {
+ setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+ }
+
+ pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
+ pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
+ break;
+ case PROGRAM_INPUT:
+ setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+ pAsm->S[fld].src.rtype = SRC_REG_INPUT;
+ switch (pAsm->currentShaderType)
+ {
+ case SPT_FP:
+ pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
+ break;
+ case SPT_VP:
+ pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
+ break;
+ }
+ break;
+ default:
+ radeon_error("Invalid source argument type\n");
+ return GL_FALSE;
+ }
+ }
+
+ pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
+ pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
+ pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
+ pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
+
+ pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
+ pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
+ pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
+ pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_dst(r700_AssemblerBase *pAsm)
+{
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+ switch (pILInst->DstReg.File)
+ {
+ case PROGRAM_TEMPORARY:
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
+ break;
+ case PROGRAM_ADDRESS:
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_A0;
+ pAsm->D.dst.reg = 0;
+ break;
+ case PROGRAM_OUTPUT:
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_OUT;
+ switch (pAsm->currentShaderType)
+ {
+ case SPT_FP:
+ pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
+ break;
+ case SPT_VP:
+ pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
+ break;
+ }
+ break;
+ default:
+ radeon_error("Invalid destination output argument type\n");
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
+ pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
+ pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
+ pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
+
+ return GL_TRUE;
+}
+
+GLboolean tex_dst(r700_AssemblerBase *pAsm)
+{
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
+ {
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ }
+ else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
+ {
+ pAsm->D.dst.rtype = DST_REG_OUT;
+ switch (pAsm->currentShaderType)
+ {
+ case SPT_FP:
+ pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
+ break;
+ case SPT_VP:
+ pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
+ break;
+ }
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ }
+ else
+ {
+ radeon_error("Invalid destination output argument type\n");
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
+ pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
+ pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
+ pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
+
+ return GL_TRUE;
+}
+
+GLboolean tex_src(r700_AssemblerBase *pAsm)
+{
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ GLboolean bValidTexCoord = GL_FALSE;
+
+ if(pAsm->aArgSubst[1] >= 0)
+ {
+ bValidTexCoord = GL_TRUE;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pAsm->aArgSubst[1];
+ }
+ else
+ {
+ switch (pILInst->SrcReg[0].File) {
+ case PROGRAM_CONSTANT:
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_STATE_VAR:
+ break;
+ case PROGRAM_TEMPORARY:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
+ pAsm->starting_temp_register_number;
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ break;
+ case PROGRAM_INPUT:
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case FRAG_ATTRIB_COL0:
+ case FRAG_ATTRIB_COL1:
+ case FRAG_ATTRIB_TEX0:
+ case FRAG_ATTRIB_TEX1:
+ case FRAG_ATTRIB_TEX2:
+ case FRAG_ATTRIB_TEX3:
+ case FRAG_ATTRIB_TEX4:
+ case FRAG_ATTRIB_TEX5:
+ case FRAG_ATTRIB_TEX6:
+ case FRAG_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ }
+ break;
+ }
+ }
+
+ if(GL_TRUE == bValidTexCoord)
+ {
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ }
+ else
+ {
+ radeon_error("Invalid source texcoord for TEX instruction\n");
+ return GL_FALSE;
+ }
+
+ pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
+ pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
+ pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
+ pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
+
+ pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
+ pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
+ pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
+ pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
+{
+ PVSSRC * texture_coordinate_source;
+ PVSSRC * texture_unit_source;
+
+ R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
+ if (tex_instruction_ptr == NULL)
+ {
+ return GL_FALSE;
+ }
+ Init_R700TextureInstruction(tex_instruction_ptr);
+
+ texture_coordinate_source = &(pAsm->S[0].src);
+ texture_unit_source = &(pAsm->S[1].src);
+
+ tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
+ tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
+ tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+
+ tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
+ if (normalized) {
+ tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
+ tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
+ tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
+ tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
+ } else {
+ /* XXX: UNNORMALIZED tex coords have limited wrap modes */
+ tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
+ tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
+ tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
+ tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
+ }
+
+ tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
+ tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
+ tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
+
+ tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
+
+ // dst
+ if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
+ (pAsm->D.dst.rtype == DST_REG_OUT) )
+ {
+ tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
+ tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
+
+ tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
+ tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
+
+ tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
+ tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
+ tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
+ tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
+
+
+ tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
+ tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
+ tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
+ tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
+ }
+ else
+ {
+ radeon_error("Only temp destination registers supported for TEX dest regs.\n");
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+void initialize(r700_AssemblerBase *pAsm)
+{
+ GLuint cycle, component;
+
+ for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
+ {
+ for (component=0; component<NUMBER_OF_COMPONENTS; component++)
+ {
+ pAsm->hw_gpr[cycle][component] = (-1);
+ }
+ }
+ for (component=0; component<NUMBER_OF_COMPONENTS; component++)
+ {
+ pAsm->hw_cfile_addr[component] = (-1);
+ pAsm->hw_cfile_chan[component] = (-1);
+ }
+}
+
+GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
+ int source_index,
+ PVSSRC* pSource,
+ BITS scalar_channel_index)
+{
+ BITS src_sel;
+ BITS src_rel;
+ BITS src_chan;
+ BITS src_neg;
+
+ //--------------------------------------------------------------------------
+ // Source for operands src0, src1.
+ // Values [0,127] correspond to GPR[0..127].
+ // Values [256,511] correspond to cfile constants c[0..255].
+
+ //--------------------------------------------------------------------------
+ // Other special values are shown in the list below.
+
+ // 248 SQ_ALU_SRC_0: special constant 0.0.
+ // 249 SQ_ALU_SRC_1: special constant 1.0 float.
+
+ // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
+ // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
+
+ // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
+ // 253 SQ_ALU_SRC_LITERAL: literal constant.
+
+ // 254 SQ_ALU_SRC_PV: previous vector result.
+ // 255 SQ_ALU_SRC_PS: previous scalar result.
+ //--------------------------------------------------------------------------
+
+ BITS channel_swizzle;
+ switch (scalar_channel_index)
+ {
+ case 0: channel_swizzle = pSource->swizzlex; break;
+ case 1: channel_swizzle = pSource->swizzley; break;
+ case 2: channel_swizzle = pSource->swizzlez; break;
+ case 3: channel_swizzle = pSource->swizzlew; break;
+ default: channel_swizzle = SQ_SEL_MASK; break;
+ }
+
+ if(channel_swizzle == SQ_SEL_0)
+ {
+ src_sel = SQ_ALU_SRC_0;
+ }
+ else if (channel_swizzle == SQ_SEL_1)
+ {
+ src_sel = SQ_ALU_SRC_1;
+ }
+ else
+ {
+ if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
+ (pSource->rtype == SRC_REG_INPUT)
+ )
+ {
+ src_sel = pSource->reg;
+ }
+ else if (pSource->rtype == SRC_REG_CONSTANT)
+ {
+ src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
+ }
+ else
+ {
+ radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
+ source_index, pSource->rtype);
+ return GL_FALSE;
+ }
+ }
+
+ if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
+ {
+ src_rel = SQ_ABSOLUTE;
+ }
+ else
+ {
+ src_rel = SQ_RELATIVE;
+ }
+
+ switch (channel_swizzle)
+ {
+ case SQ_SEL_X:
+ src_chan = SQ_CHAN_X;
+ break;
+ case SQ_SEL_Y:
+ src_chan = SQ_CHAN_Y;
+ break;
+ case SQ_SEL_Z:
+ src_chan = SQ_CHAN_Z;
+ break;
+ case SQ_SEL_W:
+ src_chan = SQ_CHAN_W;
+ break;
+ case SQ_SEL_0:
+ case SQ_SEL_1:
+ // Does not matter since src_sel controls
+ src_chan = SQ_CHAN_X;
+ break;
+ default:
+ radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
+ return GL_FALSE;
+ break;
+ }
+
+ switch (scalar_channel_index)
+ {
+ case 0: src_neg = pSource->negx; break;
+ case 1: src_neg = pSource->negy; break;
+ case 2: src_neg = pSource->negz; break;
+ case 3: src_neg = pSource->negw; break;
+ default: src_neg = 0; break;
+ }
+
+ switch (source_index)
+ {
+ case 0:
+ alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
+ alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
+ alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
+ alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
+ break;
+ case 1:
+ alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
+ alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
+ alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
+ alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
+ break;
+ case 2:
+ alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
+ alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
+ alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
+ alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
+ break;
+ default:
+ radeon_error("Only three sources allowed in ALU opcodes.\n");
+ return GL_FALSE;
+ break;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
+ R700ALUInstruction* alu_instruction_ptr,
+ GLuint contiguous_slots_needed)
+{
+ if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( pAsm->cf_current_alu_clause_ptr == NULL ||
+ ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
+ (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
+ ) )
+ {
+
+ //new cf inst for this clause
+ pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
+
+ // link the new cf to cf segment
+ if(NULL != pAsm->cf_current_alu_clause_ptr)
+ {
+ Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
+ AddCFInstruction( pAsm->pR700Shader,
+ (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
+ }
+ else
+ {
+ radeon_error("Could not allocate a new ALU CF instruction.\n");
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
+ pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
+ pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
+
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
+
+ //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
+
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
+ else
+ {
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
+ }
+
+ // If this clause constains any instruction that is forward dependent on a TEX instruction,
+ // set the whole_quad_mode for this clause
+ if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
+ {
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
+ }
+
+ if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
+ {
+ alu_instruction_ptr->m_Word0.f.last = 1;
+ }
+
+ if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
+ {
+ pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
+ alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
+ }
+
+ AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
+
+ return GL_TRUE;
+}
+
+void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
+ int source_index,
+ BITS* psrc_sel,
+ BITS* psrc_rel,
+ BITS* psrc_chan,
+ BITS* psrc_neg)
+{
+ switch (source_index)
+ {
+ case 0:
+ *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
+ *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
+ *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
+ *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
+ break;
+
+ case 1:
+ *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
+ *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
+ *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
+ *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
+ break;
+
+ case 2:
+ *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
+ *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
+ *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
+ *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
+ break;
+ }
+}
+
+int is_cfile(BITS sel)
+{
+ if (sel > 255 && sel < 512)
+ {
+ return 1;
+ }
+ return 0;
+}
+
+int is_const(BITS sel)
+{
+ if (is_cfile(sel))
+ {
+ return 1;
+ }
+ else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
+ {
+ return 1;
+ }
+ return 0;
+}
+
+int is_gpr(BITS sel)
+{
+ if (sel >= 0 && sel < 128)
+ {
+ return 1;
+ }
+ return 0;
+}
+
+const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
+ SQ_ALU_VEC_120, //001
+ SQ_ALU_VEC_102, //010
+
+ SQ_ALU_VEC_201, //011
+ SQ_ALU_VEC_012, //100
+ SQ_ALU_VEC_021, //101
+
+ SQ_ALU_VEC_012, //110
+ SQ_ALU_VEC_012}; //111
+
+const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
+ SQ_ALU_SCL_122, //001
+ SQ_ALU_SCL_122, //010
+
+ SQ_ALU_SCL_221, //011
+ SQ_ALU_SCL_212, //100
+ SQ_ALU_SCL_122, //101
+
+ SQ_ALU_SCL_122, //110
+ SQ_ALU_SCL_122}; //111
+
+GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
+ GLuint sel,
+ GLuint chan)
+{
+ int res_match = (-1);
+ int res_empty = (-1);
+
+ GLint res;
+
+ for (res=3; res>=0; res--)
+ {
+ if(pAsm->hw_cfile_addr[ res] < 0)
+ {
+ res_empty = res;
+ }
+ else if( (pAsm->hw_cfile_addr[res] == (int)sel)
+ &&
+ (pAsm->hw_cfile_chan[ res ] == (int) chan) )
+ {
+ res_match = res;
+ }
+ }
+
+ if(res_match >= 0)
+ {
+ // Read for this scalar component already reserved, nothing to do here.
+ ;
+ }
+ else if(res_empty >= 0)
+ {
+ pAsm->hw_cfile_addr[ res_empty ] = sel;
+ pAsm->hw_cfile_chan[ res_empty ] = chan;
+ }
+ else
+ {
+ radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
+ return GL_FALSE;
+ }
+ return GL_TRUE;
+}
+
+GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
+{
+ if(pAsm->hw_gpr[cycle][chan] < 0)
+ {
+ pAsm->hw_gpr[cycle][chan] = sel;
+ }
+ else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
+ {
+ radeon_error("Another scalar operation has already used GPR read port for given channel\n");
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
+{
+ switch (swiz)
+ {
+ case SQ_ALU_SCL_210:
+ {
+ int table[3] = {2, 1, 0};
+ *pCycle = table[sel];
+ return GL_TRUE;
+ }
+ break;
+ case SQ_ALU_SCL_122:
+ {
+ int table[3] = {1, 2, 2};
+ *pCycle = table[sel];
+ return GL_TRUE;
+ }
+ break;
+ case SQ_ALU_SCL_212:
+ {
+ int table[3] = {2, 1, 2};
+ *pCycle = table[sel];
+ return GL_TRUE;
+ }
+ break;
+ case SQ_ALU_SCL_221:
+ {
+ int table[3] = {2, 2, 1};
+ *pCycle = table[sel];
+ return GL_TRUE;
+ }
+ break;
+ default:
+ radeon_error("Bad Scalar bank swizzle value\n");
+ break;
+ }
+
+ return GL_FALSE;
+}
+
+GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
+{
+ switch (swiz)
+ {
+ case SQ_ALU_VEC_012:
+ {
+ int table[3] = {0, 1, 2};
+ *pCycle = table[sel];
+ }
+ break;
+ case SQ_ALU_VEC_021:
+ {
+ int table[3] = {0, 2, 1};
+ *pCycle = table[sel];
+ }
+ break;
+ case SQ_ALU_VEC_120:
+ {
+ int table[3] = {1, 2, 0};
+ *pCycle = table[sel];
+ }
+ break;
+ case SQ_ALU_VEC_102:
+ {
+ int table[3] = {1, 0, 2};
+ *pCycle = table[sel];
+ }
+ break;
+ case SQ_ALU_VEC_201:
+ {
+ int table[3] = {2, 0, 1};
+ *pCycle = table[sel];
+ }
+ break;
+ case SQ_ALU_VEC_210:
+ {
+ int table[3] = {2, 1, 0};
+ *pCycle = table[sel];
+ }
+ break;
+ default:
+ radeon_error("Bad Vec bank swizzle value\n");
+ return GL_FALSE;
+ break;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean check_scalar(r700_AssemblerBase* pAsm,
+ R700ALUInstruction* alu_instruction_ptr)
+{
+ GLuint cycle;
+ GLuint bank_swizzle;
+ GLuint const_count = 0;
+
+ BITS sel;
+ BITS chan;
+ BITS rel;
+ BITS neg;
+
+ GLuint src;
+
+ BITS src_sel [3] = {0,0,0};
+ BITS src_chan[3] = {0,0,0};
+ BITS src_rel [3] = {0,0,0};
+ BITS src_neg [3] = {0,0,0};
+
+ GLuint swizzle_key;
+
+ GLuint number_of_operands = r700GetNumOperands(pAsm);
+
+ for (src=0; src<number_of_operands; src++)
+ {
+ get_src_properties(alu_instruction_ptr,
+ src,
+ &(src_sel[src]),
+ &(src_rel[src]),
+ &(src_chan[src]),
+ &(src_neg[src]) );
+ }
+
+
+ swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
+ (is_const( src_sel[1] ) ? 2 : 0) +
+ (is_const( src_sel[2] ) ? 1 : 0) );
+
+ alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
+
+ for (src=0; src<number_of_operands; src++)
+ {
+ sel = src_sel [src];
+ chan = src_chan[src];
+ rel = src_rel [src];
+ neg = src_neg [src];
+
+ if (is_const( sel ))
+ {
+ // Any constant, including literal and inline constants
+ const_count++;
+
+ if (is_cfile( sel ))
+ {
+ reserve_cfile(pAsm, sel, chan);
+ }
+
+ }
+ }
+
+ for (src=0; src<number_of_operands; src++)
+ {
+ sel = src_sel [src];
+ chan = src_chan[src];
+ rel = src_rel [src];
+ neg = src_neg [src];
+
+ if( is_gpr(sel) )
+ {
+ bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
+
+ if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
+ {
+ return GL_FALSE;
+ }
+
+ if(cycle < const_count)
+ {
+ if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
+ {
+ return GL_FALSE;
+ }
+ }
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean check_vector(r700_AssemblerBase* pAsm,
+ R700ALUInstruction* alu_instruction_ptr)
+{
+ GLuint cycle;
+ GLuint bank_swizzle;
+ GLuint const_count = 0;
+
+ GLuint src;
+
+ BITS sel;
+ BITS chan;
+ BITS rel;
+ BITS neg;
+
+ BITS src_sel [3] = {0,0,0};
+ BITS src_chan[3] = {0,0,0};
+ BITS src_rel [3] = {0,0,0};
+ BITS src_neg [3] = {0,0,0};
+
+ GLuint swizzle_key;
+
+ GLuint number_of_operands = r700GetNumOperands(pAsm);
+
+ for (src=0; src<number_of_operands; src++)
+ {
+ get_src_properties(alu_instruction_ptr,
+ src,
+ &(src_sel[src]),
+ &(src_rel[src]),
+ &(src_chan[src]),
+ &(src_neg[src]) );
+ }
+
+
+ swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
+ (is_const( src_sel[1] ) ? 2 : 0) +
+ (is_const( src_sel[2] ) ? 1 : 0)
+ );
+
+ alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
+
+ for (src=0; src<number_of_operands; src++)
+ {
+ sel = src_sel [src];
+ chan = src_chan[src];
+ rel = src_rel [src];
+ neg = src_neg [src];
+
+
+ bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
+
+ if( is_gpr(sel) )
+ {
+ if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( (src == 1) &&
+ (sel == src_sel[0]) &&
+ (chan == src_chan[0]) )
+ {
+ }
+ else
+ {
+ if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
+ {
+ return GL_FALSE;
+ }
+ }
+ }
+ else if( is_const(sel) )
+ {
+ const_count++;
+
+ if( is_cfile(sel) )
+ {
+ if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
+ {
+ return GL_FALSE;
+ }
+ }
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
+{
+ GLuint number_of_scalar_operations;
+ GLboolean is_single_scalar_operation;
+ GLuint scalar_channel_index;
+
+ PVSSRC * pcurrent_source;
+ int current_source_index;
+ GLuint contiguous_slots_needed;
+
+ GLuint uNumSrc = r700GetNumOperands(pAsm);
+ GLuint channel_swizzle, j;
+ GLuint chan_counter[4] = {0, 0, 0, 0};
+ PVSSRC * pSource[3];
+ GLboolean bSplitInst = GL_FALSE;
+
+ if (1 == pAsm->D.dst.math)
+ {
+ is_single_scalar_operation = GL_TRUE;
+ number_of_scalar_operations = 1;
+ }
+ else
+ {
+ is_single_scalar_operation = GL_FALSE;
+ number_of_scalar_operations = 4;
+
+/* current assembler doesn't do more than 1 register per source */
+#if 0
+ /* check read port, only very preliminary algorithm, not count in
+ src0/1 same comp case and prev slot repeat case; also not count relative
+ addressing. TODO: improve performance. */
+ for(j=0; j<uNumSrc; j++)
+ {
+ pSource[j] = &(pAsm->S[j].src);
+ }
+ for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
+ {
+ for(j=0; j<uNumSrc; j++)
+ {
+ switch (scalar_channel_index)
+ {
+ case 0: channel_swizzle = pSource[j]->swizzlex; break;
+ case 1: channel_swizzle = pSource[j]->swizzley; break;
+ case 2: channel_swizzle = pSource[j]->swizzlez; break;
+ case 3: channel_swizzle = pSource[j]->swizzlew; break;
+ default: channel_swizzle = SQ_SEL_MASK; break;
+ }
+ if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
+ (pSource[j]->rtype == SRC_REG_INPUT))
+ && (channel_swizzle <= SQ_SEL_W) )
+ {
+ chan_counter[channel_swizzle]++;
+ }
+ }
+ }
+ if( (chan_counter[SQ_SEL_X] > 3)
+ || (chan_counter[SQ_SEL_Y] > 3)
+ || (chan_counter[SQ_SEL_Z] > 3)
+ || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
+ {
+ bSplitInst = GL_TRUE;
+ }
+#endif
+ }
+
+ contiguous_slots_needed = 0;
+
+ if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
+ {
+ contiguous_slots_needed = 4;
+ }
+
+ initialize(pAsm);
+
+ for (scalar_channel_index=0;
+ scalar_channel_index < number_of_scalar_operations;
+ scalar_channel_index++)
+ {
+ R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+ if (alu_instruction_ptr == NULL)
+ {
+ return GL_FALSE;
+ }
+ Init_R700ALUInstruction(alu_instruction_ptr);
+
+ //src 0
+ current_source_index = 0;
+ pcurrent_source = &(pAsm->S[0].src);
+
+ if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+ current_source_index,
+ pcurrent_source,
+ scalar_channel_index) )
+ {
+ return GL_FALSE;
+ }
+
+ if (uNumSrc > 1)
+ {
+ // Process source 1
+ current_source_index = 1;
+ pcurrent_source = &(pAsm->S[current_source_index].src);
+
+ if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+ current_source_index,
+ pcurrent_source,
+ scalar_channel_index) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ //other bits
+ alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
+
+ if( (is_single_scalar_operation == GL_TRUE)
+ || (GL_TRUE == bSplitInst) )
+ {
+ alu_instruction_ptr->m_Word0.f.last = 1;
+ }
+ else
+ {
+ alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
+ }
+
+ alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+
+ // dst
+ if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
+ (pAsm->D.dst.rtype == DST_REG_OUT) )
+ {
+ alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
+ }
+ else
+ {
+ radeon_error("Only temp destination registers supported for ALU dest regs.\n");
+ return GL_FALSE;
+ }
+
+ alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
+
+ if ( is_single_scalar_operation == GL_TRUE )
+ {
+ // Override scalar_channel_index since only one scalar value will be written
+ if(pAsm->D.dst.writex)
+ {
+ scalar_channel_index = 0;
+ }
+ else if(pAsm->D.dst.writey)
+ {
+ scalar_channel_index = 1;
+ }
+ else if(pAsm->D.dst.writez)
+ {
+ scalar_channel_index = 2;
+ }
+ else if(pAsm->D.dst.writew)
+ {
+ scalar_channel_index = 3;
+ }
+ }
+
+ alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
+
+ alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
+
+ if (pAsm->D.dst.op3)
+ {
+ //op3
+
+ alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
+
+ //There's 3rd src for op3
+ current_source_index = 2;
+ pcurrent_source = &(pAsm->S[current_source_index].src);
+
+ if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+ current_source_index,
+ pcurrent_source,
+ scalar_channel_index) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
+ {
+ //op2
+ if (pAsm->bR6xx)
+ {
+ alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
+
+ alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
+
+ //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
+ //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
+ switch (scalar_channel_index)
+ {
+ case 0:
+ alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
+ break;
+ case 1:
+ alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
+ break;
+ case 2:
+ alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
+ break;
+ case 3:
+ alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
+ break;
+ default:
+ alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
+ break;
+ }
+ alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
+ }
+ else
+ {
+ alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
+
+ alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
+
+ //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+ //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
+ switch (scalar_channel_index)
+ {
+ case 0:
+ alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
+ break;
+ case 1:
+ alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
+ break;
+ case 2:
+ alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
+ break;
+ case 3:
+ alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
+ break;
+ default:
+ alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
+ break;
+ }
+ alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
+ }
+ }
+
+ if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
+ {
+ return GL_FALSE;
+ }
+
+ /*
+ * Judge the type of current instruction, is it vector or scalar
+ * instruction.
+ */
+ if (is_single_scalar_operation)
+ {
+ if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
+ {
+ if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
+ {
+ return 1;
+ }
+ }
+
+ contiguous_slots_needed = 0;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean next_ins(r700_AssemblerBase *pAsm)
+{
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ if( GL_TRUE == pAsm->is_tex )
+ {
+ if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+ if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
+ {
+ radeon_error("Error assembling TEX instruction\n");
+ return GL_FALSE;
+ }
+ } else {
+ if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
+ {
+ radeon_error("Error assembling TEX instruction\n");
+ return GL_FALSE;
+ }
+ }
+ }
+ else
+ { //ALU
+ if( GL_FALSE == assemble_alu_instruction(pAsm) )
+ {
+ radeon_error("Error assembling ALU instruction\n");
+ return GL_FALSE;
+ }
+ }
+
+ if(pAsm->D.dst.rtype == DST_REG_OUT)
+ {
+ if(pAsm->D.dst.op3)
+ {
+ // There is no mask for OP3 instructions, so all channels are written
+ pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
+ }
+ else
+ {
+ pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
+ |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
+ }
+ }
+
+ //reset for next inst.
+ pAsm->D.bits = 0;
+ pAsm->S[0].bits = 0;
+ pAsm->S[1].bits = 0;
+ pAsm->S[2].bits = 0;
+ pAsm->is_tex = GL_FALSE;
+ pAsm->need_tex_barrier = GL_FALSE;
+ return GL_TRUE;
+}
+
+GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
+{
+ BITS tmp;
+
+ checkop1(pAsm);
+
+ tmp = gethelpr(pAsm);
+
+ // opcode tmp.x, a.x
+ // MOV dst, tmp.x
+
+ pAsm->D.dst.opcode = opcode;
+ pAsm->D.dst.math = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // Now replicate result to all necessary channels in destination
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
+{
+ checkop1(pAsm);
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->S[1].bits = pAsm->S[0].bits;
+ flipneg_PVSSRC(&(pAsm->S[1].src));
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
+{
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
+ {
+ flipneg_PVSSRC(&(pAsm->S[1].src));
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_BAD(char *opcode_str)
+{
+ radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
+ return GL_FALSE;
+}
+
+GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
+{
+ int tmp;
+
+ if( GL_FALSE == checkop3(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
+ pAsm->D.dst.op3 = 1;
+
+ tmp = (-1);
+
+ if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
+ {
+ //OP3 has no support for write mask
+ tmp = gethelpr(pAsm);
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ nomask_PVSDST(&(pAsm->D.dst));
+ }
+ else
+ {
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 2, 1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, 2) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
+ {
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ //tmp for source
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_COS(r700_AssemblerBase *pAsm)
+{
+ return assemble_math_function(pAsm, SQ_OP2_INST_COS);
+}
+
+GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
+{
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
+ {
+ zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
+ zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
+ }
+ else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
+ {
+ onecomp_PVSSRC(&(pAsm->S[1].src), 3);
+ }
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_DST(r700_AssemblerBase *pAsm)
+{
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ onecomp_PVSSRC(&(pAsm->S[0].src), 0);
+ onecomp_PVSSRC(&(pAsm->S[0].src), 3);
+
+ onecomp_PVSSRC(&(pAsm->S[1].src), 0);
+ onecomp_PVSSRC(&(pAsm->S[1].src), 2);
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
+{
+ return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
+}
+
+GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
+{
+ checkop1(pAsm);
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+ if ( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
+{
+ return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
+}
+
+GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
+{
+ checkop1(pAsm);
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ if ( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
+{
+ checkop1(pAsm);
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
+
+ if ( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = 0;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = 0;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+
+ if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File)
+ {
+ pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
+ }
+ else
+ { //PROGRAM_OUTPUT
+ pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index];
+ }
+
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ noswizzle_PVSSRC(&(pAsm->S[1].src));
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->pR700Shader->killIsUsed = GL_TRUE;
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
+{
+ return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
+}
+
+GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
+{
+ BITS tmp;
+
+ if( GL_FALSE == checkop3(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ tmp = gethelpr(pAsm);
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ nomask_PVSDST(&(pAsm->D.dst));
+
+
+ if( GL_FALSE == assemble_src(pAsm, 1, 0) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
+ {
+ return GL_FALSE;
+ }
+
+ neg_PVSSRC(&(pAsm->S[1].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ nomask_PVSDST(&(pAsm->D.dst));
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+
+ if( GL_FALSE == assemble_src(pAsm, 0, 1) )
+ {
+ return GL_FALSE;
+ }
+ if( GL_FALSE == assemble_src(pAsm, 2, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
+{
+ int tmp, ii;
+ GLboolean bReplaceDst = GL_FALSE;
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ if( GL_FALSE == checkop3(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ tmp = (-1);
+
+ if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
+ { /* TODO : more investigation on MAD src and dst using same register */
+ for(ii=0; ii<3; ii++)
+ {
+ if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
+ && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
+ {
+ bReplaceDst = GL_TRUE;
+ break;
+ }
+ }
+ }
+ if(0xF != pILInst->DstReg.WriteMask)
+ { /* OP3 has no support for write mask */
+ bReplaceDst = GL_TRUE;
+ }
+
+ if(GL_TRUE == bReplaceDst)
+ {
+ tmp = gethelpr(pAsm);
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ nomask_PVSDST(&(pAsm->D.dst));
+ }
+ else
+ {
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 2, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if (GL_TRUE == bReplaceDst)
+ {
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ //tmp for source
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+/* LIT dst, src */
+GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
+{
+ unsigned int dstReg;
+ unsigned int dstType;
+ unsigned int srcReg;
+ unsigned int srcType;
+ checkop1(pAsm);
+ int tmp = gethelpr(pAsm);
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+ dstReg = pAsm->D.dst.reg;
+ dstType = pAsm->D.dst.rtype;
+ srcReg = pAsm->S[0].src.reg;
+ srcType = pAsm->S[0].src.rtype;
+
+ /* dst.xw, <- 1.0 */
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ pAsm->D.dst.rtype = dstType;
+ pAsm->D.dst.reg = dstReg;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 1;
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ pAsm->S[0].src.swizzlex = SQ_SEL_1;
+ pAsm->S[0].src.swizzley = SQ_SEL_1;
+ pAsm->S[0].src.swizzlez = SQ_SEL_1;
+ pAsm->S[0].src.swizzlew = SQ_SEL_1;
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ /* dst.y = max(src.x, 0.0) */
+ pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+ pAsm->D.dst.rtype = dstType;
+ pAsm->D.dst.reg = dstReg;
+ pAsm->D.dst.writex = 0;
+ pAsm->D.dst.writey = 1;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+ pAsm->S[0].src.rtype = srcType;
+ pAsm->S[0].src.reg = srcReg;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = tmp;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[1].src));
+ pAsm->S[1].src.swizzlex = SQ_SEL_0;
+ pAsm->S[1].src.swizzley = SQ_SEL_0;
+ pAsm->S[1].src.swizzlez = SQ_SEL_0;
+ pAsm->S[1].src.swizzlew = SQ_SEL_0;
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
+
+ /* dst.z = log(src.y) */
+ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
+ pAsm->D.dst.math = 1;
+ pAsm->D.dst.rtype = dstType;
+ pAsm->D.dst.reg = dstReg;
+ pAsm->D.dst.writex = 0;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 1;
+ pAsm->D.dst.writew = 0;
+ pAsm->S[0].src.rtype = srcType;
+ pAsm->S[0].src.reg = srcReg;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, 2) )
+ {
+ return GL_FALSE;
+ }
+
+ swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
+
+ swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
+
+ /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
+ pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
+ pAsm->D.dst.math = 1;
+ pAsm->D.dst.op3 = 1;
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+
+ pAsm->S[0].src.rtype = srcType;
+ pAsm->S[0].src.reg = srcReg;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = dstReg;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[1].src));
+ pAsm->S[1].src.swizzlex = SQ_SEL_Z;
+ pAsm->S[1].src.swizzley = SQ_SEL_Z;
+ pAsm->S[1].src.swizzlez = SQ_SEL_Z;
+ pAsm->S[1].src.swizzlew = SQ_SEL_Z;
+
+ pAsm->S[2].src.rtype = srcType;
+ pAsm->S[2].src.reg = srcReg;
+ setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ /* dst.z = exp(tmp.x) */
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+ pAsm->D.dst.rtype = dstType;
+ pAsm->D.dst.reg = dstReg;
+ pAsm->D.dst.writex = 0;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 1;
+ pAsm->D.dst.writew = 0;
+
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ pAsm->S[0].src.swizzlex = SQ_SEL_X;
+ pAsm->S[0].src.swizzley = SQ_SEL_X;
+ pAsm->S[0].src.swizzlez = SQ_SEL_X;
+ pAsm->S[0].src.swizzlew = SQ_SEL_X;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
+{
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
+{
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
+{
+ checkop1(pAsm);
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if (GL_FALSE == assemble_dst(pAsm))
+ {
+ return GL_FALSE;
+ }
+
+ if (GL_FALSE == assemble_src(pAsm, 0, -1))
+ {
+ return GL_FALSE;
+ }
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
+{
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_POW(r700_AssemblerBase *pAsm)
+{
+ BITS tmp;
+
+ checkop1(pAsm);
+
+ tmp = gethelpr(pAsm);
+
+ // LG2 tmp.x, a.swizzle
+ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+ pAsm->D.dst.math = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ nomask_PVSDST(&(pAsm->D.dst));
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MUL tmp.x, tmp.x, b.swizzle
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ nomask_PVSDST(&(pAsm->D.dst));
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // EX2 dst.mask, tmp.x
+ // EX2 tmp.x, tmp.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ nomask_PVSDST(&(pAsm->D.dst));
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // Now replicate result to all necessary channels in destination
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
+{
+ return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
+}
+
+GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
+{
+ return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
+}
+
+GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
+{
+ return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
+}
+
+GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
+{
+ BITS tmp;
+
+ checkop1(pAsm);
+
+ tmp = gethelpr(pAsm);
+
+ // COS tmp.x, a.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_COS;
+ pAsm->D.dst.math = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // SIN tmp.y, a.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
+ pAsm->D.dst.math = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writey = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MOV dst.mask, tmp
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+ pAsm->S[0].src.swizzlez = SQ_SEL_0;
+ pAsm->S[0].src.swizzlew = SQ_SEL_0;
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
+{
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
+{
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, 1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, 0) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_STP(r700_AssemblerBase *pAsm)
+{
+ return GL_TRUE;
+}
+
+GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
+{
+ GLboolean src_const;
+ GLboolean need_barrier = GL_FALSE;
+
+ checkop1(pAsm);
+
+ switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
+ {
+ case PROGRAM_CONSTANT:
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_STATE_VAR:
+ src_const = GL_TRUE;
+ break;
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ default:
+ src_const = GL_FALSE;
+ break;
+ }
+
+ if (GL_TRUE == src_const)
+ {
+ if ( GL_FALSE == mov_temp(pAsm, 0) )
+ return GL_FALSE;
+ need_barrier = GL_TRUE;
+ }
+
+ switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
+ {
+ case OPCODE_TEX:
+ break;
+ case OPCODE_TXB:
+ radeon_error("do not support TXB yet\n");
+ return GL_FALSE;
+ break;
+ case OPCODE_TXP:
+ break;
+ default:
+ radeon_error("Internal error: bad texture op (not TEX)\n");
+ return GL_FALSE;
+ break;
+ }
+
+ if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
+ {
+ GLuint tmp = gethelpr(pAsm);
+ pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
+ pAsm->D.dst.math = 1;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writew = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+ swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 1;
+ pAsm->D.dst.writez = 1;
+ pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->aArgSubst[1] = tmp;
+ need_barrier = GL_TRUE;
+ }
+
+ if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
+ {
+ GLuint tmp1 = gethelpr(pAsm);
+ GLuint tmp2 = gethelpr(pAsm);
+
+ /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
+ pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp1;
+ nomask_PVSDST(&(pAsm->D.dst));
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, 1) )
+ {
+ return GL_FALSE;
+ }
+
+ swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
+ swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
+ * have to do explicit instruction
+ */
+ pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp1;
+ pAsm->D.dst.writez = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp1;
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+ pAsm->S[1].bits = pAsm->S[0].bits;
+ flipneg_PVSSRC(&(pAsm->S[1].src));
+
+ next_ins(pAsm);
+
+ /* tmp1.z = RCP_e(|tmp1.z|) */
+ pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
+ pAsm->D.dst.math = 1;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp1;
+ pAsm->D.dst.writez = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp1;
+ pAsm->S[0].src.swizzlex = SQ_SEL_Z;
+
+ next_ins(pAsm);
+
+ /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
+ * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
+ * muladd has no writemask, have to use another temp
+ * also no support for imm constants, so add 1 here
+ */
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp2;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp1;
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = tmp1;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
+ setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
+ pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[2].src.reg = tmp1;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
+
+ next_ins(pAsm);
+
+ /* ADD the remaining .5 */
+ pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp2;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 1;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5
+ noswizzle_PVSSRC(&(pAsm->S[1].src));
+
+ next_ins(pAsm);
+
+ /* tmp1.xy = temp2.xy */
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp1;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 1;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+ next_ins(pAsm);
+ pAsm->aArgSubst[1] = tmp1;
+ need_barrier = GL_TRUE;
+
+ }
+
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ pAsm->is_tex = GL_TRUE;
+ if ( GL_TRUE == need_barrier )
+ {
+ pAsm->need_tex_barrier = GL_TRUE;
+ }
+ // Set src1 to tex unit id
+ pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+
+ //No sw info from mesa compiler, so hard code here.
+ pAsm->S[1].src.swizzlex = SQ_SEL_X;
+ pAsm->S[1].src.swizzley = SQ_SEL_Y;
+ pAsm->S[1].src.swizzlez = SQ_SEL_Z;
+ pAsm->S[1].src.swizzlew = SQ_SEL_W;
+
+ if( GL_FALSE == tex_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == tex_src(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
+ {
+ /* hopefully did swizzles before */
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+ }
+
+ if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
+ {
+ /* SAMPLE dst, tmp.yxwy, CUBE */
+ pAsm->S[0].src.swizzlex = SQ_SEL_Y;
+ pAsm->S[0].src.swizzley = SQ_SEL_X;
+ pAsm->S[0].src.swizzlez = SQ_SEL_W;
+ pAsm->S[0].src.swizzlew = SQ_SEL_Y;
+ }
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
+{
+ BITS tmp;
+
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ tmp = gethelpr(pAsm);
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ nomask_PVSDST(&(pAsm->D.dst));
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
+ swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
+ {
+ tmp = gethelpr(pAsm);
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ nomask_PVSDST(&(pAsm->D.dst));
+ }
+ else
+ {
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
+ swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
+
+ // result1 + (neg) result0
+ setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
+ pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[2].src.reg = tmp;
+
+ neg_PVSSRC(&(pAsm->S[2].src));
+ noswizzle_PVSSRC(&(pAsm->S[2].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+
+ if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
+ {
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ // Use tmp as source
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
+{
+ return GL_TRUE;
+}
+
+GLboolean assemble_IF(r700_AssemblerBase *pAsm)
+{
+ return GL_TRUE;
+}
+
+GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
+{
+ return GL_TRUE;
+}
+
+GLboolean AssembleInstr(GLuint uiNumberInsts,
+ struct prog_instruction *pILInst,
+ r700_AssemblerBase *pR700AsmCode)
+{
+ GLuint i;
+
+ pR700AsmCode->pILInst = pILInst;
+ for(i=0; i<uiNumberInsts; i++)
+ {
+ pR700AsmCode->uiCurInst = i;
+
+ switch (pILInst[i].Opcode)
+ {
+ case OPCODE_ABS:
+ if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_ADD:
+ case OPCODE_SUB:
+ if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_ARL:
+ radeon_error("Not yet implemented instruction OPCODE_ARL \n");
+ //if ( GL_FALSE == assemble_BAD("ARL") )
+ return GL_FALSE;
+ break;
+ case OPCODE_ARR:
+ radeon_error("Not yet implemented instruction OPCODE_ARR \n");
+ //if ( GL_FALSE == assemble_BAD("ARR") )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_CMP:
+ if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_COS:
+ if ( GL_FALSE == assemble_COS(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_DP3:
+ case OPCODE_DP4:
+ case OPCODE_DPH:
+ if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_DST:
+ if ( GL_FALSE == assemble_DST(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_EX2:
+ if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_EXP:
+ radeon_error("Not yet implemented instruction OPCODE_EXP \n");
+ //if ( GL_FALSE == assemble_BAD("EXP") )
+ return GL_FALSE;
+ break; // approx of EX2
+
+ case OPCODE_FLR:
+ if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ //case OP_FLR_INT:
+ // if ( GL_FALSE == assemble_FLR_INT() )
+ // return GL_FALSE;
+ // break;
+
+ case OPCODE_FRC:
+ if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_KIL:
+ if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_LG2:
+ if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_LIT:
+ if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_LRP:
+ if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_LOG:
+ radeon_error("Not yet implemented instruction OPCODE_LOG \n");
+ //if ( GL_FALSE == assemble_BAD("LOG") )
+ return GL_FALSE;
+ break; // approx of LG2
+
+ case OPCODE_MAD:
+ if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_MAX:
+ if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_MIN:
+ if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_MOV:
+ if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_MUL:
+ if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_POW:
+ if ( GL_FALSE == assemble_POW(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_RCP:
+ if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_RSQ:
+ if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_SIN:
+ if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_SCS:
+ if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_SGE:
+ if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_SLT:
+ if ( GL_FALSE == assemble_SLT(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ //case OP_STP:
+ // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
+ // return GL_FALSE;
+ // break;
+
+ case OPCODE_SWZ:
+ if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ else
+ {
+ if( (i+1)<uiNumberInsts )
+ {
+ if(OPCODE_END != pILInst[i+1].Opcode)
+ {
+ if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
+ {
+ pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
+ }
+ }
+ }
+ }
+ break;
+
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ case OPCODE_TXP:
+ if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_XPD:
+ if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ case OPCODE_IF :
+ if ( GL_FALSE == assemble_IF(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+ case OPCODE_ELSE :
+ radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
+ //if ( GL_FALSE == assemble_BAD("ELSE") )
+ return GL_FALSE;
+ break;
+ case OPCODE_ENDIF:
+ if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
+ return GL_FALSE;
+ break;
+
+ //case OPCODE_EXPORT:
+ // if ( GL_FALSE == assemble_EXPORT() )
+ // return GL_FALSE;
+ // break;
+
+ case OPCODE_END:
+ //pR700AsmCode->uiCurInst = i;
+ //This is to remaind that if in later exoort there is depth/stencil
+ //export, we need a mov to re-arrange DST channel, where using a
+ //psuedo inst, we will use this end inst to do it.
+ return GL_TRUE;
+
+ default:
+ radeon_error("internal: unknown instruction\n");
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean Process_Export(r700_AssemblerBase* pAsm,
+ GLuint type,
+ GLuint export_starting_index,
+ GLuint export_count,
+ GLuint starting_register_number,
+ GLboolean is_depth_export)
+{
+ unsigned char ucWriteMask;
+
+ check_current_clause(pAsm, CF_EMPTY_CLAUSE);
+ check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
+
+ pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
+
+ switch (type)
+ {
+ case SQ_EXPORT_PIXEL:
+ if(GL_TRUE == is_depth_export)
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
+ }
+ else
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
+ }
+ break;
+
+ case SQ_EXPORT_POS:
+ pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
+ break;
+
+ case SQ_EXPORT_PARAM:
+ pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
+ break;
+
+ default:
+ radeon_error("Unknown export type: %d\n", type);
+ return GL_FALSE;
+ break;
+ }
+
+ pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
+
+ pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
+ pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
+ pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
+
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ if (export_count == 1)
+ {
+ ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
+ /* exports Z as a float into Red channel */
+ if (GL_TRUE == is_depth_export)
+ ucWriteMask = 0x1;
+
+ if( (ucWriteMask & 0x1) != 0)
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
+ }
+ else
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
+ }
+ if( ((ucWriteMask>>1) & 0x1) != 0)
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
+ }
+ else
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
+ }
+ if( ((ucWriteMask>>2) & 0x1) != 0)
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
+ }
+ else
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
+ }
+ if( ((ucWriteMask>>3) & 0x1) != 0)
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
+ }
+ else
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
+ }
+ }
+ else
+ {
+ // This should only be used if all components for all registers have been written
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
+ pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
+ }
+
+ pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
+
+ return GL_TRUE;
+}
+
+GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
+{
+ gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
+ pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
+
+ // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pAsm->depth_export_register_number;
+
+ pAsm->D.dst.writex = 1; // depth goes in R channel for HW
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pAsm->depth_export_register_number;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
+
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
+
+ return GL_TRUE;
+}
+
+GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
+ GLbitfield OutputsWritten)
+{
+ unsigned int unBit;
+
+ if(pR700AsmCode->depth_export_register_number >= 0)
+ {
+ if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
+ {
+ return GL_FALSE;
+ }
+ }
+
+ unBit = 1 << FRAG_RESULT_COLOR;
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PIXEL,
+ 0,
+ 1,
+ pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ unBit = 1 << FRAG_RESULT_DEPTH;
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PIXEL,
+ 0,
+ 1,
+ pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
+ GL_TRUE))
+ {
+ return GL_FALSE;
+ }
+ }
+
+ if(pR700AsmCode->cf_last_export_ptr != NULL)
+ {
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
+ GLbitfield OutputsWritten)
+{
+ unsigned int unBit;
+ unsigned int i;
+
+ GLuint export_starting_index = 0;
+ GLuint export_count = pR700AsmCode->number_of_exports;
+
+ unBit = 1 << VERT_RESULT_HPOS;
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_POS,
+ export_starting_index,
+ 1,
+ pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+
+ export_count--;
+
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ }
+
+ pR700AsmCode->number_of_exports = export_count;
+
+ unBit = 1 << VERT_RESULT_COL0;
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PARAM,
+ export_starting_index,
+ 1,
+ pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+
+ export_starting_index++;
+ }
+
+ unBit = 1 << VERT_RESULT_COL1;
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PARAM,
+ export_starting_index,
+ 1,
+ pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+
+ export_starting_index++;
+ }
+
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PARAM,
+ export_starting_index,
+ 1,
+ pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+
+ export_starting_index++;
+ }
+
+ for(i=0; i<8; i++)
+ {
+ unBit = 1 << (VERT_RESULT_TEX0 + i);
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PARAM,
+ export_starting_index,
+ 1,
+ pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+
+ export_starting_index++;
+ }
+ }
+
+ // At least one param should be exported
+ if (export_count)
+ {
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ }
+ else
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PARAM,
+ 0,
+ 1,
+ pR700AsmCode->starting_export_register_number,
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+
+ pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
+ pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
+ pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
+ pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ }
+
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
+
+ return GL_TRUE;
+}
+
+GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
+{
+ FREE(pR700AsmCode->pucOutMask);
+ FREE(pR700AsmCode->pInstDeps);
+ return GL_TRUE;
+}
+
diff --git a/r600/r700_assembler.h b/r600/r700_assembler.h
new file mode 100644
index 0000000..73bb8ba
--- /dev/null
+++ b/r600/r700_assembler.h
@@ -0,0 +1,516 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_ASSEMBLER_H_
+#define _R700_ASSEMBLER_H_
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+
+#include "r700_chip.h"
+#include "r700_shaderinst.h"
+#include "r700_shader.h"
+
+typedef enum SHADER_PIPE_TYPE
+{
+ SPT_VP = 0,
+ SPT_FP = 1
+} SHADER_PIPE_TYPE;
+
+typedef enum ConstantCycles
+{
+ NUMBER_OF_CYCLES = 3,
+ NUMBER_OF_COMPONENTS = 4
+} ConstantCycles;
+
+typedef enum HARDWARE_LIMIT_VALUES
+{
+ TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE,
+ MAX_TEMPORARY_REGISTERS = SQ_ALU_SRC_GPR_SIZE,
+ MAX_CONSTANT_REGISTERS = SQ_ALU_SRC_CFILE_SIZE,
+ CFILE_REGISTER_OFFSET = SQ_ALU_SRC_CFILE_BASE,
+ NUMBER_OF_INPUT_COLORS = 2,
+ NUMBER_OF_OUTPUT_COLORS = 8,
+ NUMBER_OF_TEXTURE_UNITS = 16,
+ MEGA_FETCH_BYTES = 32
+} HARDWARE_LIMIT_VALUES;
+
+typedef enum AddressMode
+{
+ ADDR_ABSOLUTE = 0,
+ ADDR_RELATIVE_A0 = 1,
+ ADDR_RELATIVE_FLI_0 = 2,
+ NUMBER_OF_ADDR_MOD = 3
+} AddressMode;
+
+typedef enum SrcRegisterType
+{
+ SRC_REG_TEMPORARY = 0,
+ SRC_REG_INPUT = 1,
+ SRC_REG_CONSTANT = 2,
+ SRC_REG_ALT_TEMPORARY = 3,
+ NUMBER_OF_SRC_REG_TYPE = 4
+} SrcRegisterType;
+
+typedef enum DstRegisterType
+{
+ DST_REG_TEMPORARY = 0,
+ DST_REG_A0 = 1,
+ DST_REG_OUT = 2,
+ DST_REG_OUT_X_REPL = 3,
+ DST_REG_ALT_TEMPORARY = 4,
+ DST_REG_INPUT = 5,
+ NUMBER_OF_DST_REG_TYPE = 6
+} DstRegisterType;
+
+typedef unsigned int BITS;
+
+typedef struct PVSDSTtag
+{
+ BITS opcode:8; //(:6) //@@@ really should be 10 bits for OP2
+ BITS math:1;
+ BITS predicated:1; //10 //8
+ BITS pred_inv :1; //11 //8
+
+ BITS rtype:3;
+ BITS reg:10; //24 //20
+
+ BITS writex:1;
+ BITS writey:1;
+ BITS writez:1;
+ BITS writew:1; //28
+
+ BITS op3:1; // 29 Represents *_OP3_* ALU opcode
+
+ BITS dualop:1; // 30 //26
+
+ BITS addrmode0:1; //31 //29
+ BITS addrmode1:1; //32
+} PVSDST;
+
+typedef struct PVSSRCtag
+{
+ BITS rtype:4;
+ BITS addrmode0:1;
+ BITS reg:10; //15 (8)
+ BITS swizzlex:3;
+ BITS swizzley:3;
+ BITS swizzlez:3;
+ BITS swizzlew:3; //27
+
+ BITS negx:1;
+ BITS negy:1;
+ BITS negz:1;
+ BITS negw:1; //31
+ //BITS addrsel:2;
+ BITS addrmode1:1; //32
+} PVSSRC;
+
+typedef struct PVSMATHtag
+{
+ BITS rtype:4;
+ BITS spare:1;
+ BITS reg:8;
+ BITS swizzlex:3;
+ BITS swizzley:3;
+ BITS dstoff:2; // 2 bits of dest offset into alt ram
+ BITS opcode:4;
+ BITS negx:1;
+ BITS negy:1;
+ BITS dstcomp:2; // select dest component
+ BITS spare2:3;
+} PVSMATH;
+
+typedef union PVSDWORDtag
+{
+ BITS bits;
+ PVSDST dst;
+ PVSSRC src;
+ PVSMATH math;
+ float f;
+} PVSDWORD;
+
+typedef struct VAP_OUT_VTX_FMT_0tag
+{
+ BITS pos:1; // 0
+ BITS misc:1;
+ BITS clip_dist0:1;
+ BITS clip_dist1:1;
+ BITS pos_param:1; // 4
+
+ BITS color0:1; // 5
+ BITS color1:1;
+ BITS color2:1;
+ BITS color3:1;
+ BITS color4:1;
+ BITS color5:1;
+ BITS color6:1;
+ BITS color7:1;
+
+ BITS normal:1;
+
+ BITS depth:1; // 14
+
+ BITS point_size:1; // 15
+ BITS edge_flag:1;
+ BITS rta_index:1; // shares same channel as kill_flag
+ BITS kill_flag:1;
+ BITS viewport_index:1; // 19
+
+ BITS resvd1:12; // 20
+} VAP_OUT_VTX_FMT_0;
+
+typedef struct VAP_OUT_VTX_FMT_1tag
+{
+ BITS tex0comp:3;
+ BITS tex1comp:3;
+ BITS tex2comp:3;
+ BITS tex3comp:3;
+ BITS tex4comp:3;
+ BITS tex5comp:3;
+ BITS tex6comp:3;
+ BITS tex7comp:3;
+
+ BITS resvd:8;
+} VAP_OUT_VTX_FMT_1;
+
+typedef struct VAP_OUT_VTX_FMT_2tag
+{
+ BITS tex8comp :3;
+ BITS tex9comp :3;
+ BITS tex10comp:3;
+ BITS tex11comp:3;
+ BITS tex12comp:3;
+ BITS tex13comp:3;
+ BITS tex14comp:3;
+ BITS tex15comp:3;
+
+ BITS resvd:8;
+} VAP_OUT_VTX_FMT_2;
+
+typedef struct OUT_FRAGMENT_FMT_0tag
+{
+ BITS color0:1;
+ BITS color1:1;
+ BITS color2:1;
+ BITS color3:1;
+ BITS color4:1;
+ BITS color5:1;
+ BITS color6:1;
+ BITS color7:1;
+
+ BITS depth:1;
+ BITS stencil_ref:1;
+ BITS coverage_to_mask:1;
+ BITS mask:1;
+
+ BITS resvd1:20;
+} OUT_FRAGMENT_FMT_0;
+
+typedef enum CF_CLAUSE_TYPE
+{
+ CF_EXPORT_CLAUSE,
+ CF_ALU_CLAUSE,
+ CF_TEX_CLAUSE,
+ CF_VTX_CLAUSE,
+ CF_OTHER_CLAUSE,
+ CF_EMPTY_CLAUSE,
+ NUMBER_CF_CLAUSE_TYPES
+} CF_CLAUSE_TYPE;
+
+enum
+{
+ MAX_BOOL_CONSTANTS = 32,
+ MAX_INT_CONSTANTS = 32,
+ MAX_FLOAT_CONSTANTS = 256,
+
+ FC_NONE = 0,
+ FC_IF = 1,
+ FC_LOOP = 2,
+ FC_REP = 3,
+
+ COND_NONE = 0,
+ COND_BOOL = 1,
+ COND_PRED = 2,
+ COND_ALU = 3,
+
+ SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
+ SAFEDIST_ALU = 6 ///< the same for alu->fc
+};
+
+typedef struct FC_LEVEL
+{
+ unsigned int first; ///< first fc instruction on level (if, rep, loop)
+ unsigned int* mid; ///< middle instructions - else or all breaks on this level
+ unsigned int midLen;
+ unsigned int type;
+ unsigned int cond;
+ unsigned int inv;
+ unsigned int bpush; ///< 1 if first instruction does branch stack push
+ int id; ///< id of bool or int variable
+} FC_LEVEL;
+
+typedef struct VTX_FETCH_METHOD
+{
+ GLboolean bEnableMini;
+ GLuint mega_fetch_remainder;
+} VTX_FETCH_METHOD;
+
+typedef struct r700_AssemblerBase
+{
+ R700ControlFlowSXClause* cf_last_export_ptr;
+ R700ControlFlowSXClause* cf_current_export_clause_ptr;
+ R700ControlFlowALUClause* cf_current_alu_clause_ptr;
+ R700ControlFlowGenericClause* cf_current_tex_clause_ptr;
+ R700ControlFlowGenericClause* cf_current_vtx_clause_ptr;
+ R700ControlFlowGenericClause* cf_current_cf_clause_ptr;
+
+ //Result shader
+ R700_Shader * pR700Shader;
+
+ // No clause has been created yet
+ CF_CLAUSE_TYPE cf_current_clause_type;
+
+ GLuint number_of_exports;
+ GLuint number_of_colorandz_exports;
+ GLuint number_of_export_opcodes;
+
+ PVSDWORD D;
+ PVSDWORD S[3];
+
+ unsigned int uLastPosUpdate;
+
+ OUT_FRAGMENT_FMT_0 fp_stOutFmt0;
+
+ unsigned int uIIns;
+ unsigned int uOIns;
+ unsigned int number_used_registers;
+ unsigned int uUsedConsts;
+
+ // Fragment programs
+ unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
+ unsigned int uiFP_OutputMap[FRAG_RESULT_MAX];
+ unsigned int uBoolConsts;
+ unsigned int uIntConsts;
+ unsigned int uInsts;
+ unsigned int uConsts;
+
+ // Vertex programs
+ unsigned char ucVP_AttributeMap[VERT_ATTRIB_MAX];
+ unsigned char ucVP_OutputMap[VERT_RESULT_MAX];
+
+ unsigned char * pucOutMask;
+
+ //-----------------------------------------------------------------------------------
+ // flow control members
+ //-----------------------------------------------------------------------------------
+ unsigned int FCSP;
+ FC_LEVEL fc_stack[32];
+
+ unsigned int branch_depth;
+ unsigned int max_branch_depth;
+
+ //-----------------------------------------------------------------------------------
+ // ArgSubst used in Assemble_Source() function
+ //-----------------------------------------------------------------------------------
+ int aArgSubst[4];
+
+ GLint hw_gpr[ NUMBER_OF_CYCLES ][ NUMBER_OF_COMPONENTS ];
+ GLint hw_cfile_addr[ NUMBER_OF_COMPONENTS ];
+ GLint hw_cfile_chan[ NUMBER_OF_COMPONENTS ];
+
+ GLuint uOutputs;
+
+ GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS];
+ GLint depth_export_register_number;
+
+ GLint stencil_export_register_number;
+ GLint coverage_to_mask_export_register_number;
+ GLint mask_export_register_number;
+
+ GLuint starting_export_register_number;
+ GLuint starting_vfetch_register_number;
+ GLuint starting_temp_register_number;
+ GLuint uHelpReg;
+ GLuint uFirstHelpReg;
+
+ GLboolean input_position_is_used;
+ GLboolean input_normal_is_used;
+
+ GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS];
+
+ GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS];
+
+ R700VertexGenericFetch* vfetch_instruction_ptr_array[VERT_ATTRIB_MAX];
+
+ GLuint number_of_inputs;
+
+ InstDeps *pInstDeps;
+
+ SHADER_PIPE_TYPE currentShaderType;
+ struct prog_instruction * pILInst;
+ GLuint uiCurInst;
+ GLboolean bR6xx;
+ /* helper to decide which type of instruction to assemble */
+ GLboolean is_tex;
+ /* we inserted helper intructions and need barrier on next TEX ins */
+ GLboolean need_tex_barrier;
+} r700_AssemblerBase;
+
+//Internal use
+BITS addrmode_PVSDST(PVSDST * pPVSDST);
+void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode);
+void nomask_PVSDST(PVSDST * pPVSDST);
+BITS addrmode_PVSSRC(PVSSRC* pPVSSRC);
+void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode);
+void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz);
+void noswizzle_PVSSRC(PVSSRC* pPVSSRC);
+void swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w);
+void neg_PVSSRC(PVSSRC* pPVSSRC);
+void noneg_PVSSRC(PVSSRC* pPVSSRC);
+void flipneg_PVSSRC(PVSSRC* pPVSSRC);
+void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c);
+void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c);
+BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0);
+BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ;
+GLboolean is_reduction_opcode(PVSDWORD * dest);
+GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size);
+
+unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm);
+
+GLboolean IsTex(gl_inst_opcode Opcode);
+GLboolean IsAlu(gl_inst_opcode Opcode);
+int check_current_clause(r700_AssemblerBase* pAsm,
+ CF_CLAUSE_TYPE new_clause_type);
+GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
+ R700VertexInstruction* vertex_instruction_ptr);
+GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
+ R700TextureInstruction* tex_instruction_ptr);
+GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
+ GLuint gl_client_id,
+ GLuint destination_register,
+ GLuint number_of_elements,
+ GLenum dataElementType,
+ VTX_FETCH_METHOD* pFetchMethod);
+GLuint gethelpr(r700_AssemblerBase* pAsm);
+void resethelpr(r700_AssemblerBase* pAsm);
+void checkop_init(r700_AssemblerBase* pAsm);
+GLboolean mov_temp(r700_AssemblerBase* pAsm, int src);
+GLboolean checkop1(r700_AssemblerBase* pAsm);
+GLboolean checkop2(r700_AssemblerBase* pAsm);
+GLboolean checkop3(r700_AssemblerBase* pAsm);
+GLboolean assemble_src(r700_AssemblerBase *pAsm,
+ int src,
+ int fld);
+GLboolean assemble_dst(r700_AssemblerBase *pAsm);
+GLboolean tex_dst(r700_AssemblerBase *pAsm);
+GLboolean tex_src(r700_AssemblerBase *pAsm);
+GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized);
+void initialize(r700_AssemblerBase *pAsm);
+GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
+ int source_index,
+ PVSSRC* pSource,
+ BITS scalar_channel_index);
+GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
+ R700ALUInstruction* alu_instruction_ptr,
+ GLuint contiguous_slots_needed);
+void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
+ int source_index,
+ BITS* psrc_sel,
+ BITS* psrc_rel,
+ BITS* psrc_chan,
+ BITS* psrc_neg);
+int is_cfile(BITS sel);
+int is_const(BITS sel);
+int is_gpr(BITS sel);
+GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
+ GLuint sel,
+ GLuint chan);
+GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle);
+GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
+GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
+GLboolean check_scalar(r700_AssemblerBase* pAsm,
+ R700ALUInstruction* alu_instruction_ptr);
+GLboolean check_vector(r700_AssemblerBase* pAsm,
+ R700ALUInstruction* alu_instruction_ptr);
+GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
+GLboolean next_ins(r700_AssemblerBase *pAsm);
+GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
+GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
+GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
+GLboolean assemble_BAD(char *opcode_str);
+GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
+GLboolean assemble_COS(r700_AssemblerBase *pAsm);
+GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
+GLboolean assemble_DST(r700_AssemblerBase *pAsm);
+GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
+GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
+GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
+GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
+GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
+GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
+GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
+GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
+GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
+GLboolean assemble_MIN(r700_AssemblerBase *pAsm);
+GLboolean assemble_MOV(r700_AssemblerBase *pAsm);
+GLboolean assemble_MUL(r700_AssemblerBase *pAsm);
+GLboolean assemble_POW(r700_AssemblerBase *pAsm);
+GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
+GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
+GLboolean assemble_SIN(r700_AssemblerBase *pAsm);
+GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
+GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
+GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
+GLboolean assemble_STP(r700_AssemblerBase *pAsm);
+GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
+GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
+GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm);
+GLboolean assemble_IF(r700_AssemblerBase *pAsm);
+GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
+
+GLboolean Process_Export(r700_AssemblerBase* pAsm,
+ GLuint type,
+ GLuint export_starting_index,
+ GLuint export_count,
+ GLuint starting_register_number,
+ GLboolean is_depth_export);
+GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm,
+ BITS depth_channel_select);
+
+
+//Interface
+GLboolean AssembleInstr(GLuint uiNumberInsts,
+ struct prog_instruction *pILInst,
+ r700_AssemblerBase *pR700AsmCode);
+GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
+GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
+
+int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
+GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode);
+
+#endif //_R700_ASSEMBLER_H_
diff --git a/r600/r700_chip.c b/r600/r700_chip.c
new file mode 100644
index 0000000..06d7e9c
--- /dev/null
+++ b/r600/r700_chip.c
@@ -0,0 +1,1274 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "main/imports.h"
+#include "main/glheader.h"
+#include "main/simple_list.h"
+
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+
+#include "r700_state.h"
+#include "r600_tex.h"
+#include "r700_oglprog.h"
+#include "r700_fragprog.h"
+#include "r700_vertprog.h"
+#include "r700_ioctl.h"
+
+#include "radeon_mipmap_tree.h"
+
+static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ struct radeon_bo *bo = NULL;
+ unsigned int i;
+ BATCH_LOCALS(&context->radeon);
+
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ radeonTexObj *t = r700->textures[i];
+ if (t) {
+ if (!t->image_override)
+ bo = t->mt->bo;
+ else
+ bo = t->bo;
+ if (bo) {
+
+ r700SyncSurf(context, bo,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
+ 0, TC_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+ R600_OUT_BATCH(i * 7);
+ R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
+ R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
+ R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2);
+ R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3);
+ R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4);
+ R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5);
+ R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
+ R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
+ bo,
+ 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
+ bo,
+ r700->textures[i]->SQ_TEX_RESOURCE3,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+ }
+ }
+ }
+ }
+}
+
+static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ unsigned int i;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ radeonTexObj *t = r700->textures[i];
+ if (t) {
+ BEGIN_BATCH_NO_AUTOSTATE(5);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
+ R600_OUT_BATCH(i * 3);
+ R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
+ R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
+ R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
+ END_BATCH();
+ COMMIT_BATCH();
+ }
+ }
+ }
+}
+
+static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ unsigned int i;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ radeonTexObj *t = r700->textures[i];
+ if (t) {
+ BEGIN_BATCH_NO_AUTOSTATE(2 + 4);
+ R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4);
+ R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED);
+ R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN);
+ R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE);
+ R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA);
+ END_BATCH();
+ COMMIT_BATCH();
+ }
+ }
+ }
+}
+
+static void r700SetupVTXConstants(GLcontext * ctx,
+ unsigned int nStreamID,
+ void * pAos,
+ unsigned int size, /* number of elements in vector */
+ unsigned int stride,
+ unsigned int count) /* number of vectors in stream */
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct radeon_aos * paos = (struct radeon_aos *)pAos;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ unsigned int uSQ_VTX_CONSTANT_WORD0_0;
+ unsigned int uSQ_VTX_CONSTANT_WORD1_0;
+ unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0;
+ unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0;
+ unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0;
+
+ if (!paos->bo)
+ return;
+
+ if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+ r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
+ else
+ r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
+
+ uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
+ uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1;
+
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask);
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL),
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+ SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+
+ SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask);
+ SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
+
+ BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+ R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0);
+ R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0,
+ paos->bo,
+ uSQ_VTX_CONSTANT_WORD0_0,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+
+}
+
+void r700SetupStreams(GLcontext *ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct r700_vertex_program *vp = context->selected_vp;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
+ unsigned int i, j = 0;
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ R600_STATECHANGE(context, vtx);
+
+ for(i=0; i<VERT_ATTRIB_MAX; i++) {
+ if(vp->mesa_program->Base.InputsRead & (1 << i)) {
+ rcommon_emit_vector(ctx,
+ &context->radeon.tcl.aos[j],
+ vb->AttribPtr[i]->data,
+ vb->AttribPtr[i]->size,
+ vb->AttribPtr[i]->stride,
+ vb->Count);
+ j++;
+ }
+ }
+ context->radeon.tcl.aos_count = j;
+}
+
+static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct r700_vertex_program *vp = context->selected_vp;
+ unsigned int i, j = 0;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ if (context->radeon.tcl.aos_count == 0)
+ return;
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(0);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+ R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(0);
+ END_BATCH();
+ COMMIT_BATCH();
+
+ for(i=0; i<VERT_ATTRIB_MAX; i++) {
+ if(vp->mesa_program->Base.InputsRead & (1 << i)) {
+ /* currently aos are packed */
+ r700SetupVTXConstants(ctx,
+ i,
+ (void*)(&context->radeon.tcl.aos[j]),
+ (unsigned int)context->radeon.tcl.aos[j].components,
+ (unsigned int)context->radeon.tcl.aos[j].stride * 4,
+ (unsigned int)context->radeon.tcl.aos[j].count);
+ j++;
+ }
+ }
+}
+
+static void r700SetRenderTarget(context_t *context, int id)
+{
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ struct radeon_renderbuffer *rrb;
+ unsigned int nPitchInPixel;
+
+ rrb = radeon_get_colorbuffer(&context->radeon);
+ if (!rrb || !rrb->bo) {
+ return;
+ }
+
+ R600_STATECHANGE(context, cb_target);
+
+ /* color buffer */
+ r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset;
+
+ nPitchInPixel = rrb->pitch/rrb->cpp;
+ SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, (nPitchInPixel/8)-1,
+ PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+ SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
+ SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
+ r700->render_target[id].CB_COLOR0_BASE.u32All = 0;
+ SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
+ SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL,
+ CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+ if(4 == rrb->cpp)
+ {
+ SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_8_8_8_8,
+ CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
+ SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT, COMP_SWAP_shift, COMP_SWAP_mask);
+ }
+ else
+ {
+ SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_5_6_5,
+ CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
+ SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT_REV,
+ COMP_SWAP_shift, COMP_SWAP_mask);
+ }
+ SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+ SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit);
+ SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+
+ r700->render_target[id].enabled = GL_TRUE;
+}
+
+static void r700SetDepthTarget(context_t *context)
+{
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ struct radeon_renderbuffer *rrb;
+ unsigned int nPitchInPixel;
+
+ rrb = radeon_get_depthbuffer(&context->radeon);
+ if (!rrb)
+ return;
+
+ R600_STATECHANGE(context, db_target);
+
+ /* depth buf */
+ r700->DB_DEPTH_SIZE.u32All = 0;
+ r700->DB_DEPTH_BASE.u32All = 0;
+ r700->DB_DEPTH_INFO.u32All = 0;
+ r700->DB_DEPTH_VIEW.u32All = 0;
+
+ nPitchInPixel = rrb->pitch/rrb->cpp;
+
+ SETfield(r700->DB_DEPTH_SIZE.u32All, (nPitchInPixel/8)-1,
+ PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+ SETfield(r700->DB_DEPTH_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
+ SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixel / 64 - 1 */
+
+ if(4 == rrb->cpp)
+ {
+ SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24,
+ DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
+ }
+ else
+ {
+ SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16,
+ DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
+ }
+ SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_1D_TILED_THIN1,
+ DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask);
+ /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. */
+}
+
+static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ struct radeon_renderbuffer *rrb;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ rrb = radeon_get_depthbuffer(&context->radeon);
+ if (!rrb || !rrb->bo) {
+ fprintf(stderr, "no rrb\n");
+ return;
+ }
+
+ r700SetDepthTarget(context);
+
+ BEGIN_BATCH_NO_AUTOSTATE(8 + 2);
+ R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2);
+ R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All);
+ R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All);
+ R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2);
+ R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All);
+ R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All);
+ R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All,
+ rrb->bo,
+ r700->DB_DEPTH_BASE.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+
+ if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
+ (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+ R600_OUT_BATCH(1 << 0);
+ END_BATCH();
+ }
+
+ COMMIT_BATCH();
+
+}
+
+static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ struct radeon_renderbuffer *rrb;
+ BATCH_LOCALS(&context->radeon);
+ int id = 0;
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ rrb = radeon_get_colorbuffer(&context->radeon);
+ if (!rrb || !rrb->bo) {
+ fprintf(stderr, "no rrb\n");
+ return;
+ }
+
+ r700SetRenderTarget(context, 0);
+
+ if (id > R700_MAX_RENDER_TARGETS)
+ return;
+
+ if (!r700->render_target[id].enabled)
+ return;
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1);
+ R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_BASE.u32All);
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+ rrb->bo,
+ r700->render_target[id].CB_COLOR0_BASE.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+
+ if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
+ (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+ R600_OUT_BATCH((2 << id));
+ END_BATCH();
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(18);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_TILE + (4 * id), r700->render_target[id].CB_COLOR0_TILE.u32All);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_FRAG + (4 * id), r700->render_target[id].CB_COLOR0_FRAG.u32All);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All);
+ END_BATCH();
+
+ COMMIT_BATCH();
+
+}
+
+static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ struct radeon_bo * pbo;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context));
+
+ if (!pbo)
+ return;
+
+ r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
+ R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All);
+ R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All,
+ pbo,
+ r700->ps.SQ_PGM_START_PS.u32All,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(9);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All);
+ END_BATCH();
+
+ COMMIT_BATCH();
+
+}
+
+static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ struct radeon_bo * pbo;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
+
+ if (!pbo)
+ return;
+
+ r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
+ R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All);
+ R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All,
+ pbo,
+ r700->vs.SQ_PGM_START_VS.u32All,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All);
+ END_BATCH();
+
+ COMMIT_BATCH();
+}
+
+static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ struct radeon_bo * pbo;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ /* XXX fixme
+ * R6xx chips require a FS be emitted, even if it's not used.
+ * since we aren't using FS yet, just send the VS address to make
+ * the kernel command checker happy
+ */
+ pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
+ r700->fs.SQ_PGM_START_FS.u32All = r700->vs.SQ_PGM_START_VS.u32All;
+ r700->fs.SQ_PGM_RESOURCES_FS.u32All = 0;
+ r700->fs.SQ_PGM_CF_OFFSET_FS.u32All = 0;
+ /* XXX */
+
+ if (!pbo)
+ return;
+
+ r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1);
+ R600_OUT_BATCH(r700->fs.SQ_PGM_START_FS.u32All);
+ R600_OUT_BATCH_RELOC(r700->fs.SQ_PGM_START_FS.u32All,
+ pbo,
+ r700->fs.SQ_PGM_START_FS.u32All,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, r700->fs.SQ_PGM_RESOURCES_FS.u32All);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, r700->fs.SQ_PGM_CF_OFFSET_FS.u32All);
+ END_BATCH();
+
+ COMMIT_BATCH();
+
+}
+
+static void r700SendViewportState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ int id = 0;
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ if (id > R700_MAX_VIEWPORTS)
+ return;
+
+ if (!r700->viewport[id].enabled)
+ return;
+
+ BEGIN_BATCH_NO_AUTOSTATE(16);
+ R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL + (8 * id), 2);
+ R600_OUT_BATCH(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All);
+ R600_OUT_BATCH(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All);
+ R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_ZMIN_0 + (8 * id), 2);
+ R600_OUT_BATCH(r700->viewport[id].PA_SC_VPORT_ZMIN_0.u32All);
+ R600_OUT_BATCH(r700->viewport[id].PA_SC_VPORT_ZMAX_0.u32All);
+ R600_OUT_BATCH_REGSEQ(PA_CL_VPORT_XSCALE_0 + (24 * id), 6);
+ R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_XSCALE.u32All);
+ R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_XOFFSET.u32All);
+ R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_YSCALE.u32All);
+ R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_YOFFSET.u32All);
+ R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_ZSCALE.u32All);
+ R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_ZOFFSET.u32All);
+ END_BATCH();
+
+ COMMIT_BATCH();
+
+}
+
+static void r700SendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(34);
+ R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
+ R600_OUT_BATCH(r700->sq_config.SQ_CONFIG.u32All);
+ R600_OUT_BATCH(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All);
+ R600_OUT_BATCH(r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All);
+ R600_OUT_BATCH(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All);
+ R600_OUT_BATCH(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All);
+ R600_OUT_BATCH(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All);
+
+ R600_OUT_BATCH_REGVAL(TA_CNTL_AUX, r700->TA_CNTL_AUX.u32All);
+ R600_OUT_BATCH_REGVAL(VC_ENHANCE, r700->VC_ENHANCE.u32All);
+ R600_OUT_BATCH_REGVAL(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All);
+ R600_OUT_BATCH_REGVAL(DB_DEBUG, r700->DB_DEBUG.u32All);
+ R600_OUT_BATCH_REGVAL(DB_WATERMARKS, r700->DB_WATERMARKS.u32All);
+
+ R600_OUT_BATCH_REGSEQ(SQ_ESGS_RING_ITEMSIZE, 9);
+ R600_OUT_BATCH(r700->SQ_ESGS_RING_ITEMSIZE.u32All);
+ R600_OUT_BATCH(r700->SQ_GSVS_RING_ITEMSIZE.u32All);
+ R600_OUT_BATCH(r700->SQ_ESTMP_RING_ITEMSIZE.u32All);
+ R600_OUT_BATCH(r700->SQ_GSTMP_RING_ITEMSIZE.u32All);
+ R600_OUT_BATCH(r700->SQ_VSTMP_RING_ITEMSIZE.u32All);
+ R600_OUT_BATCH(r700->SQ_PSTMP_RING_ITEMSIZE.u32All);
+ R600_OUT_BATCH(r700->SQ_FBUF_RING_ITEMSIZE.u32All);
+ R600_OUT_BATCH(r700->SQ_REDUC_RING_ITEMSIZE.u32All);
+ R600_OUT_BATCH(r700->SQ_GS_VERT_ITEMSIZE.u32All);
+ END_BATCH();
+
+ COMMIT_BATCH();
+}
+
+static void r700SendUCPState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ int i;
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ for (i = 0; i < R700_MAX_UCP; i++) {
+ if (r700->ucp[i].enabled) {
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGSEQ(PA_CL_UCP_0_X + (16 * i), 4);
+ R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_X.u32All);
+ R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_Y.u32All);
+ R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_Z.u32All);
+ R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_W.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+ }
+ }
+}
+
+static void r700SendSPIState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ unsigned int ui;
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(59 + R700_MAX_SHADER_EXPORTS);
+
+ R600_OUT_BATCH_REGSEQ(SQ_VTX_SEMANTIC_0, 32);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_0.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_1.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_2.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_3.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_4.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_5.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_6.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_7.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_8.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_9.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_10.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_11.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_12.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_13.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_14.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_15.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_16.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_17.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_18.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_19.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_20.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_21.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_22.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_23.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_24.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_25.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_26.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_27.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_28.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_29.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_30.u32All);
+ R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_31.u32All);
+
+ R600_OUT_BATCH_REGSEQ(SPI_VS_OUT_ID_0, 10);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_ID_0.u32All);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_ID_1.u32All);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_ID_2.u32All);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_ID_3.u32All);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_ID_4.u32All);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_ID_5.u32All);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_ID_6.u32All);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_ID_7.u32All);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_ID_8.u32All);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_ID_9.u32All);
+
+ R600_OUT_BATCH_REGSEQ(SPI_VS_OUT_CONFIG, 9);
+ R600_OUT_BATCH(r700->SPI_VS_OUT_CONFIG.u32All);
+ R600_OUT_BATCH(r700->SPI_THREAD_GROUPING.u32All);
+ R600_OUT_BATCH(r700->SPI_PS_IN_CONTROL_0.u32All);
+ R600_OUT_BATCH(r700->SPI_PS_IN_CONTROL_1.u32All);
+ R600_OUT_BATCH(r700->SPI_INTERP_CONTROL_0.u32All);
+ R600_OUT_BATCH(r700->SPI_INPUT_Z.u32All);
+ R600_OUT_BATCH(r700->SPI_FOG_CNTL.u32All);
+ R600_OUT_BATCH(r700->SPI_FOG_FUNC_SCALE.u32All);
+ R600_OUT_BATCH(r700->SPI_FOG_FUNC_BIAS.u32All);
+
+ R600_OUT_BATCH_REGSEQ(SPI_PS_INPUT_CNTL_0, R700_MAX_SHADER_EXPORTS);
+ for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
+ R600_OUT_BATCH(r700->SPI_PS_INPUT_CNTL[ui].u32All);
+
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendVGTState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(41);
+
+ R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4);
+ R600_OUT_BATCH(r700->VGT_MAX_VTX_INDX.u32All);
+ R600_OUT_BATCH(r700->VGT_MIN_VTX_INDX.u32All);
+ R600_OUT_BATCH(r700->VGT_INDX_OFFSET.u32All);
+ R600_OUT_BATCH(r700->VGT_MULTI_PRIM_IB_RESET_INDX.u32All);
+
+ R600_OUT_BATCH_REGSEQ(VGT_OUTPUT_PATH_CNTL, 13);
+ R600_OUT_BATCH(r700->VGT_OUTPUT_PATH_CNTL.u32All);
+ R600_OUT_BATCH(r700->VGT_HOS_CNTL.u32All);
+ R600_OUT_BATCH(r700->VGT_HOS_MAX_TESS_LEVEL.u32All);
+ R600_OUT_BATCH(r700->VGT_HOS_MIN_TESS_LEVEL.u32All);
+ R600_OUT_BATCH(r700->VGT_HOS_REUSE_DEPTH.u32All);
+ R600_OUT_BATCH(r700->VGT_GROUP_PRIM_TYPE.u32All);
+ R600_OUT_BATCH(r700->VGT_GROUP_FIRST_DECR.u32All);
+ R600_OUT_BATCH(r700->VGT_GROUP_DECR.u32All);
+ R600_OUT_BATCH(r700->VGT_GROUP_VECT_0_CNTL.u32All);
+ R600_OUT_BATCH(r700->VGT_GROUP_VECT_1_CNTL.u32All);
+ R600_OUT_BATCH(r700->VGT_GROUP_VECT_0_FMT_CNTL.u32All);
+ R600_OUT_BATCH(r700->VGT_GROUP_VECT_1_FMT_CNTL.u32All);
+ R600_OUT_BATCH(r700->VGT_GS_MODE.u32All);
+
+ R600_OUT_BATCH_REGVAL(VGT_PRIMITIVEID_EN, r700->VGT_PRIMITIVEID_EN.u32All);
+ R600_OUT_BATCH_REGVAL(VGT_MULTI_PRIM_IB_RESET_EN, r700->VGT_MULTI_PRIM_IB_RESET_EN.u32All);
+ R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_0, r700->VGT_INSTANCE_STEP_RATE_0.u32All);
+ R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_1, r700->VGT_INSTANCE_STEP_RATE_1.u32All);
+
+ R600_OUT_BATCH_REGSEQ(VGT_STRMOUT_EN, 3);
+ R600_OUT_BATCH(r700->VGT_STRMOUT_EN.u32All);
+ R600_OUT_BATCH(r700->VGT_REUSE_OFF.u32All);
+ R600_OUT_BATCH(r700->VGT_VTX_CNT_EN.u32All);
+
+ R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, r700->VGT_STRMOUT_BUFFER_EN.u32All);
+
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendSXState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(9);
+ R600_OUT_BATCH_REGVAL(SX_MISC, r700->SX_MISC.u32All);
+ R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, r700->SX_ALPHA_TEST_CONTROL.u32All);
+ R600_OUT_BATCH_REGVAL(SX_ALPHA_REF, r700->SX_ALPHA_REF.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(23);
+ R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All);
+
+ R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2);
+ R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All);
+ R600_OUT_BATCH(r700->DB_DEPTH_CLEAR.u32All);
+
+ R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, r700->DB_DEPTH_CONTROL.u32All);
+ R600_OUT_BATCH_REGVAL(DB_SHADER_CONTROL, r700->DB_SHADER_CONTROL.u32All);
+
+ R600_OUT_BATCH_REGSEQ(DB_RENDER_CONTROL, 2);
+ R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All);
+ R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All);
+
+ R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All);
+ R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All);
+
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendStencilState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ R600_OUT_BATCH_REGSEQ(DB_STENCILREFMASK, 2);
+ R600_OUT_BATCH(r700->DB_STENCILREFMASK.u32All);
+ R600_OUT_BATCH(r700->DB_STENCILREFMASK_BF.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendCBState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+ BEGIN_BATCH_NO_AUTOSTATE(11);
+ R600_OUT_BATCH_REGSEQ(CB_CLEAR_RED, 4);
+ R600_OUT_BATCH(r700->CB_CLEAR_RED_R6XX.u32All);
+ R600_OUT_BATCH(r700->CB_CLEAR_GREEN_R6XX.u32All);
+ R600_OUT_BATCH(r700->CB_CLEAR_BLUE_R6XX.u32All);
+ R600_OUT_BATCH(r700->CB_CLEAR_ALPHA_R6XX.u32All);
+ R600_OUT_BATCH_REGSEQ(CB_FOG_RED, 3);
+ R600_OUT_BATCH(r700->CB_FOG_RED_R6XX.u32All);
+ R600_OUT_BATCH(r700->CB_FOG_GREEN_R6XX.u32All);
+ R600_OUT_BATCH(r700->CB_FOG_BLUE_R6XX.u32All);
+ END_BATCH();
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(7);
+ R600_OUT_BATCH_REGSEQ(CB_TARGET_MASK, 2);
+ R600_OUT_BATCH(r700->CB_TARGET_MASK.u32All);
+ R600_OUT_BATCH(r700->CB_SHADER_MASK.u32All);
+ R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, r700->CB_SHADER_CONTROL.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendCBCLRCMPState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGSEQ(CB_CLRCMP_CONTROL, 4);
+ R600_OUT_BATCH(r700->CB_CLRCMP_CONTROL.u32All);
+ R600_OUT_BATCH(r700->CB_CLRCMP_SRC.u32All);
+ R600_OUT_BATCH(r700->CB_CLRCMP_DST.u32All);
+ R600_OUT_BATCH(r700->CB_CLRCMP_MSK.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendCBBlendState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ unsigned int ui;
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ R600_OUT_BATCH_REGVAL(CB_BLEND_CONTROL, r700->CB_BLEND_CONTROL.u32All);
+ END_BATCH();
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, r700->CB_COLOR_CONTROL.u32All);
+ END_BATCH();
+
+ if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
+ for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+ if (r700->render_target[ui].enabled) {
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ R600_OUT_BATCH_REGVAL(CB_BLEND0_CONTROL + (4 * ui),
+ r700->render_target[ui].CB_BLEND0_CONTROL.u32All);
+ END_BATCH();
+ }
+ }
+ }
+
+ COMMIT_BATCH();
+}
+
+static void r700SendCBBlendColorState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGSEQ(CB_BLEND_RED, 4);
+ R600_OUT_BATCH(r700->CB_BLEND_RED.u32All);
+ R600_OUT_BATCH(r700->CB_BLEND_GREEN.u32All);
+ R600_OUT_BATCH(r700->CB_BLEND_BLUE.u32All);
+ R600_OUT_BATCH(r700->CB_BLEND_ALPHA.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendSUState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(9);
+ R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, r700->PA_SU_SC_MODE_CNTL.u32All);
+ R600_OUT_BATCH_REGSEQ(PA_SU_POINT_SIZE, 4);
+ R600_OUT_BATCH(r700->PA_SU_POINT_SIZE.u32All);
+ R600_OUT_BATCH(r700->PA_SU_POINT_MINMAX.u32All);
+ R600_OUT_BATCH(r700->PA_SU_LINE_CNTL.u32All);
+ R600_OUT_BATCH(r700->PA_SU_VTX_CNTL.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+
+}
+
+static void r700SendPolyState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(10);
+ R600_OUT_BATCH_REGSEQ(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2);
+ R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All);
+ R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_CLAMP.u32All);
+ R600_OUT_BATCH_REGSEQ(PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_FRONT_SCALE.u32All);
+ R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.u32All);
+ R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_BACK_SCALE.u32All);
+ R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_BACK_OFFSET.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+
+}
+
+static void r700SendCLState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(12);
+ R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, r700->PA_CL_CLIP_CNTL.u32All);
+ R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, r700->PA_CL_VTE_CNTL.u32All);
+ R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, r700->PA_CL_VS_OUT_CNTL.u32All);
+ R600_OUT_BATCH_REGVAL(PA_CL_NANINF_CNTL, r700->PA_CL_NANINF_CNTL.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendGBState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGSEQ(PA_CL_GB_VERT_CLIP_ADJ, 4);
+ R600_OUT_BATCH(r700->PA_CL_GB_VERT_CLIP_ADJ.u32All);
+ R600_OUT_BATCH(r700->PA_CL_GB_VERT_DISC_ADJ.u32All);
+ R600_OUT_BATCH(r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All);
+ R600_OUT_BATCH(r700->PA_CL_GB_HORZ_DISC_ADJ.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendScissorState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(22);
+ R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2);
+ R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_TL.u32All);
+ R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_BR.u32All);
+
+ R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 12);
+ R600_OUT_BATCH(r700->PA_SC_WINDOW_OFFSET.u32All);
+ R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_TL.u32All);
+ R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_BR.u32All);
+ R600_OUT_BATCH(r700->PA_SC_CLIPRECT_RULE.u32All);
+ R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_TL.u32All);
+ R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_BR.u32All);
+ R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_TL.u32All);
+ R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_BR.u32All);
+ R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_TL.u32All);
+ R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_BR.u32All);
+ R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_TL.u32All);
+ R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_BR.u32All);
+
+ R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2);
+ R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_TL.u32All);
+ R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_BR.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendSCState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(15);
+ R600_OUT_BATCH_REGVAL(R7xx_PA_SC_EDGERULE, r700->PA_SC_EDGERULE.u32All);
+ R600_OUT_BATCH_REGVAL(PA_SC_LINE_STIPPLE, r700->PA_SC_LINE_STIPPLE.u32All);
+ R600_OUT_BATCH_REGVAL(PA_SC_MPASS_PS_CNTL, r700->PA_SC_MPASS_PS_CNTL.u32All);
+ R600_OUT_BATCH_REGVAL(PA_SC_MODE_CNTL, r700->PA_SC_MODE_CNTL.u32All);
+ R600_OUT_BATCH_REGVAL(PA_SC_LINE_CNTL, r700->PA_SC_LINE_CNTL.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendAAState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(12);
+ R600_OUT_BATCH_REGVAL(PA_SC_AA_CONFIG, r700->PA_SC_AA_CONFIG.u32All);
+ R600_OUT_BATCH_REGVAL(PA_SC_AA_SAMPLE_LOCS_MCTX, r700->PA_SC_AA_SAMPLE_LOCS_MCTX.u32All);
+ R600_OUT_BATCH_REGVAL(PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, r700->PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX.u32All);
+ R600_OUT_BATCH_REGVAL(PA_SC_AA_MASK, r700->PA_SC_AA_MASK.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendPSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ int i;
+ BATCH_LOCALS(&context->radeon);
+
+ if (r700->ps.num_consts == 0)
+ return;
+
+ BEGIN_BATCH_NO_AUTOSTATE(2 + (r700->ps.num_consts * 4));
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (r700->ps.num_consts * 4)));
+ /* assembler map const from very beginning. */
+ R600_OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4);
+ for (i = 0; i < r700->ps.num_consts; i++) {
+ R600_OUT_BATCH(r700->ps.consts[i][0].u32All);
+ R600_OUT_BATCH(r700->ps.consts[i][1].u32All);
+ R600_OUT_BATCH(r700->ps.consts[i][2].u32All);
+ R600_OUT_BATCH(r700->ps.consts[i][3].u32All);
+ }
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ int i;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ if (r700->vs.num_consts == 0)
+ return;
+
+ BEGIN_BATCH_NO_AUTOSTATE(2 + (r700->vs.num_consts * 4));
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (r700->vs.num_consts * 4)));
+ /* assembler map const from very beginning. */
+ R600_OUT_BATCH(SQ_ALU_CONSTANT_VS_OFFSET * 4);
+ for (i = 0; i < r700->vs.num_consts; i++) {
+ R600_OUT_BATCH(r700->vs.consts[i][0].u32All);
+ R600_OUT_BATCH(r700->vs.consts[i][1].u32All);
+ R600_OUT_BATCH(r700->vs.consts[i][2].u32All);
+ R600_OUT_BATCH(r700->vs.consts[i][3].u32All);
+ }
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ return atom->cmd_size;
+}
+
+static int check_cb(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ int count = 7;
+
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+ count += 11;
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+
+ return count;
+}
+
+static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ unsigned int ui;
+ int count = 3;
+
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+ count += 3;
+
+ if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
+ for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+ if (r700->render_target[ui].enabled)
+ count += 3;
+ }
+ }
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+
+ return count;
+}
+
+static int check_ucp(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ int i;
+ int count = 0;
+
+ for (i = 0; i < R700_MAX_UCP; i++) {
+ if (r700->ucp[i].enabled)
+ count += 6;
+ }
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+ return count;
+}
+
+static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ int count = context->radeon.tcl.aos_count * 18;
+
+ if (count)
+ count += 6;
+
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+ return count;
+}
+
+static int check_tx(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ unsigned int i, count = 0;
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ radeonTexObj *t = r700->textures[i];
+ if (t)
+ count++;
+ }
+ }
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+ return count * 31;
+}
+
+static int check_ps_consts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ int count = r700->ps.num_consts * 4;
+
+ if (count)
+ count += 2;
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+
+ return count;
+}
+
+static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ int count = r700->vs.num_consts * 4;
+
+ if (count)
+ count += 2;
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+
+ return count;
+}
+
+#define ALLOC_STATE( ATOM, CHK, SZ, EMIT ) \
+do { \
+ context->atoms.ATOM.cmd_size = (SZ); \
+ context->atoms.ATOM.cmd = NULL; \
+ context->atoms.ATOM.name = #ATOM; \
+ context->atoms.ATOM.idx = 0; \
+ context->atoms.ATOM.check = check_##CHK; \
+ context->atoms.ATOM.dirty = GL_FALSE; \
+ context->atoms.ATOM.emit = (EMIT); \
+ context->radeon.hw.max_state_size += (SZ); \
+ insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \
+} while (0)
+
+void r600InitAtoms(context_t *context)
+{
+ radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
+ context->radeon.hw.max_state_size = 10 + 5 + 14; /* start 3d, idle, cb/db flush */
+
+ /* Setup the atom linked list */
+ make_empty_list(&context->radeon.hw.atomlist);
+ context->radeon.hw.atomlist.name = "atom-list";
+
+ ALLOC_STATE(sq, always, 34, r700SendSQConfig);
+ ALLOC_STATE(db, always, 23, r700SendDBState);
+ ALLOC_STATE(stencil, always, 4, r700SendStencilState);
+ ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
+ ALLOC_STATE(sc, always, 15, r700SendSCState);
+ ALLOC_STATE(scissor, always, 22, r700SendScissorState);
+ ALLOC_STATE(aa, always, 12, r700SendAAState);
+ ALLOC_STATE(cl, always, 12, r700SendCLState);
+ ALLOC_STATE(gb, always, 6, r700SendGBState);
+ ALLOC_STATE(ucp, ucp, (R700_MAX_UCP * 6), r700SendUCPState);
+ ALLOC_STATE(su, always, 9, r700SendSUState);
+ ALLOC_STATE(poly, always, 10, r700SendPolyState);
+ ALLOC_STATE(cb, cb, 18, r700SendCBState);
+ ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
+ ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
+ ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
+ ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
+ ALLOC_STATE(sx, always, 9, r700SendSXState);
+ ALLOC_STATE(vgt, always, 41, r700SendVGTState);
+ ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
+ ALLOC_STATE(vpt, always, 16, r700SendViewportState);
+ ALLOC_STATE(fs, always, 18, r700SendFSState);
+ ALLOC_STATE(vs, always, 18, r700SendVSState);
+ ALLOC_STATE(ps, always, 21, r700SendPSState);
+ ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
+ ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
+ ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState);
+ ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
+ ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
+ ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
+
+ context->radeon.hw.is_dirty = GL_TRUE;
+ context->radeon.hw.all_dirty = GL_TRUE;
+}
diff --git a/r600/r700_chip.h b/r600/r700_chip.h
new file mode 100644
index 0000000..ae249e1
--- /dev/null
+++ b/r600/r700_chip.h
@@ -0,0 +1,503 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_CHIP_H_
+#define _R700_CHIP_H_
+
+#include "r600_context.h"
+
+#include "r600_reg.h"
+#include "r600_reg_auto_r6xx.h"
+#include "r600_reg_r6xx.h"
+#include "r600_reg_r7xx.h"
+
+#include "r700_chipoffset.h"
+
+#define SETfield(x, val, shift, mask) ( (x) = ((x) & ~(mask)) | ((val) << (shift)) ) /* u32All */
+#define CLEARfield(x, mask) ( (x) &= ~(mask) )
+#define SETbit(x, bit) ( (x) |= (bit) )
+#define CLEARbit(x, bit) ( (x) &= ~(bit) )
+
+#define R700_TEXTURE_NUMBERUNITS 16
+#define R700_MAX_RENDER_TARGETS 8
+#define R700_MAX_VIEWPORTS 16
+#define R700_MAX_SHADER_EXPORTS 32
+#define R700_MAX_UCP 6
+#define R700_MAX_DX9_CONSTS 256
+
+/* Enum not show in r600_*.h */
+
+#define FETCH_RESOURCE_STRIDE 7
+
+#define ASIC_CONFIG_BASE_INDEX 0x2000
+#define ASIC_CONTEXT_BASE_INDEX 0xA000
+#define ASIC_CTL_CONST_BASE_INDEX 0xF3FC
+
+
+enum
+{
+ SQ_ABSOLUTE = 0x00000000,
+ SQ_RELATIVE = 0x00000001,
+};
+
+enum
+{
+ SQ_ALU_SCL_210 = 0x00000000,
+ SQ_ALU_SCL_122 = 0x00000001,
+ SQ_ALU_SCL_212 = 0x00000002,
+ SQ_ALU_SCL_221 = 0x00000003,
+};
+
+enum
+{
+ SQ_TEX_UNNORMALIZED = 0x00000000,
+ SQ_TEX_NORMALIZED = 0x00000001,
+};
+
+enum
+{
+ SQ_CF_PIXEL_MRT0 = 0x00000000,
+ SQ_CF_PIXEL_MRT1 = 0x00000001,
+ SQ_CF_PIXEL_MRT2 = 0x00000002,
+ SQ_CF_PIXEL_MRT3 = 0x00000003,
+ SQ_CF_PIXEL_MRT4 = 0x00000004,
+ SQ_CF_PIXEL_MRT5 = 0x00000005,
+ SQ_CF_PIXEL_MRT6 = 0x00000006,
+ SQ_CF_PIXEL_MRT7 = 0x00000007,
+ SQ_CF_PIXEL_Z = 0x0000003d,
+};
+
+typedef enum ENUM_SQ_CF_ARRAY_BASE_POS {
+SQ_CF_POS_0 = 0x0000003c,
+SQ_CF_POS_1 = 0x0000003d,
+SQ_CF_POS_2 = 0x0000003e,
+SQ_CF_POS_3 = 0x0000003f,
+} ENUM_SQ_CF_ARRAY_BASE_POS;
+
+enum
+{
+ PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit = 23,
+};
+
+enum
+{
+ TEX_XYFilter_Point = 0x00000000,
+ TEX_XYFilter_Linear = 0x00000001,
+ TEX_XYFilter_Cubic = 0x00000002,
+ TEX_XYFilter_Cleartype = 0x00000003,
+
+ TEX_MipFilter_None = 0x00000000,
+ TEX_MipFilter_Point = 0x00000001,
+ TEX_MipFilter_Linear = 0x00000002,
+};
+
+enum
+{
+ SQ_EXPORT_WRITE = 0x00000000,
+ SQ_EXPORT_WRITE_IND = 0x00000001,
+ SQ_EXPORT_WRITE_ACK = 0x00000002,
+ SQ_EXPORT_WRITE_IND_ACK = 0x00000003,
+};
+
+/* --------------------------------- */
+
+enum
+{
+ R700_PM4_PACKET0_NOP = 0x00000000,
+ R700_PM4_PACKET1_NOP = 0x40000000,
+ R700_PM4_PACKET2_NOP = 0x80000000,
+ R700_PM4_PACKET3_NOP = 0xC0000000,
+};
+
+#define PM4_OPCODE_SET_INDEX_TYPE (R700_PM4_PACKET3_NOP | (IT_INDEX_TYPE << 8))
+
+#define PM4_OPCODE_DRAW_INDEX_AUTO (R700_PM4_PACKET3_NOP | (IT_DRAW_INDEX_AUTO << 8))
+#define PM4_OPCODE_DRAW_INDEX_IMMD (R700_PM4_PACKET3_NOP | (IT_DRAW_INDEX_IMMD << 8))
+#define PM4_OPCODE_WAIT_REG_MEM (R700_PM4_PACKET3_NOP | (IT_WAIT_REG_MEM << 8))
+#define PM4_OPCODE_SET_CONTEXT_REG (R700_PM4_PACKET3_NOP | (IT_SET_CONTEXT_REG << 8))
+#define PM4_OPCODE_SET_CONFIG_REG (R700_PM4_PACKET3_NOP | (IT_SET_CONFIG_REG << 8))
+#define PM4_OPCODE_SET_ALU_CONST (R700_PM4_PACKET3_NOP | (IT_SET_ALU_CONST << 8))
+#define PM4_OPCODE_SET_RESOURCE (R700_PM4_PACKET3_NOP | (IT_SET_RESOURCE << 8))
+#define PM4_OPCODE_SET_SAMPLER (R700_PM4_PACKET3_NOP | (IT_SET_SAMPLER << 8))
+#define PM4_OPCODE_CONTEXT_CONTROL (R700_PM4_PACKET3_NOP | (IT_CONTEXT_CONTROL << 8))
+
+union UINT_FLOAT
+{
+ unsigned int u32All;
+ float f32All;
+};
+
+#if 0
+typedef struct _TEXTURE_STATE_STRUCT
+{
+ union UINT_FLOAT SQ_TEX_RESOURCE0;
+ union UINT_FLOAT SQ_TEX_RESOURCE1;
+ union UINT_FLOAT SQ_TEX_RESOURCE2;
+ union UINT_FLOAT SQ_TEX_RESOURCE3;
+ union UINT_FLOAT SQ_TEX_RESOURCE4;
+ union UINT_FLOAT SQ_TEX_RESOURCE5;
+ union UINT_FLOAT SQ_TEX_RESOURCE6;
+ GLboolean enabled;
+} TEXTURE_STATE_STRUCT;
+
+typedef struct _SAMPLER_STATE_STRUCT
+{
+ union UINT_FLOAT SQ_TEX_SAMPLER0;
+ union UINT_FLOAT SQ_TEX_SAMPLER1;
+ union UINT_FLOAT SQ_TEX_SAMPLER2;
+ GLboolean enabled;
+} SAMPLER_STATE_STRUCT;
+
+typedef struct _R700_TEXTURE_STATES
+{
+ TEXTURE_STATE_STRUCT *textures[R700_TEXTURE_NUMBERUNITS];
+ SAMPLER_STATE_STRUCT *samplers[R700_TEXTURE_NUMBERUNITS];
+} R700_TEXTURE_STATES;
+#endif
+
+typedef struct _RENDER_TARGET_STATE_STRUCT
+{
+ union UINT_FLOAT CB_COLOR0_BASE; /* 0xA010 */
+ union UINT_FLOAT CB_COLOR0_SIZE; /* 0xA018 */
+ union UINT_FLOAT CB_COLOR0_VIEW; /* 0xA020 */
+ union UINT_FLOAT CB_COLOR0_INFO; /* 0xA028 */
+ union UINT_FLOAT CB_COLOR0_TILE; /* 0xA030 */
+ union UINT_FLOAT CB_COLOR0_FRAG; /* 0xA038 */
+ union UINT_FLOAT CB_COLOR0_MASK; /* 0xA040 */
+ union UINT_FLOAT CB_BLEND0_CONTROL; /* 0xA1E0 */
+ GLboolean enabled;
+ GLboolean dirty;
+} RENDER_TARGET_STATE_STRUCT;
+
+typedef struct _VIEWPORT_STATE_STRUCT
+{
+ union UINT_FLOAT PA_SC_VPORT_SCISSOR_0_TL; /* 0xA094 */
+ union UINT_FLOAT PA_SC_VPORT_SCISSOR_0_BR; /* 0xA095 */
+ union UINT_FLOAT PA_SC_VPORT_ZMIN_0; /* 0xA0B4 */
+ union UINT_FLOAT PA_SC_VPORT_ZMAX_0; /* 0xA0B5 */
+ union UINT_FLOAT PA_CL_VPORT_XSCALE; /* 0xA10F */
+ union UINT_FLOAT PA_CL_VPORT_XOFFSET; /* 0xA110 */
+ union UINT_FLOAT PA_CL_VPORT_YSCALE; /* 0xA111 */
+ union UINT_FLOAT PA_CL_VPORT_YOFFSET; /* 0xA112 */
+ union UINT_FLOAT PA_CL_VPORT_ZSCALE; /* 0xA113 */
+ union UINT_FLOAT PA_CL_VPORT_ZOFFSET; /* 0xA114 */
+ GLboolean enabled;
+ GLboolean dirty;
+} VIEWPORT_STATE_STRUCT;
+
+typedef struct _UCP_STATE_STRUCT
+{
+ union UINT_FLOAT PA_CL_UCP_0_X;
+ union UINT_FLOAT PA_CL_UCP_0_Y;
+ union UINT_FLOAT PA_CL_UCP_0_Z;
+ union UINT_FLOAT PA_CL_UCP_0_W;
+ GLboolean enabled;
+ GLboolean dirty;
+} UCP_STATE_STRUCT;
+
+typedef struct _PS_STATE_STRUCT
+{
+ union UINT_FLOAT SQ_PGM_START_PS ; /* 0xA210 */
+ union UINT_FLOAT SQ_PGM_RESOURCES_PS ; /* 0xA214 */
+ union UINT_FLOAT SQ_PGM_EXPORTS_PS ; /* 0xA215 */
+ union UINT_FLOAT SQ_PGM_CF_OFFSET_PS ; /* 0xA233 */
+ GLboolean dirty;
+ int num_consts;
+ union UINT_FLOAT consts[R700_MAX_DX9_CONSTS][4];
+} PS_STATE_STRUCT;
+
+typedef struct _VS_STATE_STRUCT
+{
+ union UINT_FLOAT SQ_PGM_START_VS ; /* 0xA216 */
+ union UINT_FLOAT SQ_PGM_RESOURCES_VS ; /* 0xA21A */
+ union UINT_FLOAT SQ_PGM_CF_OFFSET_VS ; /* 0xA234 */
+ GLboolean dirty;
+ int num_consts;
+ union UINT_FLOAT consts[R700_MAX_DX9_CONSTS][4];
+} VS_STATE_STRUCT;
+
+typedef struct _GS_STATE_STRUCT
+{
+ union UINT_FLOAT SQ_PGM_START_GS ; /* 0xA21B */
+ union UINT_FLOAT SQ_PGM_RESOURCES_GS ; /* 0xA21F */
+ union UINT_FLOAT SQ_PGM_CF_OFFSET_GS ; /* 0xA235 */
+ GLboolean dirty;
+} GS_STATE_STRUCT;
+
+typedef struct _ES_STATE_STRUCT
+{
+ union UINT_FLOAT SQ_PGM_START_ES ; /* 0xA220 */
+ union UINT_FLOAT SQ_PGM_RESOURCES_ES ; /* 0xA224 */
+ union UINT_FLOAT SQ_PGM_CF_OFFSET_ES ; /* 0xA236 */
+ GLboolean dirty;
+} ES_STATE_STRUCT;
+
+typedef struct _FS_STATE_STRUCT
+{
+ union UINT_FLOAT SQ_PGM_START_FS ; /* 0xA225 */
+ union UINT_FLOAT SQ_PGM_RESOURCES_FS ; /* 0xA229 */
+ union UINT_FLOAT SQ_PGM_CF_OFFSET_FS ; /* 0xA237 */
+ GLboolean dirty;
+} FS_STATE_STRUCT;
+
+typedef struct _SQ_CONFIG_STRUCT
+{
+ union UINT_FLOAT SQ_CONFIG ; /* 0x2300 */
+ union UINT_FLOAT SQ_GPR_RESOURCE_MGMT_1 ; /* 0x2301 */
+ union UINT_FLOAT SQ_GPR_RESOURCE_MGMT_2 ; /* 0x2302 */
+ union UINT_FLOAT SQ_THREAD_RESOURCE_MGMT ; /* 0x2303 */
+ union UINT_FLOAT SQ_STACK_RESOURCE_MGMT_1 ; /* 0x2304 */
+ union UINT_FLOAT SQ_STACK_RESOURCE_MGMT_2 ; /* 0x2305 */
+} SQ_CONFIG_STRUCT;
+
+typedef struct _R700_CHIP_CONTEXT
+{
+ // DB
+ union UINT_FLOAT DB_DEPTH_SIZE ; /* 0xA000 */
+ union UINT_FLOAT DB_DEPTH_VIEW ; /* 0xA001 */
+ union UINT_FLOAT DB_DEPTH_BASE ; /* 0xA003 */
+ union UINT_FLOAT DB_DEPTH_INFO ; /* 0xA004 */
+ GLboolean db_target_dirty;
+ union UINT_FLOAT DB_HTILE_DATA_BASE ; /* 0xA005 */
+ union UINT_FLOAT DB_STENCIL_CLEAR ; /* 0xA00A */
+ union UINT_FLOAT DB_DEPTH_CLEAR ; /* 0xA00B */
+ union UINT_FLOAT DB_STENCILREFMASK ; /* 0xA10C */
+ union UINT_FLOAT DB_STENCILREFMASK_BF ; /* 0xA10D */
+ union UINT_FLOAT DB_RENDER_CONTROL ; /* 0xA343 */
+ union UINT_FLOAT DB_RENDER_OVERRIDE ; /* 0xA344 */
+ union UINT_FLOAT DB_HTILE_SURFACE ; /* 0xA349 */
+ union UINT_FLOAT DB_ALPHA_TO_MASK ; /* 0xA351 */
+ union UINT_FLOAT DB_DEPTH_CONTROL ; /* 0xA200 */
+ union UINT_FLOAT DB_SHADER_CONTROL ; /* 0xA203 */
+ GLboolean db_dirty;
+
+ // SC
+ union UINT_FLOAT PA_SC_SCREEN_SCISSOR_TL ; /* 0xA00C */
+ union UINT_FLOAT PA_SC_SCREEN_SCISSOR_BR ; /* 0xA00D */
+ union UINT_FLOAT PA_SC_WINDOW_OFFSET ; /* 0xA080 */
+ union UINT_FLOAT PA_SC_WINDOW_SCISSOR_TL ; /* 0xA081 */
+ union UINT_FLOAT PA_SC_WINDOW_SCISSOR_BR ; /* 0xA082 */
+ union UINT_FLOAT PA_SC_CLIPRECT_RULE ; /* 0xA083 */
+ union UINT_FLOAT PA_SC_CLIPRECT_0_TL ; /* 0xA084 */
+ union UINT_FLOAT PA_SC_CLIPRECT_0_BR ; /* 0xA085 */
+ union UINT_FLOAT PA_SC_CLIPRECT_1_TL ; /* 0xA086 */
+ union UINT_FLOAT PA_SC_CLIPRECT_1_BR ; /* 0xA087 */
+ union UINT_FLOAT PA_SC_CLIPRECT_2_TL ; /* 0xA088 */
+ union UINT_FLOAT PA_SC_CLIPRECT_2_BR ; /* 0xA089 */
+ union UINT_FLOAT PA_SC_CLIPRECT_3_TL ; /* 0xA08A */
+ union UINT_FLOAT PA_SC_CLIPRECT_3_BR ; /* 0xA08B */
+ union UINT_FLOAT PA_SC_EDGERULE ; /* 0xA08C */
+ union UINT_FLOAT PA_SC_GENERIC_SCISSOR_TL ; /* 0xA090 */
+ union UINT_FLOAT PA_SC_GENERIC_SCISSOR_BR ; /* 0xA091 */
+ GLboolean scissor_dirty;
+
+ union UINT_FLOAT PA_SC_LINE_STIPPLE ; /* 0xA283 */
+ union UINT_FLOAT PA_SC_LINE_CNTL ; /* 0xA300 */
+ union UINT_FLOAT PA_SC_AA_CONFIG ; /* 0xA301 */
+ union UINT_FLOAT PA_SC_MPASS_PS_CNTL ; /* 0xA292 */
+ union UINT_FLOAT PA_SC_MODE_CNTL ; /* 0xA293 */
+ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_MCTX ; /* 0xA307 */
+ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX; /* 0xA308 */
+ union UINT_FLOAT PA_SC_AA_MASK ; /* 0xA312 */
+ GLboolean sc_dirty;
+
+ // CL
+ union UINT_FLOAT PA_CL_CLIP_CNTL ; /* 0xA204 */
+ union UINT_FLOAT PA_CL_VTE_CNTL ; /* 0xA206 */
+ union UINT_FLOAT PA_CL_VS_OUT_CNTL ; /* 0xA207 */
+ union UINT_FLOAT PA_CL_NANINF_CNTL ; /* 0xA208 */
+ union UINT_FLOAT PA_CL_GB_VERT_CLIP_ADJ ; /* 0xA303 */
+ union UINT_FLOAT PA_CL_GB_VERT_DISC_ADJ ; /* 0xA304 */
+ union UINT_FLOAT PA_CL_GB_HORZ_CLIP_ADJ ; /* 0xA305 */
+ union UINT_FLOAT PA_CL_GB_HORZ_DISC_ADJ ; /* 0xA306 */
+ GLboolean cl_dirty;
+
+ // SU
+ union UINT_FLOAT PA_SU_SC_MODE_CNTL ; /* 0xA205 */
+ union UINT_FLOAT PA_SU_POINT_SIZE ; /* 0xA280 */
+ union UINT_FLOAT PA_SU_POINT_MINMAX ; /* 0xA281 */
+ union UINT_FLOAT PA_SU_LINE_CNTL ; /* 0xA282 */
+ union UINT_FLOAT PA_SU_VTX_CNTL ; /* 0xA302 */
+ union UINT_FLOAT PA_SU_POLY_OFFSET_DB_FMT_CNTL; /* 0xA37E */
+ union UINT_FLOAT PA_SU_POLY_OFFSET_CLAMP ; /* 0xA37F */
+ union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_SCALE; /* 0xA380 */
+ union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_OFFSET; /* 0xA381 */
+ union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_SCALE; /* 0xA382 */
+ union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_OFFSET; /* 0xA383 */
+ GLboolean su_dirty;
+
+ VIEWPORT_STATE_STRUCT viewport[R700_MAX_VIEWPORTS];
+ UCP_STATE_STRUCT ucp[R700_MAX_UCP];
+
+ // CB
+ union UINT_FLOAT CB_CLEAR_RED_R6XX ; /* 0xA048 */
+ union UINT_FLOAT CB_CLEAR_GREEN_R6XX ; /* 0xA049 */
+ union UINT_FLOAT CB_CLEAR_BLUE_R6XX ; /* 0xA04A */
+ union UINT_FLOAT CB_CLEAR_ALPHA_R6XX ; /* 0xA04B */
+ union UINT_FLOAT CB_TARGET_MASK ; /* 0xA08E */
+ union UINT_FLOAT CB_SHADER_MASK ; /* 0xA08F */
+ union UINT_FLOAT CB_BLEND_RED ; /* 0xA105 */
+ union UINT_FLOAT CB_BLEND_GREEN ; /* 0xA106 */
+ union UINT_FLOAT CB_BLEND_BLUE ; /* 0xA107 */
+ union UINT_FLOAT CB_BLEND_ALPHA ; /* 0xA108 */
+ union UINT_FLOAT CB_FOG_RED_R6XX ; /* 0xA109 */
+ union UINT_FLOAT CB_FOG_GREEN_R6XX ; /* 0xA10A */
+ union UINT_FLOAT CB_FOG_BLUE_R6XX ; /* 0xA10B */
+ union UINT_FLOAT CB_SHADER_CONTROL ; /* 0xA1E8 */
+ union UINT_FLOAT CB_COLOR_CONTROL ; /* 0xA202 */
+ union UINT_FLOAT CB_CLRCMP_CONTROL ; /* 0xA30C */
+ union UINT_FLOAT CB_CLRCMP_SRC ; /* 0xA30D */
+ union UINT_FLOAT CB_CLRCMP_DST ; /* 0xA30E */
+ union UINT_FLOAT CB_CLRCMP_MSK ; /* 0xA30F */
+ union UINT_FLOAT CB_BLEND_CONTROL ; /* 0xABD0 */
+ GLboolean cb_dirty;
+ RENDER_TARGET_STATE_STRUCT render_target[R700_MAX_RENDER_TARGETS];
+
+ // SX
+ union UINT_FLOAT SX_MISC ; /* 0xA0D4 */
+ union UINT_FLOAT SX_ALPHA_TEST_CONTROL ; /* 0xA104 */
+ union UINT_FLOAT SX_ALPHA_REF ; /* 0xA10E */
+ GLboolean sx_dirty;
+
+ // VGT
+ union UINT_FLOAT VGT_MAX_VTX_INDX ; /* 0xA100 */
+ union UINT_FLOAT VGT_MIN_VTX_INDX ; /* 0xA101 */
+ union UINT_FLOAT VGT_INDX_OFFSET ; /* 0xA102 */
+ union UINT_FLOAT VGT_MULTI_PRIM_IB_RESET_INDX; /* 0xA103 */
+ union UINT_FLOAT VGT_OUTPUT_PATH_CNTL ; /* 0xA284 */
+ union UINT_FLOAT VGT_HOS_CNTL ; /* 0xA285 */
+ union UINT_FLOAT VGT_HOS_MAX_TESS_LEVEL ; /* 0xA286 */
+ union UINT_FLOAT VGT_HOS_MIN_TESS_LEVEL ; /* 0xA287 */
+ union UINT_FLOAT VGT_HOS_REUSE_DEPTH ; /* 0xA288 */
+ union UINT_FLOAT VGT_GROUP_PRIM_TYPE ; /* 0xA289 */
+ union UINT_FLOAT VGT_GROUP_FIRST_DECR ; /* 0xA28A */
+ union UINT_FLOAT VGT_GROUP_DECR ; /* 0xA28B */
+ union UINT_FLOAT VGT_GROUP_VECT_0_CNTL ; /* 0xA28C */
+ union UINT_FLOAT VGT_GROUP_VECT_1_CNTL ; /* 0xA28D */
+ union UINT_FLOAT VGT_GROUP_VECT_0_FMT_CNTL ; /* 0xA28E */
+ union UINT_FLOAT VGT_GROUP_VECT_1_FMT_CNTL ; /* 0xA28F */
+ union UINT_FLOAT VGT_GS_MODE ; /* 0xA290 */
+ union UINT_FLOAT VGT_PRIMITIVEID_EN ; /* 0xA2A1 */
+ union UINT_FLOAT VGT_MULTI_PRIM_IB_RESET_EN; /* 0xA2A5 */
+ union UINT_FLOAT VGT_INSTANCE_STEP_RATE_0 ; /* 0xA2A8 */
+ union UINT_FLOAT VGT_INSTANCE_STEP_RATE_1 ; /* 0xA2A9 */
+ union UINT_FLOAT VGT_STRMOUT_EN ; /* 0xA2AC */
+ union UINT_FLOAT VGT_REUSE_OFF ; /* 0xA2AD */
+ union UINT_FLOAT VGT_VTX_CNT_EN ; /* 0xA2AE */
+ union UINT_FLOAT VGT_STRMOUT_BUFFER_EN ; /* 0xA2C8 */
+ GLboolean vgt_dirty;
+
+ // SPI
+ union UINT_FLOAT SPI_VS_OUT_ID_0 ; /* 0xA185 */
+ union UINT_FLOAT SPI_VS_OUT_ID_1 ; /* 0xA186 */
+ union UINT_FLOAT SPI_VS_OUT_ID_2 ; /* 0xA187 */
+ union UINT_FLOAT SPI_VS_OUT_ID_3 ; /* 0xA188 */
+ union UINT_FLOAT SPI_VS_OUT_ID_4 ; /* 0xA189 */
+ union UINT_FLOAT SPI_VS_OUT_ID_5 ; /* 0xA18A */
+ union UINT_FLOAT SPI_VS_OUT_ID_6 ; /* 0xA18B */
+ union UINT_FLOAT SPI_VS_OUT_ID_7 ; /* 0xA18C */
+ union UINT_FLOAT SPI_VS_OUT_ID_8 ; /* 0xA18D */
+ union UINT_FLOAT SPI_VS_OUT_ID_9 ; /* 0xA18E */
+ union UINT_FLOAT SPI_VS_OUT_CONFIG ; /* 0xA1B1 */
+ union UINT_FLOAT SPI_THREAD_GROUPING ; /* 0xA1B2 */
+ union UINT_FLOAT SPI_PS_IN_CONTROL_0 ; /* 0xA1B3 */
+ union UINT_FLOAT SPI_PS_IN_CONTROL_1 ; /* 0xA1B4 */
+ union UINT_FLOAT SPI_INTERP_CONTROL_0 ; /* 0xA1B5 */
+ union UINT_FLOAT SPI_INPUT_Z ; /* 0xA1B6 */
+ union UINT_FLOAT SPI_FOG_CNTL ; /* 0xA1B7 */
+ union UINT_FLOAT SPI_FOG_FUNC_SCALE ; /* 0xA1B8 */
+ union UINT_FLOAT SPI_FOG_FUNC_BIAS ; /* 0xA1B9 */
+
+ union UINT_FLOAT SQ_VTX_SEMANTIC_0 ; /* 0xA0E0 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_1 ; /* 0xA0E1 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_2 ; /* 0xA0E2 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_3 ; /* 0xA0E3 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_4 ; /* 0xA0E4 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_5 ; /* 0xA0E5 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_6 ; /* 0xA0E6 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_7 ; /* 0xA0E7 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_8 ; /* 0xA0E8 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_9 ; /* 0xA0E9 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_10 ; /* 0xA0EA */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_11 ; /* 0xA0EB */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_12 ; /* 0xA0EC */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_13 ; /* 0xA0ED */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_14 ; /* 0xA0EE */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_15 ; /* 0xA0EF */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_16 ; /* 0xA0F0 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_17 ; /* 0xA0F1 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_18 ; /* 0xA0F2 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_19 ; /* 0xA0F3 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_20 ; /* 0xA0F4 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_21 ; /* 0xA0F5 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_22 ; /* 0xA0F6 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_23 ; /* 0xA0F7 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_24 ; /* 0xA0F8 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_25 ; /* 0xA0F9 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_26 ; /* 0xA0FA */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_27 ; /* 0xA0FB */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_28 ; /* 0xA0FC */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_29 ; /* 0xA0FD */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_30 ; /* 0xA0FE */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_31 ; /* 0xA0FF */
+ union UINT_FLOAT SPI_PS_INPUT_CNTL[R700_MAX_SHADER_EXPORTS];
+ GLboolean spi_dirty;
+
+ // shaders
+ PS_STATE_STRUCT ps;
+ VS_STATE_STRUCT vs;
+ GS_STATE_STRUCT gs;
+ ES_STATE_STRUCT es;
+ FS_STATE_STRUCT fs;
+
+ // SQ CONFIG
+ SQ_CONFIG_STRUCT sq_config;
+ // misc
+ union UINT_FLOAT TA_CNTL_AUX ; /* 0x2542 */
+ union UINT_FLOAT VC_ENHANCE ; /* 0x25C5 */
+ union UINT_FLOAT SQ_DYN_GPR_CNTL_PS_FLUSH_REQ; /* 0x2363 */
+ union UINT_FLOAT DB_DEBUG ; /* 0x260C */
+ union UINT_FLOAT DB_WATERMARKS ; /* 0x260E */
+ // SQ
+ union UINT_FLOAT SQ_ESGS_RING_ITEMSIZE ; /* 0xA22A */
+ union UINT_FLOAT SQ_GSVS_RING_ITEMSIZE ; /* 0xA22B */
+ union UINT_FLOAT SQ_ESTMP_RING_ITEMSIZE ; /* 0xA22C */
+ union UINT_FLOAT SQ_GSTMP_RING_ITEMSIZE ; /* 0xA22D */
+ union UINT_FLOAT SQ_VSTMP_RING_ITEMSIZE ; /* 0xA22E */
+ union UINT_FLOAT SQ_PSTMP_RING_ITEMSIZE ; /* 0xA22F */
+ union UINT_FLOAT SQ_FBUF_RING_ITEMSIZE ; /* 0xA230 */
+ union UINT_FLOAT SQ_REDUC_RING_ITEMSIZE ; /* 0xA231 */
+ union UINT_FLOAT SQ_GS_VERT_ITEMSIZE ; /* 0xA232 */
+ GLboolean sq_dirty;
+
+ radeonTexObj* textures[R700_TEXTURE_NUMBERUNITS];
+
+ GLboolean bEnablePerspective;
+
+} R700_CHIP_CONTEXT;
+
+#endif /* _R700_CHIP_H_ */
+
diff --git a/r600/r700_chipoffset.h b/r600/r700_chipoffset.h
new file mode 100644
index 0000000..4d73fb9
--- /dev/null
+++ b/r600/r700_chipoffset.h
@@ -0,0 +1,693 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef _R700_CHIPOFFSET_H_
+#define _R700_CHIPOFFSET_H_
+
+#define mmWAIT_UNTIL 0x2010
+#define mmSCRATCH_REG0 0x2140
+#define mmGUI_SCRATCH_REG0 0x2140
+#define mmSCRATCH_REG1 0x2141
+#define mmGUI_SCRATCH_REG1 0x2141
+#define mmSCRATCH_REG2 0x2142
+#define mmGUI_SCRATCH_REG2 0x2142
+#define mmSCRATCH_REG3 0x2143
+#define mmGUI_SCRATCH_REG3 0x2143
+#define mmSCRATCH_REG4 0x2144
+#define mmGUI_SCRATCH_REG4 0x2144
+#define mmSCRATCH_REG5 0x2145
+#define mmGUI_SCRATCH_REG5 0x2145
+#define mmSCRATCH_REG6 0x2146
+#define mmGUI_SCRATCH_REG6 0x2146
+#define mmSCRATCH_REG7 0x2147
+#define mmGUI_SCRATCH_REG7 0x2147
+
+#define mmCP_COHER_CNTL 0x217C
+#define mmCP_COHER_SIZE 0x217D
+#define mmCP_COHER_BASE 0x217E
+#define mmCP_COHER_STATUS 0x217F
+
+#define mmTA_CNTL_AUX 0x2542
+#define mmVC_ENHANCE 0x25C5
+#define mmSQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x2363
+#define mmDB_DEBUG 0x260C
+#define mmDB_WATERMARKS 0x260E
+
+#define mmPA_CL_VPORT_XSCALE 0xA10F
+#define mmPA_CL_VPORT_XOFFSET 0xA110
+#define mmPA_CL_VPORT_YSCALE 0xA111
+#define mmPA_CL_VPORT_YOFFSET 0xA112
+#define mmPA_CL_VPORT_ZSCALE 0xA113
+#define mmPA_CL_VPORT_ZOFFSET 0xA114
+#define mmPA_CL_VPORT_XSCALE_1 0xA115
+#define mmPA_CL_VPORT_XSCALE_2 0xA11B
+#define mmPA_CL_VPORT_XSCALE_3 0xA121
+#define mmPA_CL_VPORT_XSCALE_4 0xA127
+#define mmPA_CL_VPORT_XSCALE_5 0xA12D
+#define mmPA_CL_VPORT_XSCALE_6 0xA133
+#define mmPA_CL_VPORT_XSCALE_7 0xA139
+#define mmPA_CL_VPORT_XSCALE_8 0xA13F
+#define mmPA_CL_VPORT_XSCALE_9 0xA145
+#define mmPA_CL_VPORT_XSCALE_10 0xA14B
+#define mmPA_CL_VPORT_XSCALE_11 0xA151
+#define mmPA_CL_VPORT_XSCALE_12 0xA157
+#define mmPA_CL_VPORT_XSCALE_13 0xA15D
+#define mmPA_CL_VPORT_XSCALE_14 0xA163
+#define mmPA_CL_VPORT_XSCALE_15 0xA169
+#define mmPA_CL_VPORT_XOFFSET_1 0xA116
+#define mmPA_CL_VPORT_XOFFSET_2 0xA11C
+#define mmPA_CL_VPORT_XOFFSET_3 0xA122
+#define mmPA_CL_VPORT_XOFFSET_4 0xA128
+#define mmPA_CL_VPORT_XOFFSET_5 0xA12E
+#define mmPA_CL_VPORT_XOFFSET_6 0xA134
+#define mmPA_CL_VPORT_XOFFSET_7 0xA13A
+#define mmPA_CL_VPORT_XOFFSET_8 0xA140
+#define mmPA_CL_VPORT_XOFFSET_9 0xA146
+#define mmPA_CL_VPORT_XOFFSET_10 0xA14C
+#define mmPA_CL_VPORT_XOFFSET_11 0xA152
+#define mmPA_CL_VPORT_XOFFSET_12 0xA158
+#define mmPA_CL_VPORT_XOFFSET_13 0xA15E
+#define mmPA_CL_VPORT_XOFFSET_14 0xA164
+#define mmPA_CL_VPORT_XOFFSET_15 0xA16A
+#define mmPA_CL_VPORT_YSCALE_1 0xA117
+#define mmPA_CL_VPORT_YSCALE_2 0xA11D
+#define mmPA_CL_VPORT_YSCALE_3 0xA123
+#define mmPA_CL_VPORT_YSCALE_4 0xA129
+#define mmPA_CL_VPORT_YSCALE_5 0xA12F
+#define mmPA_CL_VPORT_YSCALE_6 0xA135
+#define mmPA_CL_VPORT_YSCALE_7 0xA13B
+#define mmPA_CL_VPORT_YSCALE_8 0xA141
+#define mmPA_CL_VPORT_YSCALE_9 0xA147
+#define mmPA_CL_VPORT_YSCALE_10 0xA14D
+#define mmPA_CL_VPORT_YSCALE_11 0xA153
+#define mmPA_CL_VPORT_YSCALE_12 0xA159
+#define mmPA_CL_VPORT_YSCALE_13 0xA15F
+#define mmPA_CL_VPORT_YSCALE_14 0xA165
+#define mmPA_CL_VPORT_YSCALE_15 0xA16B
+#define mmPA_CL_VPORT_YOFFSET_1 0xA118
+#define mmPA_CL_VPORT_YOFFSET_2 0xA11E
+#define mmPA_CL_VPORT_YOFFSET_3 0xA124
+#define mmPA_CL_VPORT_YOFFSET_4 0xA12A
+#define mmPA_CL_VPORT_YOFFSET_5 0xA130
+#define mmPA_CL_VPORT_YOFFSET_6 0xA136
+#define mmPA_CL_VPORT_YOFFSET_7 0xA13C
+#define mmPA_CL_VPORT_YOFFSET_8 0xA142
+#define mmPA_CL_VPORT_YOFFSET_9 0xA148
+#define mmPA_CL_VPORT_YOFFSET_10 0xA14E
+#define mmPA_CL_VPORT_YOFFSET_11 0xA154
+#define mmPA_CL_VPORT_YOFFSET_12 0xA15A
+#define mmPA_CL_VPORT_YOFFSET_13 0xA160
+#define mmPA_CL_VPORT_YOFFSET_14 0xA166
+#define mmPA_CL_VPORT_YOFFSET_15 0xA16C
+#define mmPA_CL_VPORT_ZSCALE_1 0xA119
+#define mmPA_CL_VPORT_ZSCALE_2 0xA11F
+#define mmPA_CL_VPORT_ZSCALE_3 0xA125
+#define mmPA_CL_VPORT_ZSCALE_4 0xA12B
+#define mmPA_CL_VPORT_ZSCALE_5 0xA131
+#define mmPA_CL_VPORT_ZSCALE_6 0xA137
+#define mmPA_CL_VPORT_ZSCALE_7 0xA13D
+#define mmPA_CL_VPORT_ZSCALE_8 0xA143
+#define mmPA_CL_VPORT_ZSCALE_9 0xA149
+#define mmPA_CL_VPORT_ZSCALE_10 0xA14F
+#define mmPA_CL_VPORT_ZSCALE_11 0xA155
+#define mmPA_CL_VPORT_ZSCALE_12 0xA15B
+#define mmPA_CL_VPORT_ZSCALE_13 0xA161
+#define mmPA_CL_VPORT_ZSCALE_14 0xA167
+#define mmPA_CL_VPORT_ZSCALE_15 0xA16D
+#define mmPA_CL_VPORT_ZOFFSET_1 0xA11A
+#define mmPA_CL_VPORT_ZOFFSET_2 0xA120
+#define mmPA_CL_VPORT_ZOFFSET_3 0xA126
+#define mmPA_CL_VPORT_ZOFFSET_4 0xA12C
+#define mmPA_CL_VPORT_ZOFFSET_5 0xA132
+#define mmPA_CL_VPORT_ZOFFSET_6 0xA138
+#define mmPA_CL_VPORT_ZOFFSET_7 0xA13E
+#define mmPA_CL_VPORT_ZOFFSET_8 0xA144
+#define mmPA_CL_VPORT_ZOFFSET_9 0xA14A
+#define mmPA_CL_VPORT_ZOFFSET_10 0xA150
+#define mmPA_CL_VPORT_ZOFFSET_11 0xA156
+#define mmPA_CL_VPORT_ZOFFSET_12 0xA15C
+#define mmPA_CL_VPORT_ZOFFSET_13 0xA162
+#define mmPA_CL_VPORT_ZOFFSET_14 0xA168
+#define mmPA_CL_VPORT_ZOFFSET_15 0xA16E
+#define mmPA_CL_VTE_CNTL 0xA206
+#define mmPA_CL_VS_OUT_CNTL 0xA207
+#define mmPA_CL_NANINF_CNTL 0xA208
+#define mmPA_CL_CLIP_CNTL 0xA204
+#define mmPA_CL_GB_VERT_CLIP_ADJ 0xA303
+#define mmPA_CL_GB_VERT_DISC_ADJ 0xA304
+#define mmPA_CL_GB_HORZ_CLIP_ADJ 0xA305
+#define mmPA_CL_GB_HORZ_DISC_ADJ 0xA306
+#define mmPA_CL_UCP_0_X 0xA388
+#define mmPA_CL_UCP_0_Y 0xA389
+#define mmPA_CL_UCP_0_Z 0xA38A
+#define mmPA_CL_UCP_0_W 0xA38B
+#define mmPA_CL_UCP_1_X 0xA38C
+#define mmPA_CL_UCP_1_Y 0xA38D
+#define mmPA_CL_UCP_1_Z 0xA38E
+#define mmPA_CL_UCP_1_W 0xA38F
+#define mmPA_CL_UCP_2_X 0xA390
+#define mmPA_CL_UCP_2_Y 0xA391
+#define mmPA_CL_UCP_2_Z 0xA392
+#define mmPA_CL_UCP_2_W 0xA393
+#define mmPA_CL_UCP_3_X 0xA394
+#define mmPA_CL_UCP_3_Y 0xA395
+#define mmPA_CL_UCP_3_Z 0xA396
+#define mmPA_CL_UCP_3_W 0xA397
+#define mmPA_CL_UCP_4_X 0xA398
+#define mmPA_CL_UCP_4_Y 0xA399
+#define mmPA_CL_UCP_4_Z 0xA39A
+#define mmPA_CL_UCP_4_W 0xA39B
+#define mmPA_CL_UCP_5_X 0xA39C
+#define mmPA_CL_UCP_5_Y 0xA39D
+#define mmPA_CL_UCP_5_Z 0xA39E
+#define mmPA_CL_UCP_5_W 0xA39F
+#define mmPA_CL_POINT_X_RAD 0xA384
+#define mmPA_CL_POINT_Y_RAD 0xA385
+#define mmPA_CL_POINT_SIZE 0xA386
+#define mmPA_CL_POINT_CULL_RAD 0xA387
+
+#define mmPA_SU_VTX_CNTL 0xA302
+#define mmPA_SU_POINT_SIZE 0xA280
+#define mmPA_SU_POINT_MINMAX 0xA281
+#define mmPA_SU_LINE_CNTL 0xA282
+#define mmPA_SU_SC_MODE_CNTL 0xA205
+#define mmPA_SU_POLY_OFFSET_DB_FMT_CNTL 0xA37E
+#define mmPA_SU_POLY_OFFSET_CLAMP 0xA37F
+#define mmPA_SU_POLY_OFFSET_FRONT_SCALE 0xA380
+#define mmPA_SU_POLY_OFFSET_FRONT_OFFSET 0xA381
+#define mmPA_SU_POLY_OFFSET_BACK_SCALE 0xA382
+#define mmPA_SU_POLY_OFFSET_BACK_OFFSET 0xA383
+
+#define mmPA_SC_WINDOW_OFFSET 0xA080
+#define mmPA_SC_AA_CONFIG 0xA301
+#define mmPA_SC_AA_MASK 0xA312
+#define mmPA_SC_AA_SAMPLE_LOCS_MCTX 0xA307
+#define mmPA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 0xA308
+#define mmPA_SC_LINE_STIPPLE 0xA283
+#define mmPA_SC_LINE_CNTL 0xA300
+#define mmPA_SC_SCREEN_SCISSOR_TL 0xA00C
+#define mmPA_SC_SCREEN_SCISSOR_BR 0xA00D
+#define mmPA_SC_WINDOW_SCISSOR_TL 0xA081
+#define mmPA_SC_WINDOW_SCISSOR_BR 0xA082
+#define mmPA_SC_CLIPRECT_RULE 0xA083
+#define mmPA_SC_CLIPRECT_0_TL 0xA084
+#define mmPA_SC_CLIPRECT_0_BR 0xA085
+#define mmPA_SC_CLIPRECT_1_TL 0xA086
+#define mmPA_SC_CLIPRECT_1_BR 0xA087
+#define mmPA_SC_CLIPRECT_2_TL 0xA088
+#define mmPA_SC_CLIPRECT_2_BR 0xA089
+#define mmPA_SC_CLIPRECT_3_TL 0xA08A
+#define mmPA_SC_CLIPRECT_3_BR 0xA08B
+#define mmPA_SC_EDGERULE 0xA08C
+#define mmPA_SC_GENERIC_SCISSOR_TL 0xA090
+#define mmPA_SC_GENERIC_SCISSOR_BR 0xA091
+#define mmPA_SC_VPORT_SCISSOR_0_TL 0xA094
+#define mmPA_SC_VPORT_SCISSOR_1_TL 0xA096
+#define mmPA_SC_VPORT_SCISSOR_2_TL 0xA098
+#define mmPA_SC_VPORT_SCISSOR_3_TL 0xA09A
+#define mmPA_SC_VPORT_SCISSOR_4_TL 0xA09C
+#define mmPA_SC_VPORT_SCISSOR_5_TL 0xA09E
+#define mmPA_SC_VPORT_SCISSOR_6_TL 0xA0A0
+#define mmPA_SC_VPORT_SCISSOR_7_TL 0xA0A2
+#define mmPA_SC_VPORT_SCISSOR_8_TL 0xA0A4
+#define mmPA_SC_VPORT_SCISSOR_9_TL 0xA0A6
+#define mmPA_SC_VPORT_SCISSOR_10_TL 0xA0A8
+#define mmPA_SC_VPORT_SCISSOR_11_TL 0xA0AA
+#define mmPA_SC_VPORT_SCISSOR_12_TL 0xA0AC
+#define mmPA_SC_VPORT_SCISSOR_13_TL 0xA0AE
+#define mmPA_SC_VPORT_SCISSOR_14_TL 0xA0B0
+#define mmPA_SC_VPORT_SCISSOR_15_TL 0xA0B2
+#define mmPA_SC_VPORT_SCISSOR_0_BR 0xA095
+#define mmPA_SC_VPORT_SCISSOR_1_BR 0xA097
+#define mmPA_SC_VPORT_SCISSOR_2_BR 0xA099
+#define mmPA_SC_VPORT_SCISSOR_3_BR 0xA09B
+#define mmPA_SC_VPORT_SCISSOR_4_BR 0xA09D
+#define mmPA_SC_VPORT_SCISSOR_5_BR 0xA09F
+#define mmPA_SC_VPORT_SCISSOR_6_BR 0xA0A1
+#define mmPA_SC_VPORT_SCISSOR_7_BR 0xA0A3
+#define mmPA_SC_VPORT_SCISSOR_8_BR 0xA0A5
+#define mmPA_SC_VPORT_SCISSOR_9_BR 0xA0A7
+#define mmPA_SC_VPORT_SCISSOR_10_BR 0xA0A9
+#define mmPA_SC_VPORT_SCISSOR_11_BR 0xA0AB
+#define mmPA_SC_VPORT_SCISSOR_12_BR 0xA0AD
+#define mmPA_SC_VPORT_SCISSOR_13_BR 0xA0AF
+#define mmPA_SC_VPORT_SCISSOR_14_BR 0xA0B1
+#define mmPA_SC_VPORT_SCISSOR_15_BR 0xA0B3
+#define mmPA_SC_VPORT_ZMIN_0 0xA0B4
+#define mmPA_SC_VPORT_ZMIN_1 0xA0B6
+#define mmPA_SC_VPORT_ZMIN_2 0xA0B8
+#define mmPA_SC_VPORT_ZMIN_3 0xA0BA
+#define mmPA_SC_VPORT_ZMIN_4 0xA0BC
+#define mmPA_SC_VPORT_ZMIN_5 0xA0BE
+#define mmPA_SC_VPORT_ZMIN_6 0xA0C0
+#define mmPA_SC_VPORT_ZMIN_7 0xA0C2
+#define mmPA_SC_VPORT_ZMIN_8 0xA0C4
+#define mmPA_SC_VPORT_ZMIN_9 0xA0C6
+#define mmPA_SC_VPORT_ZMIN_10 0xA0C8
+#define mmPA_SC_VPORT_ZMIN_11 0xA0CA
+#define mmPA_SC_VPORT_ZMIN_12 0xA0CC
+#define mmPA_SC_VPORT_ZMIN_13 0xA0CE
+#define mmPA_SC_VPORT_ZMIN_14 0xA0D0
+#define mmPA_SC_VPORT_ZMIN_15 0xA0D2
+#define mmPA_SC_VPORT_ZMAX_0 0xA0B5
+#define mmPA_SC_VPORT_ZMAX_1 0xA0B7
+#define mmPA_SC_VPORT_ZMAX_2 0xA0B9
+#define mmPA_SC_VPORT_ZMAX_3 0xA0BB
+#define mmPA_SC_VPORT_ZMAX_4 0xA0BD
+#define mmPA_SC_VPORT_ZMAX_5 0xA0BF
+#define mmPA_SC_VPORT_ZMAX_6 0xA0C1
+#define mmPA_SC_VPORT_ZMAX_7 0xA0C3
+#define mmPA_SC_VPORT_ZMAX_8 0xA0C5
+#define mmPA_SC_VPORT_ZMAX_9 0xA0C7
+#define mmPA_SC_VPORT_ZMAX_10 0xA0C9
+#define mmPA_SC_VPORT_ZMAX_11 0xA0CB
+#define mmPA_SC_VPORT_ZMAX_12 0xA0CD
+#define mmPA_SC_VPORT_ZMAX_13 0xA0CF
+#define mmPA_SC_VPORT_ZMAX_14 0xA0D1
+#define mmPA_SC_VPORT_ZMAX_15 0xA0D3
+#define mmPA_SC_MODE_CNTL 0xA293
+#define mmPA_SC_MPASS_PS_CNTL 0xA292
+
+#define mmVGT_DRAW_INITIATOR 0xA1FC
+#define mmVGT_EVENT_INITIATOR 0xA2A4
+#define mmVGT_EVENT_ADDRESS_REG 0xA1FE
+#define mmVGT_DMA_BASE_HI 0xA1F9
+#define mmVGT_DMA_BASE 0xA1FA
+#define mmVGT_DMA_INDEX_TYPE 0xA29F
+#define mmVGT_DMA_NUM_INSTANCES 0xA2A2
+#define mmVGT_DMA_SIZE 0xA29D
+
+#define mmVGT_IMMED_DATA 0xA1FD
+#define mmVGT_INDEX_TYPE 0x2257
+#define mmVGT_NUM_INDICES 0x225C
+#define mmVGT_NUM_INSTANCES 0x225D
+#define mmVGT_PRIMITIVE_TYPE 0x2256
+#define mmVGT_PRIMITIVEID_EN 0xA2A1
+#define mmVGT_VTX_CNT_EN 0xA2AE
+#define mmVGT_REUSE_OFF 0xA2AD
+#define mmVGT_INSTANCE_STEP_RATE_0 0xA2A8
+#define mmVGT_INSTANCE_STEP_RATE_1 0xA2A9
+#define mmVGT_MAX_VTX_INDX 0xA100
+#define mmVGT_MIN_VTX_INDX 0xA101
+#define mmVGT_INDX_OFFSET 0xA102
+#define mmVGT_VERTEX_REUSE_BLOCK_CNTL 0xA316
+#define mmVGT_OUT_DEALLOC_CNTL 0xA317
+#define mmVGT_MULTI_PRIM_IB_RESET_INDX 0xA103
+#define mmVGT_MULTI_PRIM_IB_RESET_EN 0xA2A5
+#define mmVGT_ENHANCE 0xA294
+#define mmVGT_OUTPUT_PATH_CNTL 0xA284
+#define mmVGT_HOS_CNTL 0xA285
+#define mmVGT_HOS_MAX_TESS_LEVEL 0xA286
+#define mmVGT_HOS_MIN_TESS_LEVEL 0xA287
+#define mmVGT_HOS_REUSE_DEPTH 0xA288
+#define mmVGT_GROUP_PRIM_TYPE 0xA289
+#define mmVGT_GROUP_FIRST_DECR 0xA28A
+#define mmVGT_GROUP_DECR 0xA28B
+#define mmVGT_GROUP_VECT_0_CNTL 0xA28C
+#define mmVGT_GROUP_VECT_1_CNTL 0xA28D
+#define mmVGT_GROUP_VECT_0_FMT_CNTL 0xA28E
+#define mmVGT_GROUP_VECT_1_FMT_CNTL 0xA28F
+#define mmVGT_GS_MODE 0xA290
+#define mmVGT_GS_OUT_PRIM_TYPE 0xA29B
+
+#define mmVGT_STRMOUT_EN 0xA2AC
+#define mmVGT_STRMOUT_BUFFER_SIZE_0 0xA2B4
+#define mmVGT_STRMOUT_BUFFER_SIZE_1 0xA2B8
+#define mmVGT_STRMOUT_BUFFER_SIZE_2 0xA2BC
+#define mmVGT_STRMOUT_BUFFER_SIZE_3 0xA2C0
+#define mmVGT_STRMOUT_BUFFER_OFFSET_0 0xA2B7
+#define mmVGT_STRMOUT_BUFFER_OFFSET_1 0xA2BB
+#define mmVGT_STRMOUT_BUFFER_OFFSET_2 0xA2BF
+#define mmVGT_STRMOUT_BUFFER_OFFSET_3 0xA2C3
+#define mmVGT_STRMOUT_VTX_STRIDE_0 0xA2B5
+#define mmVGT_STRMOUT_VTX_STRIDE_1 0xA2B9
+#define mmVGT_STRMOUT_VTX_STRIDE_2 0xA2BD
+#define mmVGT_STRMOUT_VTX_STRIDE_3 0xA2C1
+#define mmVGT_STRMOUT_BUFFER_BASE_0 0xA2B6
+#define mmVGT_STRMOUT_BUFFER_BASE_1 0xA2BA
+#define mmVGT_STRMOUT_BUFFER_BASE_2 0xA2BE
+#define mmVGT_STRMOUT_BUFFER_BASE_3 0xA2C2
+#define mmVGT_STRMOUT_BUFFER_EN 0xA2C8
+#define mmVGT_STRMOUT_BASE_OFFSET_0 0xA2C4
+#define mmVGT_STRMOUT_BASE_OFFSET_1 0xA2C5
+#define mmVGT_STRMOUT_BASE_OFFSET_2 0xA2C6
+#define mmVGT_STRMOUT_BASE_OFFSET_3 0xA2C7
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_0 0xA2D1
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_1 0xA2D2
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_2 0xA2D3
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_3 0xA2D4
+#define mmVGT_STRMOUT_DRAW_OPAQUE_OFFSET 0xA2CA
+#define mmVGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE 0xA2CB
+#define mmVGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE 0xA2CC
+
+#define mmSQ_PGM_START_PS 0xA210
+#define mmSQ_PGM_CF_OFFSET_PS 0xA233
+#define mmSQ_PGM_RESOURCES_PS 0xA214
+#define mmSQ_PGM_EXPORTS_PS 0xA215
+#define mmSQ_PGM_START_VS 0xA216
+#define mmSQ_PGM_CF_OFFSET_VS 0xA234
+#define mmSQ_PGM_RESOURCES_VS 0xA21A
+#define mmSQ_PGM_START_GS 0xA21B
+#define mmSQ_PGM_CF_OFFSET_GS 0xA235
+#define mmSQ_PGM_RESOURCES_GS 0xA21F
+#define mmSQ_PGM_START_ES 0xA220
+#define mmSQ_PGM_CF_OFFSET_ES 0xA236
+#define mmSQ_PGM_RESOURCES_ES 0xA224
+#define mmSQ_PGM_START_FS 0xA225
+#define mmSQ_PGM_CF_OFFSET_FS 0xA237
+#define mmSQ_PGM_RESOURCES_FS 0xA229
+#define mmSQ_ESGS_RING_ITEMSIZE 0xA22A
+#define mmSQ_GSVS_RING_ITEMSIZE 0xA22B
+#define mmSQ_ESTMP_RING_ITEMSIZE 0xA22C
+#define mmSQ_GSTMP_RING_ITEMSIZE 0xA22D
+#define mmSQ_VSTMP_RING_ITEMSIZE 0xA22E
+#define mmSQ_PSTMP_RING_ITEMSIZE 0xA22F
+#define mmSQ_FBUF_RING_ITEMSIZE 0xA230
+#define mmSQ_REDUC_RING_ITEMSIZE 0xA231
+#define mmSQ_GS_VERT_ITEMSIZE 0xA232
+#define mmSQ_VTX_SEMANTIC_CLEAR 0xA238
+
+#define mmSQ_VTX_SEMANTIC_0 0xA0E0
+#define mmSQ_VTX_SEMANTIC_1 0xA0E1
+#define mmSQ_VTX_SEMANTIC_2 0xA0E2
+#define mmSQ_VTX_SEMANTIC_3 0xA0E3
+#define mmSQ_VTX_SEMANTIC_4 0xA0E4
+#define mmSQ_VTX_SEMANTIC_5 0xA0E5
+#define mmSQ_VTX_SEMANTIC_6 0xA0E6
+#define mmSQ_VTX_SEMANTIC_7 0xA0E7
+#define mmSQ_VTX_SEMANTIC_8 0xA0E8
+#define mmSQ_VTX_SEMANTIC_9 0xA0E9
+#define mmSQ_VTX_SEMANTIC_10 0xA0EA
+#define mmSQ_VTX_SEMANTIC_11 0xA0EB
+#define mmSQ_VTX_SEMANTIC_12 0xA0EC
+#define mmSQ_VTX_SEMANTIC_13 0xA0ED
+#define mmSQ_VTX_SEMANTIC_14 0xA0EE
+#define mmSQ_VTX_SEMANTIC_15 0xA0EF
+#define mmSQ_VTX_SEMANTIC_16 0xA0F0
+#define mmSQ_VTX_SEMANTIC_17 0xA0F1
+#define mmSQ_VTX_SEMANTIC_18 0xA0F2
+#define mmSQ_VTX_SEMANTIC_19 0xA0F3
+#define mmSQ_VTX_SEMANTIC_20 0xA0F4
+#define mmSQ_VTX_SEMANTIC_21 0xA0F5
+#define mmSQ_VTX_SEMANTIC_22 0xA0F6
+#define mmSQ_VTX_SEMANTIC_23 0xA0F7
+#define mmSQ_VTX_SEMANTIC_24 0xA0F8
+#define mmSQ_VTX_SEMANTIC_25 0xA0F9
+#define mmSQ_VTX_SEMANTIC_26 0xA0FA
+#define mmSQ_VTX_SEMANTIC_27 0xA0FB
+#define mmSQ_VTX_SEMANTIC_28 0xA0FC
+#define mmSQ_VTX_SEMANTIC_29 0xA0FD
+#define mmSQ_VTX_SEMANTIC_30 0xA0FE
+#define mmSQ_VTX_SEMANTIC_31 0xA0FF
+
+#define mmSQ_ALU_CONST_CACHE_PS_0 0xA250
+#define mmSQ_ALU_CONST_CACHE_PS_1 0xA251
+#define mmSQ_ALU_CONST_CACHE_PS_2 0xA252
+#define mmSQ_ALU_CONST_CACHE_PS_3 0xA253
+#define mmSQ_ALU_CONST_CACHE_PS_4 0xA254
+#define mmSQ_ALU_CONST_CACHE_PS_5 0xA255
+#define mmSQ_ALU_CONST_CACHE_PS_6 0xA256
+#define mmSQ_ALU_CONST_CACHE_PS_7 0xA257
+#define mmSQ_ALU_CONST_CACHE_PS_8 0xA258
+#define mmSQ_ALU_CONST_CACHE_PS_9 0xA259
+#define mmSQ_ALU_CONST_CACHE_PS_10 0xA25A
+#define mmSQ_ALU_CONST_CACHE_PS_11 0xA25B
+#define mmSQ_ALU_CONST_CACHE_PS_12 0xA25C
+#define mmSQ_ALU_CONST_CACHE_PS_13 0xA25D
+#define mmSQ_ALU_CONST_CACHE_PS_14 0xA25E
+#define mmSQ_ALU_CONST_CACHE_PS_15 0xA25F
+#define mmSQ_ALU_CONST_CACHE_VS_0 0xA260
+#define mmSQ_ALU_CONST_CACHE_VS_1 0xA261
+#define mmSQ_ALU_CONST_CACHE_VS_2 0xA262
+#define mmSQ_ALU_CONST_CACHE_VS_3 0xA263
+#define mmSQ_ALU_CONST_CACHE_VS_4 0xA264
+#define mmSQ_ALU_CONST_CACHE_VS_5 0xA265
+#define mmSQ_ALU_CONST_CACHE_VS_6 0xA266
+#define mmSQ_ALU_CONST_CACHE_VS_7 0xA267
+#define mmSQ_ALU_CONST_CACHE_VS_8 0xA268
+#define mmSQ_ALU_CONST_CACHE_VS_9 0xA269
+#define mmSQ_ALU_CONST_CACHE_VS_10 0xA26A
+#define mmSQ_ALU_CONST_CACHE_VS_11 0xA26B
+#define mmSQ_ALU_CONST_CACHE_VS_12 0xA26C
+#define mmSQ_ALU_CONST_CACHE_VS_13 0xA26D
+#define mmSQ_ALU_CONST_CACHE_VS_14 0xA26E
+#define mmSQ_ALU_CONST_CACHE_VS_15 0xA26F
+#define mmSQ_ALU_CONST_CACHE_GS_0 0xA270
+#define mmSQ_ALU_CONST_CACHE_GS_1 0xA271
+#define mmSQ_ALU_CONST_CACHE_GS_2 0xA272
+#define mmSQ_ALU_CONST_CACHE_GS_3 0xA273
+#define mmSQ_ALU_CONST_CACHE_GS_4 0xA274
+#define mmSQ_ALU_CONST_CACHE_GS_5 0xA275
+#define mmSQ_ALU_CONST_CACHE_GS_6 0xA276
+#define mmSQ_ALU_CONST_CACHE_GS_7 0xA277
+#define mmSQ_ALU_CONST_CACHE_GS_8 0xA278
+#define mmSQ_ALU_CONST_CACHE_GS_9 0xA279
+#define mmSQ_ALU_CONST_CACHE_GS_10 0xA27A
+#define mmSQ_ALU_CONST_CACHE_GS_11 0xA27B
+#define mmSQ_ALU_CONST_CACHE_GS_12 0xA27C
+#define mmSQ_ALU_CONST_CACHE_GS_13 0xA27D
+#define mmSQ_ALU_CONST_CACHE_GS_14 0xA27E
+#define mmSQ_ALU_CONST_CACHE_GS_15 0xA27F
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_0 0xA050
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_1 0xA051
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_2 0xA052
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_3 0xA053
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_4 0xA054
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_5 0xA055
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_6 0xA056
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_7 0xA057
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_8 0xA058
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_9 0xA059
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_10 0xA05A
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_11 0xA05B
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_12 0xA05C
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_13 0xA05D
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_14 0xA05E
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_15 0xA05F
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_0 0xA060
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_1 0xA061
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_2 0xA062
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_3 0xA063
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_4 0xA064
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_5 0xA065
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_6 0xA066
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_7 0xA067
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_8 0xA068
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_9 0xA069
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_10 0xA06A
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_11 0xA06B
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_12 0xA06C
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_13 0xA06D
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_14 0xA06E
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_15 0xA06F
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_0 0xA070
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_1 0xA071
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_2 0xA072
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_3 0xA073
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_4 0xA074
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_5 0xA075
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_6 0xA076
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_7 0xA077
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_8 0xA078
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_9 0xA079
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_10 0xA07A
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_11 0xA07B
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_12 0xA07C
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_13 0xA07D
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_14 0xA07E
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_15 0xA07F
+
+#define mmSPI_VS_OUT_ID_0 0xA185
+#define mmSPI_VS_OUT_ID_1 0xA186
+#define mmSPI_VS_OUT_ID_2 0xA187
+#define mmSPI_VS_OUT_ID_3 0xA188
+#define mmSPI_VS_OUT_ID_4 0xA189
+#define mmSPI_VS_OUT_ID_5 0xA18A
+#define mmSPI_VS_OUT_ID_6 0xA18B
+#define mmSPI_VS_OUT_ID_7 0xA18C
+#define mmSPI_VS_OUT_ID_8 0xA18D
+#define mmSPI_VS_OUT_ID_9 0xA18E
+#define mmSPI_PS_INPUT_CNTL_0 0xA191
+#define mmSPI_PS_INPUT_CNTL_1 0xA192
+#define mmSPI_PS_INPUT_CNTL_2 0xA193
+#define mmSPI_PS_INPUT_CNTL_3 0xA194
+#define mmSPI_PS_INPUT_CNTL_4 0xA195
+#define mmSPI_PS_INPUT_CNTL_5 0xA196
+#define mmSPI_PS_INPUT_CNTL_6 0xA197
+#define mmSPI_PS_INPUT_CNTL_7 0xA198
+#define mmSPI_PS_INPUT_CNTL_8 0xA199
+#define mmSPI_PS_INPUT_CNTL_9 0xA19A
+#define mmSPI_PS_INPUT_CNTL_10 0xA19B
+#define mmSPI_PS_INPUT_CNTL_11 0xA19C
+#define mmSPI_PS_INPUT_CNTL_12 0xA19D
+#define mmSPI_PS_INPUT_CNTL_13 0xA19E
+#define mmSPI_PS_INPUT_CNTL_14 0xA19F
+#define mmSPI_PS_INPUT_CNTL_15 0xA1A0
+#define mmSPI_PS_INPUT_CNTL_16 0xA1A1
+#define mmSPI_PS_INPUT_CNTL_17 0xA1A2
+#define mmSPI_PS_INPUT_CNTL_18 0xA1A3
+#define mmSPI_PS_INPUT_CNTL_19 0xA1A4
+#define mmSPI_PS_INPUT_CNTL_20 0xA1A5
+#define mmSPI_PS_INPUT_CNTL_21 0xA1A6
+#define mmSPI_PS_INPUT_CNTL_22 0xA1A7
+#define mmSPI_PS_INPUT_CNTL_23 0xA1A8
+#define mmSPI_PS_INPUT_CNTL_24 0xA1A9
+#define mmSPI_PS_INPUT_CNTL_25 0xA1AA
+#define mmSPI_PS_INPUT_CNTL_26 0xA1AB
+#define mmSPI_PS_INPUT_CNTL_27 0xA1AC
+#define mmSPI_PS_INPUT_CNTL_28 0xA1AD
+#define mmSPI_PS_INPUT_CNTL_29 0xA1AE
+#define mmSPI_PS_INPUT_CNTL_30 0xA1AF
+#define mmSPI_PS_INPUT_CNTL_31 0xA1B0
+#define mmSPI_VS_OUT_CONFIG 0xA1B1
+#define mmSPI_THREAD_GROUPING 0xA1B2
+#define mmSPI_PS_IN_CONTROL_0 0xA1B3
+#define mmSPI_PS_IN_CONTROL_1 0xA1B4
+#define mmSPI_INTERP_CONTROL_0 0xA1B5
+#define mmSPI_INPUT_Z 0xA1B6
+#define mmSPI_FOG_CNTL 0xA1B7
+#define mmSPI_FOG_FUNC_SCALE 0xA1B8
+#define mmSPI_FOG_FUNC_BIAS 0xA1B9
+
+#define mmSX_MISC 0xA0D4
+#define mmSX_ALPHA_TEST_CONTROL 0xA104
+#define mmSX_ALPHA_REF 0xA10E
+
+#define mmDB_DEPTH_BASE 0xA003
+#define mmDB_DEPTH_INFO 0xA004
+#define mmDB_HTILE_DATA_BASE 0xA005
+#define mmDB_DEPTH_SIZE 0xA000
+#define mmDB_DEPTH_VIEW 0xA001
+#define mmDB_RENDER_CONTROL 0xA343
+#define mmDB_RENDER_OVERRIDE 0xA344
+#define mmDB_SHADER_CONTROL 0xA203
+#define mmDB_STENCIL_CLEAR 0xA00A
+#define mmDB_DEPTH_CLEAR 0xA00B
+#define mmDB_HTILE_SURFACE 0xA349
+#define mmDB_PRELOAD_CONTROL 0xA34C
+#define mmDB_PREFETCH_LIMIT 0xA34D
+#define mmDB_STENCILREFMASK 0xA10C
+#define mmDB_STENCILREFMASK_BF 0xA10D
+#define mmDB_SRESULTS_COMPARE_STATE0 0xA34A
+#define mmDB_SRESULTS_COMPARE_STATE1 0xA34B
+#define mmDB_DEPTH_CONTROL 0xA200
+#define mmDB_ALPHA_TO_MASK 0xA351
+
+#define mmCB_CLEAR_RED_R6XX 0xA048
+#define mmCB_CLEAR_GREEN_R6XX 0xA049
+#define mmCB_CLEAR_BLUE_R6XX 0xA04A
+#define mmCB_CLEAR_ALPHA_R6XX 0xA04B
+#define mmCB_BLEND_RED 0xA105
+#define mmCB_BLEND_GREEN 0xA106
+#define mmCB_BLEND_BLUE 0xA107
+#define mmCB_BLEND_ALPHA 0xA108
+#define mmCB_FOG_RED_R6XX 0xA109
+#define mmCB_FOG_GREEN_R6XX 0xA10A
+#define mmCB_FOG_BLUE_R6XX 0xA10B
+#define mmCB_BLEND_CONTROL 0xA201
+#define mmCB_COLOR_CONTROL 0xA202
+#define mmCB_BLEND0_CONTROL 0xA1E0
+#define mmCB_BLEND1_CONTROL 0xA1E1
+#define mmCB_BLEND2_CONTROL 0xA1E2
+#define mmCB_BLEND3_CONTROL 0xA1E3
+#define mmCB_BLEND4_CONTROL 0xA1E4
+#define mmCB_BLEND5_CONTROL 0xA1E5
+#define mmCB_BLEND6_CONTROL 0xA1E6
+#define mmCB_BLEND7_CONTROL 0xA1E7
+#define mmCB_CLRCMP_CONTROL 0xA30C
+#define mmCB_CLRCMP_SRC 0xA30D
+#define mmCB_CLRCMP_DST 0xA30E
+#define mmCB_CLRCMP_MSK 0xA30F
+#define mmCB_COLOR0_BASE 0xA010
+#define mmCB_COLOR1_BASE 0xA011
+#define mmCB_COLOR2_BASE 0xA012
+#define mmCB_COLOR3_BASE 0xA013
+#define mmCB_COLOR4_BASE 0xA014
+#define mmCB_COLOR5_BASE 0xA015
+#define mmCB_COLOR6_BASE 0xA016
+#define mmCB_COLOR7_BASE 0xA017
+#define mmCB_COLOR0_SIZE 0xA018
+#define mmCB_COLOR1_SIZE 0xA019
+#define mmCB_COLOR2_SIZE 0xA01A
+#define mmCB_COLOR3_SIZE 0xA01B
+#define mmCB_COLOR4_SIZE 0xA01C
+#define mmCB_COLOR5_SIZE 0xA01D
+#define mmCB_COLOR6_SIZE 0xA01E
+#define mmCB_COLOR7_SIZE 0xA01F
+#define mmCB_COLOR0_VIEW 0xA020
+#define mmCB_COLOR1_VIEW 0xA021
+#define mmCB_COLOR2_VIEW 0xA022
+#define mmCB_COLOR3_VIEW 0xA023
+#define mmCB_COLOR4_VIEW 0xA024
+#define mmCB_COLOR5_VIEW 0xA025
+#define mmCB_COLOR6_VIEW 0xA026
+#define mmCB_COLOR7_VIEW 0xA027
+#define mmCB_COLOR0_INFO 0xA028
+#define mmCB_COLOR1_INFO 0xA029
+#define mmCB_COLOR2_INFO 0xA02A
+#define mmCB_COLOR3_INFO 0xA02B
+#define mmCB_COLOR4_INFO 0xA02C
+#define mmCB_COLOR5_INFO 0xA02D
+#define mmCB_COLOR6_INFO 0xA02E
+#define mmCB_COLOR7_INFO 0xA02F
+#define mmCB_COLOR0_TILE 0xA030
+#define mmCB_COLOR1_TILE 0xA031
+#define mmCB_COLOR2_TILE 0xA032
+#define mmCB_COLOR3_TILE 0xA033
+#define mmCB_COLOR4_TILE 0xA034
+#define mmCB_COLOR5_TILE 0xA035
+#define mmCB_COLOR6_TILE 0xA036
+#define mmCB_COLOR7_TILE 0xA037
+#define mmCB_COLOR0_FRAG 0xA038
+#define mmCB_COLOR1_FRAG 0xA039
+#define mmCB_COLOR2_FRAG 0xA03A
+#define mmCB_COLOR3_FRAG 0xA03B
+#define mmCB_COLOR4_FRAG 0xA03C
+#define mmCB_COLOR5_FRAG 0xA03D
+#define mmCB_COLOR6_FRAG 0xA03E
+#define mmCB_COLOR7_FRAG 0xA03F
+#define mmCB_COLOR0_MASK 0xA040
+#define mmCB_COLOR1_MASK 0xA041
+#define mmCB_COLOR2_MASK 0xA042
+#define mmCB_COLOR3_MASK 0xA043
+#define mmCB_COLOR4_MASK 0xA044
+#define mmCB_COLOR5_MASK 0xA045
+#define mmCB_COLOR6_MASK 0xA046
+#define mmCB_COLOR7_MASK 0xA047
+#define mmCB_CLEAR_RED_R6XX 0xA048
+#define mmCB_CLEAR_GREEN_R6XX 0xA049
+#define mmCB_CLEAR_BLUE_R6XX 0xA04A
+#define mmCB_CLEAR_ALPHA_R6XX 0xA04B
+#define mmCB_TARGET_MASK 0xA08E
+#define mmCB_SHADER_MASK 0xA08F
+#define mmCB_SHADER_CONTROL 0xA1E8
+
+#define mmSQ_VTX_BASE_VTX_LOC 0xF3FC
+#define mmSQ_VTX_START_INST_LOC 0xF3FD
+
+#endif /* _R700_CHIPOFFSET_H_ */
+
diff --git a/r600/r700_clear.c b/r600/r700_clear.c
new file mode 100644
index 0000000..c6546ab
--- /dev/null
+++ b/r600/r700_clear.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "swrast/swrast.h"
+
+#include "radeon_lock.h"
+#include "r600_context.h"
+
+#include "r700_shaderinst.h"
+#include "r600_emit.h"
+#include "r700_clear.h"
+
+static GLboolean r700ClearFast(context_t *context, GLbitfield mask)
+{
+ /* TODO, fast clear need implementation */
+ return GL_FALSE;
+}
+
+void r700Clear(GLcontext * ctx, GLbitfield mask)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon);
+ const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask);
+ GLbitfield swrast_mask = 0, tri_mask = 0;
+ int i;
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x\n", __func__, mask);
+
+ if( GL_TRUE == r700ClearFast(context, mask) )
+ {
+ return;
+ }
+ if (!context->radeon.radeonScreen->driScreen->dri2.enabled) {
+ LOCK_HARDWARE(&context->radeon);
+ UNLOCK_HARDWARE(&context->radeon);
+ if (dPriv->numClipRects == 0)
+ return;
+ }
+
+ R600_NEWPRIM(context);
+
+ if (colorMask == ~0)
+ tri_mask |= (mask & BUFFER_BITS_COLOR);
+ else
+ tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
+
+
+ /* HW stencil */
+ if (mask & BUFFER_BIT_STENCIL) {
+ tri_mask |= BUFFER_BIT_STENCIL;
+ }
+
+ /* HW depth */
+ if (mask & BUFFER_BIT_DEPTH) {
+ tri_mask |= BUFFER_BIT_DEPTH;
+ }
+
+ /* If we're doing a tri pass for depth/stencil, include a likely color
+ * buffer with it.
+ */
+
+ for (i = 0; i < BUFFER_COUNT; i++) {
+ GLuint bufBit = 1 << i;
+ if ((tri_mask) & bufBit) {
+ if (!fb->Attachment[i].Renderbuffer->ClassID) {
+ tri_mask &= ~bufBit;
+ swrast_mask |= bufBit;
+ }
+ }
+ }
+
+ /* SW fallback clearing */
+ swrast_mask = mask & ~tri_mask;
+
+ if (tri_mask) {
+ radeonUserClear(ctx, tri_mask);
+ }
+
+ if (swrast_mask) {
+ radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, "%s: swrast clear, mask: %x\n",
+ __FUNCTION__, swrast_mask);
+ _swrast_Clear(ctx, swrast_mask);
+ }
+
+}
+
+
diff --git a/r600/r700_clear.h b/r600/r700_clear.h
new file mode 100644
index 0000000..bed1d3a
--- /dev/null
+++ b/r600/r700_clear.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef __r700_CLEAR_H__
+#define __r700_CLEAR_H__
+
+extern void r700Clear(GLcontext * ctx, GLbitfield mask);
+
+#endif /* __r700_CLEAR_H__ */
diff --git a/r600/r700_debug.c b/r600/r700_debug.c
new file mode 100644
index 0000000..cd1ba9e
--- /dev/null
+++ b/r600/r700_debug.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "r700_debug.h"
+#include "radeon_debug.h"
+
+void DumpHwBinary(int type, void *addr, int size)
+{
+ int i;
+ unsigned int *pHw = (unsigned int *)addr;
+
+ return;
+
+ switch (type)
+ {
+ case DUMP_PIXEL_SHADER:
+ radeon_print(RADEON_SHADER, RADEON_TRACE, "Pixel Shader\n");
+ break;
+ case DUMP_VERTEX_SHADER:
+ radeon_print(RADEON_SHADER, RADEON_TRACE, "Vertex Shader\n");
+ break;
+ case DUMP_FETCH_SHADER:
+ radeon_print(RADEON_SHADER, RADEON_TRACE, "Fetch Shader\n");
+ break;
+ }
+
+ for (i = 0; i < size; i++)
+ {
+ radeon_print(RADEON_SHADER, RADEON_TRACE, "0x%08x,\t", *pHw);
+ if (i%4 == 3)
+ radeon_print(RADEON_SHADER, RADEON_TRACE, "0x%08x\n", *pHw);
+ pHw++;
+
+ }
+}
+
diff --git a/r600/r700_debug.h b/r600/r700_debug.h
new file mode 100644
index 0000000..c0921bf
--- /dev/null
+++ b/r600/r700_debug.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef _R700_DEBUG_H_
+#define _R700_DEBUG_H_
+enum R700_DUMP_TYPE
+{
+ DUMP_VERTEX_SHADER = 0x1,
+ DUMP_PIXEL_SHADER = 0x2,
+ DUMP_FETCH_SHADER = 0x4,
+};
+
+extern void DumpHwBinary(int, void *, int);
+
+#endif /*_R700_DEBUG_H_*/
diff --git a/r600/r700_driconf.h b/r600/r700_driconf.h
new file mode 100644
index 0000000..a9e2152
--- /dev/null
+++ b/r600/r700_driconf.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_DRICONF_H_
+#define _R700_DRICONF_H_
+
+#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
+#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
+
+#endif /* _R700_DRICONF_H_ */
diff --git a/r600/r700_fragprog.c b/r600/r700_fragprog.c
new file mode 100644
index 0000000..78ce3ae
--- /dev/null
+++ b/r600/r700_fragprog.c
@@ -0,0 +1,476 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+
+#include "r700_fragprog.h"
+
+#include "r700_debug.h"
+
+//TODO : Validate FP input with VP output.
+void Map_Fragment_Program(r700_AssemblerBase *pAsm,
+ struct gl_fragment_program *mesa_fp)
+{
+ unsigned int unBit;
+ unsigned int i;
+ GLuint ui;
+
+ pAsm->number_used_registers = 0;
+
+//Input mapping : mesa_fp->Base.InputsRead set the flag, set in
+ //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
+ //MUST match order in Map_Vertex_Output
+ unBit = 1 << FRAG_ATTRIB_WPOS;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++;
+ }
+
+ unBit = 1 << FRAG_ATTRIB_COL0;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
+ }
+
+ unBit = 1 << FRAG_ATTRIB_COL1;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
+ }
+
+ unBit = 1 << FRAG_ATTRIB_FOGC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
+ }
+
+ for(i=0; i<8; i++)
+ {
+ unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
+ }
+ }
+
+/* Map temporary registers (GPRs) */
+ pAsm->starting_temp_register_number = pAsm->number_used_registers;
+
+ if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
+ {
+ pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
+ }
+ else
+ {
+ pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
+ }
+
+/* Output mapping */
+ pAsm->number_of_exports = 0;
+ pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
+ pAsm->starting_export_register_number = pAsm->number_used_registers;
+ unBit = 1 << FRAG_RESULT_COLOR;
+ if(mesa_fp->Base.OutputsWritten & unBit)
+ {
+ pAsm->uiFP_OutputMap[FRAG_RESULT_COLOR] = pAsm->number_used_registers++;
+ pAsm->number_of_exports++;
+ pAsm->number_of_colorandz_exports++;
+ }
+ unBit = 1 << FRAG_RESULT_DEPTH;
+ if(mesa_fp->Base.OutputsWritten & unBit)
+ {
+ pAsm->depth_export_register_number = pAsm->number_used_registers;
+ pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++;
+ pAsm->number_of_exports++;
+ pAsm->number_of_colorandz_exports++;
+ pAsm->pR700Shader->depthIsExported = 1;
+ }
+
+ pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
+ for(ui=0; ui<pAsm->number_of_exports; ui++)
+ {
+ pAsm->pucOutMask[ui] = 0x0;
+ }
+
+ pAsm->uFirstHelpReg = pAsm->number_used_registers;
+}
+
+GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
+ struct gl_fragment_program *mesa_fp)
+{
+ GLuint i, j;
+ GLint * puiTEMPwrites;
+ struct prog_instruction * pILInst;
+ InstDeps *pInstDeps;
+ struct prog_instruction * texcoord_DepInst;
+ GLint nDepInstID;
+
+ puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
+ for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
+ {
+ puiTEMPwrites[i] = -1;
+ }
+
+ pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
+
+ for(i=0; i<mesa_fp->Base.NumInstructions; i++)
+ {
+ pInstDeps[i].nDstDep = -1;
+ pILInst = &(mesa_fp->Base.Instructions[i]);
+
+ //Dst
+ if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
+ {
+ //Set lastwrite for the temp
+ puiTEMPwrites[pILInst->DstReg.Index] = i;
+ }
+
+ //Src
+ for(j=0; j<3; j++)
+ {
+ if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
+ {
+ //Set dep.
+ pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
+ }
+ else
+ {
+ pInstDeps[i].nSrcDeps[j] = -1;
+ }
+ }
+ }
+
+ fp->r700AsmCode.pInstDeps = pInstDeps;
+
+ FREE(puiTEMPwrites);
+
+ //Find dep for tex inst
+ for(i=0; i<mesa_fp->Base.NumInstructions; i++)
+ {
+ pILInst = &(mesa_fp->Base.Instructions[i]);
+
+ if(GL_TRUE == IsTex(pILInst->Opcode))
+ { //src0 is the tex coord register, src1 is texunit, src2 is textype
+ nDepInstID = pInstDeps[i].nSrcDeps[0];
+ if(nDepInstID >= 0)
+ {
+ texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
+ if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
+ {
+ pInstDeps[nDepInstID].nDstDep = i;
+ pInstDeps[i].nDstDep = i;
+ }
+ else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
+ {
+ pInstDeps[i].nDstDep = i;
+ }
+ else
+ { //... other deps?
+ }
+ }
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
+ struct gl_fragment_program *mesa_fp)
+{
+ GLuint number_of_colors_exported;
+ GLboolean z_enabled = GL_FALSE;
+ GLuint unBit;
+
+ //Init_Program
+ Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
+ Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp);
+
+ if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions,
+ &(mesa_fp->Base.Instructions[0]),
+ &(fp->r700AsmCode)) )
+ {
+ return GL_FALSE;
+ }
+
+ if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
+ {
+ return GL_FALSE;
+ }
+
+ fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0
+ : (fp->r700AsmCode.number_used_registers - 1);
+
+ fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
+
+ number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;
+
+ unBit = 1 << FRAG_RESULT_DEPTH;
+ if(mesa_fp->Base.OutputsWritten & unBit)
+ {
+ z_enabled = GL_TRUE;
+ number_of_colors_exported--;
+ }
+
+ fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
+
+ fp->translated = GL_TRUE;
+
+ return GL_TRUE;
+}
+
+void r700SelectFragmentShader(GLcontext *ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct r700_fragment_program *fp = (struct r700_fragment_program *)
+ (ctx->FragmentProgram._Current);
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+ {
+ fp->r700AsmCode.bR6xx = 1;
+ }
+
+ if (GL_FALSE == fp->translated)
+ r700TranslateFragmentShader(fp, &(fp->mesa_program));
+}
+
+void * r700GetActiveFpShaderBo(GLcontext * ctx)
+{
+ struct r700_fragment_program *fp = (struct r700_fragment_program *)
+ (ctx->FragmentProgram._Current);
+
+ return fp->shaderbo;
+}
+
+GLboolean r700SetupFragmentProgram(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ struct r700_fragment_program *fp = (struct r700_fragment_program *)
+ (ctx->FragmentProgram._Current);
+ r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
+ struct gl_fragment_program *mesa_fp = &(fp->mesa_program);
+ struct gl_program_parameter_list *paramList;
+ unsigned int unNumParamData;
+ unsigned int ui, i;
+ unsigned int unNumOfReg;
+ unsigned int unBit;
+ GLuint exportCount;
+
+ if(GL_FALSE == fp->loaded)
+ {
+ if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
+ {
+ Assemble( &(fp->r700Shader) );
+ }
+
+ /* Load fp to gpu */
+ r600EmitShader(ctx,
+ &(fp->shaderbo),
+ (GLvoid *)(fp->r700Shader.pProgram),
+ fp->r700Shader.uShaderBinaryDWORDSize,
+ "FS");
+
+ fp->loaded = GL_TRUE;
+ }
+
+ DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram),
+ fp->r700Shader.uShaderBinaryDWORDSize);
+
+ /* TODO : enable this after MemUse fixed *=
+ (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
+ */
+
+ R600_STATECHANGE(context, ps);
+
+ r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0;
+ SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
+
+ r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */
+
+ R600_STATECHANGE(context, spi);
+
+ unNumOfReg = fp->r700Shader.nRegs + 1;
+
+ ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
+
+ /* PS uses fragment.position */
+ if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
+ {
+ ui += 1;
+ SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+ SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask);
+ SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
+ SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
+ }
+
+ ui = (unNumOfReg < ui) ? ui : unNumOfReg;
+
+ SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
+
+ CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
+
+ if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
+ {
+ SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
+ STACK_SIZE_shift, STACK_SIZE_mask);
+ }
+
+ SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
+ EXPORT_MODE_shift, EXPORT_MODE_mask);
+
+ R600_STATECHANGE(context, db);
+
+ if(fp->r700Shader.killIsUsed)
+ {
+ SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+ }
+
+ if(fp->r700Shader.depthIsExported)
+ {
+ SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+ }
+
+ // emit ps input map
+ unBit = 1 << FRAG_ATTRIB_WPOS;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
+ unBit = 1 << FRAG_ATTRIB_COL0;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
+ unBit = 1 << FRAG_ATTRIB_COL1;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
+ unBit = 1 << FRAG_ATTRIB_FOGC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
+ for(i=0; i<8; i++)
+ {
+ unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+ }
+
+ R600_STATECHANGE(context, cb);
+ exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
+ r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
+
+ /* sent out shader constants. */
+ paramList = fp->mesa_program.Base.Parameters;
+
+ if(NULL != paramList) {
+ _mesa_load_state_parameters(ctx, paramList);
+
+ if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
+ return GL_FALSE;
+
+ R600_STATECHANGE(context, ps_consts);
+
+ r700->ps.num_consts = paramList->NumParameters;
+
+ unNumParamData = paramList->NumParameters;
+
+ for(ui=0; ui<unNumParamData; ui++) {
+ r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
+ r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
+ r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
+ r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ }
+ } else
+ r700->ps.num_consts = 0;
+
+ return GL_TRUE;
+}
+
diff --git a/r600/r700_fragprog.h b/r600/r700_fragprog.h
new file mode 100644
index 0000000..cbb108d
--- /dev/null
+++ b/r600/r700_fragprog.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_FRAGPROG_H_
+#define _R700_FRAGPROG_H_
+
+#include "r600_context.h"
+#include "r700_assembler.h"
+
+struct r700_fragment_program
+{
+ struct gl_fragment_program mesa_program;
+
+ r700_AssemblerBase r700AsmCode;
+ R700_Shader r700Shader;
+
+ GLboolean translated;
+ GLboolean loaded;
+ GLboolean error;
+
+ void * shaderbo;
+
+ GLboolean WritesDepth;
+ GLuint optimization;
+};
+
+/* Internal */
+void Map_Fragment_Program(r700_AssemblerBase *pAsm,
+ struct gl_fragment_program *mesa_fp);
+GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
+ struct gl_fragment_program *mesa_fp);
+
+GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
+ struct gl_fragment_program *mesa_vp);
+
+/* Interface */
+extern void r700SelectFragmentShader(GLcontext *ctx);
+
+extern GLboolean r700SetupFragmentProgram(GLcontext * ctx);
+
+extern void * r700GetActiveFpShaderBo(GLcontext * ctx);
+
+#endif /*_R700_FRAGPROG_H_*/
diff --git a/r600/r700_ioctl.c b/r600/r700_ioctl.c
new file mode 100644
index 0000000..72a8978
--- /dev/null
+++ b/r600/r700_ioctl.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <sched.h>
+#include <errno.h>
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "swrast/swrast.h"
+
+#include "radeon_common.h"
+#include "radeon_lock.h"
+#include "r600_context.h"
+
+#include "r700_ioctl.h"
+#include "r700_clear.h"
+
+
+void r700InitIoctlFuncs(struct dd_function_table *functions)
+{
+ functions->Clear = r700Clear;
+ functions->Finish = radeonFinish;
+ functions->Flush = radeonFlush;
+}
diff --git a/r600/r700_ioctl.h b/r600/r700_ioctl.h
new file mode 100644
index 0000000..414dc3e
--- /dev/null
+++ b/r600/r700_ioctl.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef __R700_IOCTL_H__
+#define __R700_IOCTL_H__
+
+#include "r600_context.h"
+#include "radeon_drm.h"
+
+extern void r700InitIoctlFuncs(struct dd_function_table *functions);
+
+#endif /* __R700_IOCTL_H__ */
diff --git a/r600/r700_oglprog.c b/r600/r700_oglprog.c
new file mode 100644
index 0000000..5290ef3
--- /dev/null
+++ b/r600/r700_oglprog.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <string.h>
+
+#include "main/glheader.h"
+#include "main/imports.h"
+
+#include "shader/program.h"
+#include "tnl/tnl.h"
+
+#include "r600_context.h"
+#include "r600_emit.h"
+
+#include "r700_oglprog.h"
+#include "r700_fragprog.h"
+#include "r700_vertprog.h"
+
+
+static struct gl_program *r700NewProgram(GLcontext * ctx,
+ GLenum target,
+ GLuint id)
+{
+ struct gl_program *pProgram = NULL;
+
+ struct r700_vertex_program_cont *vpc;
+ struct r700_fragment_program *fp;
+
+ radeon_print(RADEON_SHADER, RADEON_VERBOSE,
+ "%s %u, %u\n", __func__, target, id);
+
+ switch (target)
+ {
+ case GL_VERTEX_STATE_PROGRAM_NV:
+ case GL_VERTEX_PROGRAM_ARB:
+ vpc = CALLOC_STRUCT(r700_vertex_program_cont);
+ pProgram = _mesa_init_vertex_program(ctx,
+ &vpc->mesa_program,
+ target,
+ id);
+ break;
+ case GL_FRAGMENT_PROGRAM_NV:
+ case GL_FRAGMENT_PROGRAM_ARB:
+ fp = CALLOC_STRUCT(r700_fragment_program);
+ pProgram = _mesa_init_fragment_program(ctx,
+ &fp->mesa_program,
+ target,
+ id);
+ fp->translated = GL_FALSE;
+ fp->loaded = GL_FALSE;
+
+ fp->shaderbo = NULL;
+
+ break;
+ default:
+ _mesa_problem(ctx, "Bad target in r700NewProgram");
+ }
+
+ return pProgram;
+}
+
+static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
+{
+ struct r700_vertex_program_cont * vpc;
+ struct r700_vertex_program *vp, *tmp;
+ struct r700_fragment_program * fp;
+
+ radeon_print(RADEON_SHADER, RADEON_VERBOSE,
+ "%s %p\n", __func__, prog);
+
+ switch (prog->Target)
+ {
+ case GL_VERTEX_STATE_PROGRAM_NV:
+ case GL_VERTEX_PROGRAM_ARB:
+ vpc = (struct r700_vertex_program_cont*)prog;
+ vp = vpc->progs;
+ while (vp) {
+ tmp = vp->next;
+ /* Release DMA region */
+
+ r600DeleteShader(ctx, vp->shaderbo);
+
+ /* Clean up */
+ Clean_Up_Assembler(&(vp->r700AsmCode));
+ Clean_Up_Shader(&(vp->r700Shader));
+ _mesa_free(vp);
+ vp = tmp;
+ }
+ break;
+ case GL_FRAGMENT_PROGRAM_NV:
+ case GL_FRAGMENT_PROGRAM_ARB:
+ fp = (struct r700_fragment_program*)prog;
+ /* Release DMA region */
+
+ r600DeleteShader(ctx, fp->shaderbo);
+
+ /* Clean up */
+ Clean_Up_Assembler(&(fp->r700AsmCode));
+ Clean_Up_Shader(&(fp->r700Shader));
+ break;
+ default:
+ _mesa_problem(ctx, "Bad target in r700NewProgram");
+ }
+
+ _mesa_delete_program(ctx, prog);
+}
+
+static void
+r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+
+}
+
+static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+
+ return GL_TRUE;
+}
+
+void r700InitShaderFuncs(struct dd_function_table *functions)
+{
+ functions->NewProgram = r700NewProgram;
+ functions->DeleteProgram = r700DeleteProgram;
+ functions->ProgramStringNotify = r700ProgramStringNotify;
+ functions->IsProgramNative = r700IsProgramNative;
+}
diff --git a/r600/r700_oglprog.h b/r600/r700_oglprog.h
new file mode 100644
index 0000000..fe2e9d1
--- /dev/null
+++ b/r600/r700_oglprog.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef _R700_OGLPROG_H_
+#define _R700_OGLPROG_H_
+#include "r600_context.h"
+
+extern void r700InitShaderFuncs(struct dd_function_table *functions);
+
+#endif /*_R700_OGLPROG_H_*/
diff --git a/r600/r700_render.c b/r600/r700_render.c
new file mode 100644
index 0000000..b1c3648
--- /dev/null
+++ b/r600/r700_render.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+#include "main/api_arrayelt.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_vp_build.h"
+#include "tnl/t_context.h"
+#include "tnl/t_vertex.h"
+#include "tnl/t_pipeline.h"
+
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+
+#include "r600_tex.h"
+
+#include "r700_vertprog.h"
+#include "r700_fragprog.h"
+#include "r700_state.h"
+
+#include "radeon_common_context.h"
+
+void r700WaitForIdle(context_t *context);
+void r700WaitForIdleClean(context_t *context);
+GLboolean r700SendTextureState(context_t *context);
+static unsigned int r700PrimitiveType(int prim);
+void r600UpdateTextureState(GLcontext * ctx);
+GLboolean r700SyncSurf(context_t *context,
+ struct radeon_bo *pbo,
+ uint32_t read_domain,
+ uint32_t write_domain,
+ uint32_t sync_type);
+
+void r700WaitForIdle(context_t *context)
+{
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+ R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
+ R600_OUT_BATCH(WAIT_3D_IDLE_bit);
+
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+void r700WaitForIdleClean(context_t *context)
+{
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
+ BEGIN_BATCH_NO_AUTOSTATE(5);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
+ R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+ R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
+ R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
+
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+void r700Start3D(context_t *context)
+{
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+ {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
+ R600_OUT_BATCH(0);
+ END_BATCH();
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
+ R600_OUT_BATCH(0x80000000);
+ R600_OUT_BATCH(0x80000000);
+ END_BATCH();
+
+ COMMIT_BATCH();
+
+ r700WaitForIdleClean(context);
+}
+
+GLboolean r700SyncSurf(context_t *context,
+ struct radeon_bo *pbo,
+ uint32_t read_domain,
+ uint32_t write_domain,
+ uint32_t sync_type)
+{
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
+ uint32_t cp_coher_size;
+
+ if (!pbo)
+ return GL_FALSE;
+
+ if (pbo->size == 0xffffffff)
+ cp_coher_size = 0xffffffff;
+ else
+ cp_coher_size = ((pbo->size + 255) >> 8);
+
+ BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
+ R600_OUT_BATCH(sync_type);
+ R600_OUT_BATCH(cp_coher_size);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(10);
+ R600_OUT_BATCH_RELOC(0,
+ pbo,
+ 0,
+ read_domain, write_domain, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+
+ return GL_TRUE;
+}
+
+static unsigned int r700PrimitiveType(int prim)
+{
+ switch (prim & PRIM_MODE_MASK)
+ {
+ case GL_POINTS:
+ return DI_PT_POINTLIST;
+ break;
+ case GL_LINES:
+ return DI_PT_LINELIST;
+ break;
+ case GL_LINE_STRIP:
+ return DI_PT_LINESTRIP;
+ break;
+ case GL_LINE_LOOP:
+ return DI_PT_LINELOOP;
+ break;
+ case GL_TRIANGLES:
+ return DI_PT_TRILIST;
+ break;
+ case GL_TRIANGLE_STRIP:
+ return DI_PT_TRISTRIP;
+ break;
+ case GL_TRIANGLE_FAN:
+ return DI_PT_TRIFAN;
+ break;
+ case GL_QUADS:
+ return DI_PT_QUADLIST;
+ break;
+ case GL_QUAD_STRIP:
+ return DI_PT_QUADSTRIP;
+ break;
+ case GL_POLYGON:
+ return DI_PT_POLYGON;
+ break;
+ default:
+ assert(0);
+ return -1;
+ break;
+ }
+}
+
+static int r700NumVerts(int num_verts, int prim)
+{
+ int verts_off = 0;
+
+ switch (prim & PRIM_MODE_MASK) {
+ case GL_POINTS:
+ verts_off = 0;
+ break;
+ case GL_LINES:
+ verts_off = num_verts % 2;
+ break;
+ case GL_LINE_STRIP:
+ if (num_verts < 2)
+ verts_off = num_verts;
+ break;
+ case GL_LINE_LOOP:
+ if (num_verts < 2)
+ verts_off = num_verts;
+ break;
+ case GL_TRIANGLES:
+ verts_off = num_verts % 3;
+ break;
+ case GL_TRIANGLE_STRIP:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ case GL_TRIANGLE_FAN:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ case GL_QUADS:
+ verts_off = num_verts % 4;
+ break;
+ case GL_QUAD_STRIP:
+ if (num_verts < 4)
+ verts_off = num_verts;
+ else
+ verts_off = num_verts % 2;
+ break;
+ case GL_POLYGON:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ default:
+ assert(0);
+ return -1;
+ break;
+ }
+
+ return num_verts - verts_off;
+}
+
+static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ BATCH_LOCALS(&context->radeon);
+ int type, i, total_emit;
+ int num_indices;
+ uint32_t vgt_draw_initiator = 0;
+ uint32_t vgt_index_type = 0;
+ uint32_t vgt_primitive_type = 0;
+ uint32_t vgt_num_indices = 0;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
+
+ type = r700PrimitiveType(prim);
+ num_indices = r700NumVerts(end - start, prim);
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE,
+ "%s type %x num_indices %d\n",
+ __func__, type, num_indices);
+
+ if (type < 0 || num_indices <= 0)
+ return;
+
+ total_emit = 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + num_indices + 3; /* DRAW_INDEX_IMMD */
+
+ BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+ // prim
+ SETfield(vgt_primitive_type, type,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+ R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
+ R600_OUT_BATCH(vgt_primitive_type);
+
+ // index type
+ SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(vgt_index_type);
+
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+
+ // draw packet
+ vgt_num_indices = num_indices;
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+
+ for (i = start; i < (start + num_indices); i++) {
+ if(vb->Elts)
+ R600_OUT_BATCH(vb->Elts[i]);
+ else
+ R600_OUT_BATCH(i);
+ }
+ END_BATCH();
+ COMMIT_BATCH();
+
+}
+
+/* start 3d, idle, cb/db flush */
+#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14
+
+static GLuint r700PredictRenderSize(GLcontext* ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct r700_vertex_program *vp = context->selected_vp;
+ struct vertex_buffer *vb = &tnl->vb;
+ GLboolean flushed;
+ GLuint dwords, i;
+ GLuint state_size;
+ /* pre calculate aos count so state prediction works */
+ context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead);
+
+ dwords = PRE_EMIT_STATE_BUFSZ;
+ for (i = 0; i < vb->PrimitiveCount; i++)
+ dwords += vb->Primitive[i].count + 10;
+ state_size = radeonCountStateEmitSize(&context->radeon);
+ flushed = rcommonEnsureCmdBufSpace(&context->radeon,
+ dwords + state_size, __FUNCTION__);
+
+ if (flushed)
+ dwords += radeonCountStateEmitSize(&context->radeon);
+ else
+ dwords += state_size;
+
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE,
+ "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
+ return dwords;
+}
+
+static GLboolean r700RunRender(GLcontext * ctx,
+ struct tnl_pipeline_stage *stage)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
+ unsigned int i, id = 0;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
+ struct radeon_renderbuffer *rrb;
+
+ radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n",
+ __func__, context->radeon.cmdbuf.cs->cdw);
+
+ /* always emit CB base to prevent
+ * lock ups on some chips.
+ */
+ R600_STATECHANGE(context, cb_target);
+ /* mark vtx as dirty since it changes per-draw */
+ R600_STATECHANGE(context, vtx);
+
+ r700SetScissor(context);
+ r700SetupVertexProgram(ctx);
+ r700SetupFragmentProgram(ctx);
+ r600UpdateTextureState(ctx);
+
+ GLuint emit_end = r700PredictRenderSize(ctx)
+ + context->radeon.cmdbuf.cs->cdw;
+ r700SetupStreams(ctx);
+
+ radeonEmitState(radeon);
+
+ radeon_debug_add_indent();
+ /* richard test code */
+ for (i = 0; i < vb->PrimitiveCount; i++) {
+ GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
+ GLuint start = vb->Primitive[i].start;
+ GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
+ r700RunRenderPrimitive(ctx, start, end, prim);
+ }
+ radeon_debug_remove_indent();
+
+ /* Flush render op cached for last several quads. */
+ r700WaitForIdleClean(context);
+
+ rrb = radeon_get_colorbuffer(&context->radeon);
+ if (rrb && rrb->bo)
+ r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+ CB_ACTION_ENA_bit | (1 << (id + 6)));
+
+ rrb = radeon_get_depthbuffer(&context->radeon);
+ if (rrb && rrb->bo)
+ r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+ DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
+
+ radeonReleaseArrays(ctx, ~0);
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n",
+ __func__, context->radeon.cmdbuf.cs->cdw);
+
+ if ( emit_end < context->radeon.cmdbuf.cs->cdw )
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
+
+ return GL_FALSE;
+}
+
+static GLboolean r700RunNonTCLRender(GLcontext * ctx,
+ struct tnl_pipeline_stage *stage) /* -------------------- */
+{
+ GLboolean bRet = GL_TRUE;
+
+ return bRet;
+}
+
+static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
+ struct tnl_pipeline_stage *stage)
+{
+ GLboolean bRet = GL_FALSE;
+
+ /* TODO : sw fallback */
+
+ /* Need shader bo's setup before bo check */
+ r700UpdateShaders(ctx);
+ /**
+
+ * Ensure all enabled and complete textures are uploaded along with any buffers being used.
+ */
+ if(!r600ValidateBuffers(ctx))
+ {
+ return GL_TRUE;
+ }
+
+ bRet = r700RunRender(ctx, stage);
+
+ return bRet;
+ //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline
+ //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success.
+}
+
+const struct tnl_pipeline_stage _r700_render_stage = {
+ "r700 Hardware Rasterization",
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ r700RunNonTCLRender
+};
+
+const struct tnl_pipeline_stage _r700_tcl_stage = {
+ "r700 Hardware Transform, Clipping and Lighting",
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ r700RunTCLRender
+};
+
+const struct tnl_pipeline_stage *r700_pipeline[] =
+{
+ &_r700_tcl_stage,
+ &_tnl_vertex_transform_stage,
+ &_tnl_normal_transform_stage,
+ &_tnl_lighting_stage,
+ &_tnl_fog_coordinate_stage,
+ &_tnl_texgen_stage,
+ &_tnl_texture_transform_stage,
+ &_tnl_vertex_program_stage,
+
+ &_r700_render_stage,
+ &_tnl_render_stage,
+ 0,
+};
+
+
diff --git a/r600/r700_shader.c b/r600/r700_shader.c
new file mode 100644
index 0000000..b4fd51c
--- /dev/null
+++ b/r600/r700_shader.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+
+#include "main/glheader.h"
+
+#include "r600_context.h"
+#include "r700_debug.h"
+
+#include "r700_shader.h"
+
+void r700ShaderInit(GLcontext * ctx)
+{
+}
+
+void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst)
+{
+ if(NULL == plstCFInstructions->pTail)
+ { //first
+ plstCFInstructions->pHead = pInst;
+ plstCFInstructions->pTail = pInst;
+ }
+ else
+ {
+ plstCFInstructions->pTail->pNextInst = pInst;
+ plstCFInstructions->pTail = pInst;
+ }
+ pInst->pNextInst = NULL;
+
+ plstCFInstructions->uNumOfNode++;
+}
+
+void Init_R700_Shader(R700_Shader * pShader)
+{
+ pShader->Type = R700_SHADER_INVALID;
+ pShader->pProgram = NULL;
+ pShader->bBinaryShader = GL_FALSE;
+ pShader->bFetchShaderRequired = GL_FALSE;
+ pShader->bNeedsAssembly = GL_FALSE;
+ pShader->bLinksDirty = GL_TRUE;
+ pShader->uShaderBinaryDWORDSize = 0;
+ pShader->nRegs = 0;
+ pShader->nParamExports = 0;
+ pShader->nMemExports = 0;
+ pShader->resource = 0;
+
+ pShader->exportMode = 0;
+ pShader->depthIsImported = GL_FALSE;
+
+ pShader->positionVectorIsExported = GL_FALSE;
+ pShader->miscVectorIsExported = GL_FALSE;
+ pShader->renderTargetArrayIndexIsExported = GL_FALSE;
+ pShader->ccDist0VectorIsExported = GL_FALSE;
+ pShader->ccDist1VectorIsExported = GL_FALSE;
+
+
+ pShader->depthIsExported = GL_FALSE;
+ pShader->stencilRefIsExported = GL_FALSE;
+ pShader->coverageToMaskIsExported = GL_FALSE;
+ pShader->maskIsExported = GL_FALSE;
+ pShader->killIsUsed = GL_FALSE;
+
+ pShader->uCFOffset = 0;
+ pShader->uStackSize = 0;
+ pShader->uMaxCallDepth = 0;
+
+ pShader->bSurfAllocated = GL_FALSE;
+
+ pShader->lstCFInstructions.pHead=NULL;
+ pShader->lstCFInstructions.pTail=NULL;
+ pShader->lstCFInstructions.uNumOfNode=0;
+ pShader->lstALUInstructions.pHead=NULL;
+ pShader->lstALUInstructions.pTail=NULL;
+ pShader->lstALUInstructions.uNumOfNode=0;
+ pShader->lstTEXInstructions.pHead=NULL;
+ pShader->lstTEXInstructions.pTail=NULL;
+ pShader->lstTEXInstructions.uNumOfNode=0;
+ pShader->lstVTXInstructions.pHead=NULL;
+ pShader->lstVTXInstructions.pTail=NULL;
+ pShader->lstVTXInstructions.uNumOfNode=0;
+}
+
+void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst)
+{
+ R700ControlFlowSXClause* pSXClause;
+ R700ControlFlowSMXClause* pSMXClause;
+
+ pCFInst->m_uIndex = pShader->lstCFInstructions.uNumOfNode;
+ AddInstToList(&(pShader->lstCFInstructions),
+ (R700ShaderInstruction*)pCFInst);
+ pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType);
+
+ pSXClause = NULL;
+ pSMXClause = NULL;
+ switch (pCFInst->m_ShaderInstType)
+ {
+ case SIT_CF_ALL_EXP_SX:
+ pSXClause = (R700ControlFlowSXClause*)pCFInst;
+ break;
+ case SIT_CF_ALL_EXP_SMX:
+ pSMXClause = (R700ControlFlowSMXClause*)pCFInst;
+ break;
+ default:
+ break;
+ };
+
+ if((pSXClause != NULL) && (pSXClause->m_Word0.f.type == SQ_EXPORT_PARAM))
+ {
+ pShader->nParamExports += pSXClause->m_Word1.f.burst_count + 1;
+ }
+ else if ((pSMXClause != NULL) && (pSMXClause->m_Word1.f.cf_inst == SQ_CF_INST_MEM_RING) &&
+ (pSMXClause->m_Word0.f.type == SQ_EXPORT_WRITE || pSMXClause->m_Word0.f.type == SQ_EXPORT_WRITE_IND))
+ {
+ pShader->nMemExports += pSMXClause->m_Word1.f.burst_count + 1;
+ }
+
+ pShader->bLinksDirty = GL_TRUE;
+ pShader->bNeedsAssembly = GL_TRUE;
+
+ pCFInst->useCount++;
+}
+
+void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst)
+{
+ pVTXInst->m_uIndex = pShader->lstVTXInstructions.uNumOfNode;
+ AddInstToList(&(pShader->lstVTXInstructions),
+ (R700ShaderInstruction*)pVTXInst);
+ pShader->uShaderBinaryDWORDSize += GetInstructionSize(pVTXInst->m_ShaderInstType);
+
+ if(pVTXInst->m_ShaderInstType == SIT_VTX_GENERIC)
+ {
+ R700VertexGenericFetch* pVTXGenericClause = (R700VertexGenericFetch*)pVTXInst;
+ pShader->nRegs = (pShader->nRegs < pVTXGenericClause->m_Word1_GPR.f.dst_gpr) ? pVTXGenericClause->m_Word1_GPR.f.dst_gpr : pShader->nRegs;
+ }
+
+ pShader->bLinksDirty = GL_TRUE;
+ pShader->bNeedsAssembly = GL_TRUE;
+
+ pVTXInst->useCount++;
+}
+
+void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst)
+{
+ pTEXInst->m_uIndex = pShader->lstTEXInstructions.uNumOfNode;
+ AddInstToList(&(pShader->lstTEXInstructions),
+ (R700ShaderInstruction*)pTEXInst);
+ pShader->uShaderBinaryDWORDSize += GetInstructionSize(pTEXInst->m_ShaderInstType);
+
+ pShader->nRegs = (pShader->nRegs < pTEXInst->m_Word1.f.dst_gpr) ? pTEXInst->m_Word1.f.dst_gpr : pShader->nRegs;
+
+ pShader->bLinksDirty = GL_TRUE;
+ pShader->bNeedsAssembly = GL_TRUE;
+
+ pTEXInst->useCount++;
+}
+
+void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst)
+{
+ pALUInst->m_uIndex = pShader->lstALUInstructions.uNumOfNode;
+ AddInstToList(&(pShader->lstALUInstructions),
+ (R700ShaderInstruction*)pALUInst);
+ pShader->uShaderBinaryDWORDSize += GetInstructionSize(pALUInst->m_ShaderInstType);
+
+ pShader->nRegs = (pShader->nRegs < pALUInst->m_Word1.f.dst_gpr) ? pALUInst->m_Word1.f.dst_gpr : pShader->nRegs;
+
+ pShader->bLinksDirty = GL_TRUE;
+ pShader->bNeedsAssembly = GL_TRUE;
+
+ pALUInst->useCount++;
+}
+
+void ResolveLinks(R700_Shader *pShader)
+{
+ GLuint uiSize;
+ R700ShaderInstruction *pInst;
+ R700ALUInstruction *pALUinst;
+ R700TextureInstruction *pTEXinst;
+ R700VertexInstruction *pVTXinst;
+
+ GLuint vtxOffset;
+
+ GLuint cfOffset = 0x0;
+
+ GLuint aluOffset = cfOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF);
+
+ GLuint texOffset = aluOffset; // + m_lstALUInstructions.size() * R700ALUInstruction::SIZE,
+
+ pInst = pShader->lstALUInstructions.pHead;
+ while(NULL != pInst)
+ {
+ texOffset += GetInstructionSize(pInst->m_ShaderInstType);
+
+ pInst = pInst->pNextInst;
+ };
+
+ vtxOffset = texOffset + pShader->lstTEXInstructions.uNumOfNode * GetInstructionSize(SIT_TEX);
+
+ if ( ((pShader->lstTEXInstructions.uNumOfNode > 0) && (texOffset % 4 != 0)) ||
+ ((pShader->lstVTXInstructions.uNumOfNode > 0) && (vtxOffset % 4 != 0)) )
+ {
+ pALUinst = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+ Init_R700ALUInstruction(pALUinst);
+ AddALUInstruction(pShader, pALUinst);
+ texOffset += GetInstructionSize(SIT_ALU);
+ vtxOffset += GetInstructionSize(SIT_ALU);
+ }
+
+ pInst = pShader->lstALUInstructions.pHead;
+ uiSize = 0;
+ while(NULL != pInst)
+ {
+ pALUinst = (R700ALUInstruction*)pInst;
+
+ if(pALUinst->m_pLinkedALUClause != NULL)
+ {
+ // This address is quad-word aligned
+ pALUinst->m_pLinkedALUClause->m_Word0.f.addr = (aluOffset + uiSize) >> 1;
+ }
+
+ uiSize += GetInstructionSize(pALUinst->m_ShaderInstType);
+
+ pInst = pInst->pNextInst;
+ };
+
+ pInst = pShader->lstTEXInstructions.pHead;
+ uiSize = 0;
+ while(NULL != pInst)
+ {
+ pTEXinst = (R700TextureInstruction*)pInst;
+
+ if (pTEXinst->m_pLinkedGenericClause != NULL)
+ {
+ pTEXinst->m_pLinkedGenericClause->m_Word0.f.addr = (texOffset + uiSize) >> 1;
+ }
+
+ uiSize += GetInstructionSize(pTEXinst->m_ShaderInstType);
+
+ pInst = pInst->pNextInst;
+ };
+
+ pInst = pShader->lstVTXInstructions.pHead;
+ uiSize = 0;
+ while(NULL != pInst)
+ {
+ pVTXinst = (R700VertexInstruction*)pInst;
+
+ if (pVTXinst->m_pLinkedGenericClause != NULL)
+ {
+ pVTXinst->m_pLinkedGenericClause->m_Word0.f.addr = (vtxOffset + uiSize) >> 1;
+ }
+
+ uiSize += GetInstructionSize(pVTXinst->m_ShaderInstType);
+
+ pInst = pInst->pNextInst;
+ };
+
+ pShader->bLinksDirty = GL_FALSE;
+}
+
+void Assemble(R700_Shader *pShader)
+{
+ GLuint i;
+ GLuint *pShaderBinary;
+ GLuint size_of_program;
+ GLuint *pCurrPos;
+
+ GLuint end_of_cf_instructions;
+ GLuint number_of_alu_dwords;
+
+ R700ShaderInstruction *pInst;
+
+ if(GL_TRUE == pShader->bBinaryShader)
+ {
+ return;
+ }
+
+ if(pShader->bLinksDirty == GL_TRUE)
+ {
+ ResolveLinks(pShader);
+ }
+
+ size_of_program = pShader->uShaderBinaryDWORDSize;
+
+ pShaderBinary = (GLuint*) MALLOC(sizeof(GLuint)*size_of_program);
+
+ pCurrPos = pShaderBinary;
+
+ for (i = 0; i < size_of_program; i++)
+ {
+ pShaderBinary[i] = 0;
+ }
+
+ pInst = pShader->lstCFInstructions.pHead;
+ while(NULL != pInst)
+ {
+ switch (pInst->m_ShaderInstType)
+ {
+ case SIT_CF_GENERIC:
+ {
+ R700ControlFlowGenericClause* pCFgeneric = (R700ControlFlowGenericClause*)pInst;
+ *pCurrPos++ = pCFgeneric->m_Word0.val;
+ *pCurrPos++ = pCFgeneric->m_Word1.val;
+ }
+ break;
+ case SIT_CF_ALU:
+ {
+ R700ControlFlowALUClause* pCFalu = (R700ControlFlowALUClause*)pInst;
+ *pCurrPos++ = pCFalu->m_Word0.val;
+ *pCurrPos++ = pCFalu->m_Word1.val;
+ }
+ break;
+ case SIT_CF_ALL_EXP_SX:
+ {
+ R700ControlFlowSXClause* pCFsx = (R700ControlFlowSXClause*)pInst;
+ *pCurrPos++ = pCFsx->m_Word0.val;
+ *pCurrPos++ = (pCFsx->m_Word1.val | pCFsx->m_Word1_SWIZ.val);
+ }
+ break;
+ case SIT_CF_ALL_EXP_SMX:
+ {
+ R700ControlFlowSMXClause* pCFsmx = (R700ControlFlowSMXClause*)pInst;
+ *pCurrPos++ = pCFsmx->m_Word0.val;
+ *pCurrPos++ = (pCFsmx->m_Word1.val | pCFsmx->m_Word1_BUF.val);
+ }
+ break;
+ default:
+ break;
+ }
+
+ pInst = pInst->pNextInst;
+ };
+
+ number_of_alu_dwords = 0;
+ pInst = pShader->lstALUInstructions.pHead;
+ while(NULL != pInst)
+ {
+ switch (pInst->m_ShaderInstType)
+ {
+ case SIT_ALU:
+ {
+ R700ALUInstruction* pALU = (R700ALUInstruction*)pInst;
+
+ *pCurrPos++ = pALU->m_Word0.val;
+ *pCurrPos++ = (pALU->m_Word1.val | pALU->m_Word1_OP2.val | pALU->m_Word1_OP3.val);
+
+ number_of_alu_dwords += 2;
+ }
+ break;
+ case SIT_ALU_HALF_LIT:
+ {
+ R700ALUInstructionHalfLiteral* pALUhalf = (R700ALUInstructionHalfLiteral*)pInst;
+
+ *pCurrPos++ = pALUhalf->m_Word0.val;
+ *pCurrPos++ = (pALUhalf->m_Word1.val | pALUhalf->m_Word1_OP2.val | pALUhalf->m_Word1_OP3.val);
+ *pCurrPos++ = *((GLuint*)&(pALUhalf->m_fLiteralX));
+ *pCurrPos++ = *((GLuint*)&(pALUhalf->m_fLiteralY));
+
+ number_of_alu_dwords += 4;
+ }
+ break;
+ case SIT_ALU_FALL_LIT:
+ {
+ R700ALUInstructionFullLiteral* pALUfull = (R700ALUInstructionFullLiteral*)pInst;
+
+ *pCurrPos++ = pALUfull->m_Word0.val;
+ *pCurrPos++ = (pALUfull->m_Word1.val | pALUfull->m_Word1_OP2.val | pALUfull->m_Word1_OP3.val);
+
+ *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralX));
+ *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralY));
+ *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralZ));
+ *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralW));
+
+ number_of_alu_dwords += 6;
+ }
+ break;
+ default:
+ break;
+ }
+
+ pInst = pInst->pNextInst;
+ };
+
+ pInst = pShader->lstTEXInstructions.pHead;
+ while(NULL != pInst)
+ {
+ R700TextureInstruction* pTEX = (R700TextureInstruction*)pInst;
+
+ *pCurrPos++ = pTEX->m_Word0.val;
+ *pCurrPos++ = pTEX->m_Word1.val;
+ *pCurrPos++ = pTEX->m_Word2.val;
+ *pCurrPos++ = 0x0beadeaf;
+
+ pInst = pInst->pNextInst;
+ };
+
+ pInst = pShader->lstVTXInstructions.pHead;
+ while(NULL != pInst)
+ {
+ switch (pInst->m_ShaderInstType)
+ {
+ case SIT_VTX_SEM: //
+ {
+ R700VertexSemanticFetch* pVTXsem = (R700VertexSemanticFetch*)pInst;
+
+ *pCurrPos++ = pVTXsem->m_Word0.val;
+ *pCurrPos++ = (pVTXsem->m_Word1.val | pVTXsem->m_Word1_SEM.val);
+ *pCurrPos++ = pVTXsem->m_Word2.val;
+ *pCurrPos++ = 0x0beadeaf;
+ }
+ break;
+ case SIT_VTX_GENERIC: //
+ {
+ R700VertexGenericFetch* pVTXgeneric = (R700VertexGenericFetch*)pInst;
+
+ *pCurrPos++ = pVTXgeneric->m_Word0.val;
+ *pCurrPos++ = (pVTXgeneric->m_Word1.val | pVTXgeneric->m_Word1_GPR.val);
+ *pCurrPos++ = pVTXgeneric->m_Word2.val;
+ *pCurrPos++ = 0x0beadeaf;
+ }
+ break;
+ default:
+ break;
+ }
+
+ pInst = pInst->pNextInst;
+ };
+
+ if(NULL != pShader->pProgram)
+ {
+ FREE(pShader->pProgram);
+ }
+ pShader->pProgram = (GLubyte*)pShaderBinary;
+
+ end_of_cf_instructions = pShader->uCFOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF);
+
+ pShader->uEndOfCF = end_of_cf_instructions >> 1;
+
+ pShader->uEndOfALU = (end_of_cf_instructions + number_of_alu_dwords) >> 1;
+
+ pShader->uEndOfFetch = (pShader->uCFOffset + pShader->uShaderBinaryDWORDSize) >> 1;
+
+ pShader->bNeedsAssembly = GL_FALSE;
+}
+
+void LoadProgram(R700_Shader *pShader) //context
+{
+}
+
+void UpdateShaderRegisters(R700_Shader *pShader) //context
+{
+}
+
+void DeleteInstructions(R700_Shader *pShader)
+{
+}
+
+void DebugPrint(void)
+{
+}
+
+void Clean_Up_Shader(R700_Shader *pShader)
+{
+ FREE(pShader->pProgram);
+
+ R700ShaderInstruction *pInst;
+ R700ShaderInstruction *pInstToFree;
+
+ pInst = pShader->lstCFInstructions.pHead;
+ while(NULL != pInst)
+ {
+ pInstToFree = pInst;
+ pInst = pInst->pNextInst;
+ FREE(pInstToFree);
+ };
+ pInst = pShader->lstALUInstructions.pHead;
+ while(NULL != pInst)
+ {
+ pInstToFree = pInst;
+ pInst = pInst->pNextInst;
+ FREE(pInstToFree);
+ };
+ pInst = pShader->lstTEXInstructions.pHead;
+ while(NULL != pInst)
+ {
+ pInstToFree = pInst;
+ pInst = pInst->pNextInst;
+ FREE(pInstToFree);
+ };
+ pInst = pShader->lstVTXInstructions.pHead;
+ while(NULL != pInst)
+ {
+ pInstToFree = pInst;
+ pInst = pInst->pNextInst;
+ FREE(pInstToFree);
+ };
+}
+
diff --git a/r600/r700_shader.h b/r600/r700_shader.h
new file mode 100644
index 0000000..bfd01e1
--- /dev/null
+++ b/r600/r700_shader.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef __R700_SHADER_H__
+#define __R700_SHADER_H__
+
+#include "main/mtypes.h"
+
+#include "r700_shaderinst.h"
+
+
+void r700ShaderInit(GLcontext * ctx);
+
+typedef enum R700ShaderType
+{
+ R700_SHADER_FS = 0x0,
+ R700_SHADER_ES = 0x1,
+ R700_SHADER_GS = 0x2,
+ R700_SHADER_VS = 0x3,
+ R700_SHADER_PS = 0x4,
+ R700_SHADER_INVALID = 0x5,
+} R700ShaderType;
+
+typedef struct TypedShaderList
+{
+ R700ShaderInstruction * pHead;
+ R700ShaderInstruction * pTail;
+ GLuint uNumOfNode;
+} TypedShaderList;
+
+typedef struct RealRegister
+{
+ GLuint uAddr;
+ GLuint uValue;
+} RealRegister;
+
+typedef struct InstDeps
+{
+ GLint nDstDep;
+ GLint nSrcDeps[3];
+} InstDeps;
+
+typedef struct R700_Shader
+{
+ R700ShaderType Type;
+
+ GLubyte* pProgram;
+
+ GLboolean bBinaryShader;
+ GLboolean bFetchShaderRequired;
+ GLboolean bNeedsAssembly;
+ GLboolean bLinksDirty;
+
+ GLuint uShaderBinaryDWORDSize; // in DWORDS
+ GLuint nRegs;
+ GLuint nParamExports; // VS_ EXPORT_COUNT (1 based, the actual register is 0 based!)
+ GLuint nMemExports;
+ GLuint resource; // VS and PS _RESOURCE
+ GLuint exportMode; // VS and PS _EXPORT_MODE
+
+ GLboolean depthIsImported;
+
+ // Vertex program exports
+ GLboolean positionVectorIsExported;
+
+ GLboolean miscVectorIsExported;
+ GLboolean renderTargetArrayIndexIsExported;
+
+ GLboolean ccDist0VectorIsExported;
+ GLboolean ccDist1VectorIsExported;
+
+ // Pixel program exports
+ GLboolean depthIsExported;
+ GLboolean stencilRefIsExported;
+ GLboolean coverageToMaskIsExported;
+ GLboolean maskIsExported;
+
+ GLboolean killIsUsed;
+
+ GLuint uStartAddr;
+ GLuint uCFOffset;
+ GLuint uEndOfCF;
+ GLuint uEndOfALU;
+ GLuint uEndOfFetch;
+ GLuint uStackSize;
+ GLuint uMaxCallDepth;
+
+ TypedShaderList lstCFInstructions;
+ TypedShaderList lstALUInstructions;
+ TypedShaderList lstTEXInstructions;
+ TypedShaderList lstVTXInstructions;
+
+ RealRegister RegStartAddr;
+ RealRegister RegCFOffset;
+ RealRegister RegEndCF;
+ RealRegister RegEndALU;
+ RealRegister egEndFetcg;
+
+ // -------- constants
+ GLfloat ConstantArray[SQ_ALU_CONSTANT_PS_COUNT * 4];
+
+ GLboolean bSurfAllocated;
+} R700_Shader;
+
+//Internal
+void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst);
+void ResolveLinks(R700_Shader *pShader);
+void Assemble(R700_Shader *pShader);
+
+
+//Interface
+void Init_R700_Shader(R700_Shader * pShader);
+void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst);
+void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst);
+void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst);
+void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst);
+
+void LoadProgram(R700_Shader *pShader);
+void UpdateShaderRegisters(R700_Shader *pShader);
+void DeleteInstructions(R700_Shader *pShader);
+void DebugPrint(void);
+
+void Clean_Up_Shader(R700_Shader *pShader);
+
+#endif /*__R700_SHADER_H__*/
+
diff --git a/r600/r700_shaderinst.c b/r600/r700_shaderinst.c
new file mode 100644
index 0000000..f120d9f
--- /dev/null
+++ b/r600/r700_shaderinst.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#include "main/mtypes.h"
+
+#include "radeon_debug.h"
+#include "r700_shaderinst.h"
+
+void Init_R700ControlFlowGenericClause(R700ControlFlowGenericClause* pInst)
+{
+ pInst->m_Word0.val = 0x00000000;
+ pInst->m_Word1.val = 0x00000000;
+
+ pInst->m_pLinkedVTXInstruction = 0;
+ pInst->m_pLinkedTEXInstruction = 0;
+
+ pInst->useCount = 0;
+
+ pInst->m_ShaderInstType = SIT_CF_GENERIC;
+}
+
+void Init_R700ControlFlowALUClause(R700ControlFlowALUClause* pInst)
+{
+ pInst->m_Word0.val = 0x00000000;
+ pInst->m_Word1.val = 0x00000000;
+
+ pInst->m_pLinkedALUInstruction = 0;
+
+ pInst->useCount = 0;
+
+ pInst->m_ShaderInstType = SIT_CF_ALU;
+}
+
+void Init_R700ControlFlowSXClause(R700ControlFlowSXClause* pInst)
+{
+ pInst->m_Word0.val = 0x00000000;
+ pInst->m_Word1.val = 0x00000000;
+ pInst->m_Word1_SWIZ.val = 0x00000000;
+
+ pInst->useCount = 0;
+
+ pInst->m_ShaderInstType = SIT_CF_ALL_EXP_SX;
+}
+
+void Init_R700ControlFlowSMXClause(R700ControlFlowSMXClause* pInst)
+{
+ pInst->m_Word0.val = 0x00000000;
+ pInst->m_Word1.val = 0x00000000;
+ pInst->m_Word1_BUF.val = 0x00000000;
+
+ pInst->useCount = 0;
+
+ pInst->m_ShaderInstType = SIT_CF_ALL_EXP_SMX;
+}
+
+void Init_R700ALUInstruction(R700ALUInstruction* pInst)
+{
+ pInst->m_Word0.val = 0x00000000;
+ pInst->m_Word1.val = 0x00000000;
+ pInst->m_Word1_OP2.val = 0x00000000;
+ pInst->m_Word1_OP3.val = 0x00000000;
+
+ pInst->m_pLinkedALUClause = 0;
+
+ pInst->useCount = 0;
+
+ pInst->m_ShaderInstType = SIT_ALU;
+}
+
+void Init_R700ALUInstructionHalfLiteral(R700ALUInstructionHalfLiteral* pInst, GLfloat x, GLfloat y)
+{
+ pInst->m_Word0.val = 0x00000000;
+ pInst->m_Word1.val = 0x00000000;
+ pInst->m_Word1_OP2.val = 0x00000000;
+ pInst->m_Word1_OP3.val = 0x00000000;
+
+ pInst->m_pLinkedALUClause = 0;
+
+ pInst->m_fLiteralX = x;
+ pInst->m_fLiteralY = y;
+
+ pInst->useCount = 0;
+
+ pInst->m_ShaderInstType = SIT_ALU_HALF_LIT;
+}
+
+void Init_R700ALUInstructionFullLiteral(R700ALUInstructionFullLiteral* pInst, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
+{
+ pInst->m_Word0.val = 0x00000000;
+ pInst->m_Word1.val = 0x00000000;
+ pInst->m_Word1_OP2.val = 0x00000000;
+ pInst->m_Word1_OP3.val = 0x00000000;
+
+ pInst->m_pLinkedALUClause = 0;
+
+ pInst->m_fLiteralX = x;
+ pInst->m_fLiteralY = y;
+ pInst->m_fLiteralZ = z;
+ pInst->m_fLiteralW = w;
+
+ pInst->useCount = 0;
+
+ pInst->m_ShaderInstType = SIT_ALU_FALL_LIT;
+}
+
+void Init_R700TextureInstruction(R700TextureInstruction* pInst)
+{
+ pInst->m_Word0.val = 0x00000000;
+ pInst->m_Word1.val = 0x00000000;
+ pInst->m_Word2.val = 0x00000000;
+
+ pInst->m_pLinkedGenericClause = 0;
+
+ pInst->useCount = 0;
+
+ pInst->m_ShaderInstType = SIT_TEX;
+}
+
+void Init_R700VertexSemanticFetch(R700VertexSemanticFetch* pInst)
+{
+ pInst->m_Word0.val = 0x00000000;
+ pInst->m_Word1.val = 0x00000000;
+ pInst->m_Word1_SEM.val = 0x00000000;
+ pInst->m_Word2.val = 0x00000000;
+
+ pInst->m_pLinkedGenericClause = 0;
+
+ pInst->useCount = 0;
+
+ pInst->m_ShaderInstType = SIT_VTX_SEM;
+}
+
+void Init_R700VertexGenericFetch(R700VertexGenericFetch* pInst)
+{
+ pInst->m_Word0.val = 0x00000000;
+ pInst->m_Word1.val = 0x00000000;
+ pInst->m_Word1_GPR.val = 0x00000000;
+ pInst->m_Word2.val = 0x00000000;
+
+ pInst->m_pLinkedGenericClause = 0;
+
+ pInst->useCount = 0;
+
+ pInst->m_ShaderInstType = SIT_VTX_GENERIC;
+}
+
+unsigned int GetInstructionSize(ShaderInstType instType)
+{
+ switch(instType)
+ {
+ case SIT_ALU_HALF_LIT:
+ case SIT_TEX:
+ case SIT_VTX:
+ case SIT_VTX_GENERIC:
+ case SIT_VTX_SEM:
+ return 4;
+ case SIT_ALU_FALL_LIT:
+ return 6;
+ default:
+ break;
+ }
+
+ return 2;
+}
+
+unsigned int GetCFMaxInstructions(ShaderInstType instType)
+{
+ switch (instType)
+ {
+ case SIT_CF_ALL_EXP:
+ case SIT_CF_ALL_EXP_SX:
+ case SIT_CF_ALL_EXP_SMX:
+ return 0x10;
+ case SIT_CF_GENERIC:
+ return 0x8; //For tex and vtx
+ case SIT_CF_ALU:
+ return 0x80;
+ default:
+ break;
+ }
+ return 0x10;
+}
+
+GLboolean LinkVertexInstruction(R700ControlFlowGenericClause *pCFGeneric,
+ R700VertexInstruction *pVTXInstruction)
+{
+ if (pCFGeneric->m_pLinkedTEXInstruction != 0)
+ {
+ radeon_error("This instruction is already linked to a texture instruction.\n");
+ return GL_FALSE;
+ }
+
+ pCFGeneric->m_pLinkedVTXInstruction = pVTXInstruction;
+ pVTXInstruction->m_pLinkedGenericClause = pCFGeneric;
+
+ return GL_TRUE;
+}
+
+
+
diff --git a/r600/r700_shaderinst.h b/r600/r700_shaderinst.h
new file mode 100644
index 0000000..2829cca
--- /dev/null
+++ b/r600/r700_shaderinst.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef _R700_SHADERINST_H_
+#define _R700_SHADERINST_H_
+
+#include "main/glheader.h"
+
+#include "defaultendian.h"
+#include "sq_micro_reg.h"
+
+#define SQ_ALU_CONSTANT_PS_OFFSET 0x00000000
+#define SQ_ALU_CONSTANT_PS_COUNT 0x00000100
+#define SQ_ALU_CONSTANT_VS_OFFSET 0x00000100
+#define SQ_ALU_CONSTANT_VS_COUNT 0x00000100
+#define SQ_FETCH_RESOURCE_PS_OFFSET 0x00000000
+#define SQ_FETCH_RESOURCE_PS_COUNT 0x000000a0
+#define SQ_FETCH_RESOURCE_VS_OFFSET 0x000000a0
+#define SQ_FETCH_RESOURCE_VS_COUNT 0x000000b0
+
+#define SHADERINST_TYPEMASK_CF 0x10
+#define SHADERINST_TYPEMASK_ALU 0x20
+#define SHADERINST_TYPEMASK_TEX 0x40
+#define SHADERINST_TYPEMASK_VTX 0x80
+
+typedef enum ShaderInstType
+{
+ SIT_CF = 0x10, /*SIZE = 0x2*/
+ SIT_CF_ALL_EXP = 0x14, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/
+ SIT_CF_ALL_EXP_SX = 0x15, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/
+ SIT_CF_ALL_EXP_SMX= 0x16, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/
+ SIT_CF_GENERIC = 0x18, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x8; //For tex and vtx*/
+ SIT_CF_ALU = 0x19, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x80;*/
+ SIT_ALU = 0x20, /*SIZE = 0x2,*/
+ SIT_ALU_HALF_LIT = 0x21, /*SIZE = 0x4,*/
+ SIT_ALU_FALL_LIT = 0x22, /*SIZE = 0x6,*/
+ SIT_TEX = 0x40, /*SIZE = 0x4,*/
+ SIT_VTX = 0x80, /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/
+ SIT_VTX_GENERIC = 0x81, /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/
+ SIT_VTX_SEM = 0x82 /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/
+} ShaderInstType;
+
+typedef struct R700ShaderInstruction
+{
+ ShaderInstType m_ShaderInstType;
+ struct R700ShaderInstruction *pNextInst;
+ GLuint m_uIndex;
+ GLuint useCount;
+} R700ShaderInstruction;
+
+// ------------------ CF insts ---------------------------
+
+typedef R700ShaderInstruction R700ControlFlowInstruction;
+
+typedef struct R700ControlFlowAllocExportClause
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ GLuint m_uIndex;
+ GLuint useCount;
+
+ sq_cf_alloc_export_word0_u m_Word0;
+ sq_cf_alloc_export_word1_u m_Word1;
+} R700ControlFlowAllocExportClause;
+
+typedef struct R700ControlFlowSXClause
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700ControlFlowAllocExportClause
+ //R700ControlFlowInstruction
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+ //---------------------------
+ sq_cf_alloc_export_word0_u m_Word0;
+ sq_cf_alloc_export_word1_u m_Word1;
+ //-------------------------------------
+
+ sq_cf_alloc_export_word1_swiz_u m_Word1_SWIZ;
+} R700ControlFlowSXClause;
+
+typedef struct R700ControlFlowSMXClause
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700ControlFlowAllocExportClause
+ //R700ControlFlowInstruction
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+ //---------------------------
+ sq_cf_alloc_export_word0_u m_Word0;
+ sq_cf_alloc_export_word1_u m_Word1;
+ //-------------------------------
+
+ sq_cf_alloc_export_word1_buf_u m_Word1_BUF;
+} R700ControlFlowSMXClause;
+
+typedef struct R700ControlFlowGenericClause
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700ControlFlowInstruction
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+ //---------------------
+
+ sq_cf_word0_u m_Word0;
+ sq_cf_word1_u m_Word1;
+
+ struct R700VertexInstruction *m_pLinkedVTXInstruction;
+ struct R700TextureInstruction *m_pLinkedTEXInstruction;
+} R700ControlFlowGenericClause;
+
+typedef struct R700ControlFlowALUClause
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700ControlFlowInstruction
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+ //---------------------
+
+ sq_cf_alu_word0_u m_Word0;
+ sq_cf_alu_word1_u m_Word1;
+
+ struct R700ALUInstruction *m_pLinkedALUInstruction;
+} R700ControlFlowALUClause;
+
+// ------------------- End of CF Inst ------------------------
+
+// ------------------- ALU Inst ------------------------------
+typedef struct R700ALUInstruction
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+
+ sq_alu_word0_u m_Word0;
+ sq_alu_word1_u m_Word1;
+ sq_alu_word1_op2_v2_u m_Word1_OP2;
+ sq_alu_word1_op3_u m_Word1_OP3;
+
+ struct R700ControlFlowALUClause *m_pLinkedALUClause;
+} R700ALUInstruction;
+
+typedef struct R700ALUInstructionHalfLiteral
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700ALUInstruction
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+
+ sq_alu_word0_u m_Word0;
+ sq_alu_word1_u m_Word1;
+ sq_alu_word1_op2_v2_u m_Word1_OP2;
+ sq_alu_word1_op3_u m_Word1_OP3;
+
+ struct R700ControlFlowALUClause *m_pLinkedALUClause;
+ //-------------------
+
+ GLfloat m_fLiteralX,
+ m_fLiteralY;
+} R700ALUInstructionHalfLiteral;
+
+typedef struct R700ALUInstructionFullLiteral
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700ALUInstruction
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+
+ sq_alu_word0_u m_Word0;
+ sq_alu_word1_u m_Word1;
+ sq_alu_word1_op2_v2_u m_Word1_OP2;
+ sq_alu_word1_op3_u m_Word1_OP3;
+
+ struct R700ControlFlowALUClause *m_pLinkedALUClause;
+ //-------------------
+
+ GLfloat m_fLiteralX,
+ m_fLiteralY,
+ m_fLiteralZ,
+ m_fLiteralW;
+} R700ALUInstructionFullLiteral;
+// ------------------- End of ALU Inst -----------------------
+
+// ------------------- Textuer/Vertex Instruction --------------------
+
+typedef struct R700TextureInstruction
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+
+ sq_tex_word0_u m_Word0;
+ sq_tex_word1_u m_Word1;
+ sq_tex_word2_u m_Word2;
+
+ struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+} R700TextureInstruction;
+
+typedef struct R700VertexInstruction
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+
+ sq_vtx_word0_u m_Word0;
+ sq_vtx_word1_u m_Word1;
+ sq_vtx_word2_u m_Word2;
+
+ struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+} R700VertexInstruction;
+//
+typedef struct R700VertexSemanticFetch
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700VertexInstruction
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+
+ sq_vtx_word0_u m_Word0;
+ sq_vtx_word1_u m_Word1;
+ sq_vtx_word2_u m_Word2;
+
+ struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+ //---------------------------
+
+ sq_vtx_word1_sem_u m_Word1_SEM;
+} R700VertexSemanticFetch;
+//
+typedef struct R700VertexGenericFetch
+{
+ ShaderInstType m_ShaderInstType;
+ R700ShaderInstruction * pNextInst;
+ //R700VertexInstruction
+ //R700ShaderInstruction
+ GLuint m_uIndex;
+ GLuint useCount;
+ //---------------------
+
+ sq_vtx_word0_u m_Word0;
+ sq_vtx_word1_u m_Word1;
+ sq_vtx_word2_u m_Word2;
+
+ struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+ //---------------------------
+
+ sq_vtx_word1_gpr_u m_Word1_GPR;
+} R700VertexGenericFetch;
+
+// ------------------- End of Texture Vertex Instruction --------------------
+
+void Init_R700ControlFlowGenericClause(R700ControlFlowGenericClause* pInst);
+void Init_R700ControlFlowALUClause(R700ControlFlowALUClause* pInst);
+void Init_R700ControlFlowSXClause(R700ControlFlowSXClause* pInst);
+void Init_R700ControlFlowSMXClause(R700ControlFlowSMXClause* pInst);
+void Init_R700ALUInstruction(R700ALUInstruction* pInst);
+void Init_R700ALUInstructionHalfLiteral(R700ALUInstructionHalfLiteral* pInst, GLfloat x, GLfloat y);
+void Init_R700ALUInstructionFullLiteral(R700ALUInstructionFullLiteral* pInst, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+void Init_R700TextureInstruction(R700TextureInstruction* pInst);
+void Init_R700VertexSemanticFetch(R700VertexSemanticFetch* pInst);
+void Init_R700VertexGenericFetch(R700VertexGenericFetch* pInst);
+
+unsigned int GetInstructionSize(ShaderInstType instType);
+unsigned int GetCFMaxInstructions(ShaderInstType instType);
+
+GLboolean LinkVertexInstruction(R700ControlFlowGenericClause *pCFGeneric,
+ R700VertexInstruction *pVTXInstruction);
+
+#endif //_R700_SHADERINST_H_
diff --git a/r600/r700_state.c b/r600/r700_state.c
new file mode 100644
index 0000000..124469b
--- /dev/null
+++ b/r600/r700_state.c
@@ -0,0 +1,1808 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vp_build.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "main/api_arrayelt.h"
+#include "main/state.h"
+#include "main/framebuffer.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "vbo/vbo.h"
+#include "main/texformat.h"
+
+#include "r600_context.h"
+
+#include "r700_state.h"
+
+#include "r700_fragprog.h"
+#include "r700_vertprog.h"
+
+
+static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state);
+static void r700UpdatePolygonMode(GLcontext * ctx);
+static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state);
+static void r700SetStencilState(GLcontext * ctx, GLboolean state);
+
+void r700UpdateShaders (GLcontext * ctx) //----------------------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
+ GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
+ int i;
+
+ /* should only happenen once, just after context is created */
+ /* TODO: shouldn't we fallback to sw here? */
+ if (!ctx->FragmentProgram._Current) {
+ _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+ return;
+ }
+
+ r700SelectFragmentShader(ctx);
+
+ if (context->radeon.NewGLState) {
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ /* mat states from state var not array for sw */
+ dummy_attrib[i].stride = 0;
+ temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i];
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]);
+ }
+
+ _tnl_UpdateFixedFunctionProgram(ctx);
+
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i];
+ }
+ }
+
+ r700SelectVertexShader(ctx);
+ r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+ context->radeon.NewGLState = 0;
+}
+
+/*
+ * To correctly position primitives:
+ */
+void r700UpdateViewportOffset(GLcontext * ctx) //------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon);
+ GLfloat xoffset = (GLfloat) dPriv->x;
+ GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+ int id = 0;
+
+ GLfloat tx = v[MAT_TX] + xoffset;
+ GLfloat ty = (-v[MAT_TY]) + yoffset;
+
+ if (r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All != tx ||
+ r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All != ty) {
+ /* Note: this should also modify whatever data the context reset
+ * code uses...
+ */
+ R600_STATECHANGE(context, vpt);
+ r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx;
+ r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty;
+ }
+
+ radeonUpdateScissor(ctx);
+}
+
+void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state) //--------------------
+{
+ struct r700_fragment_program *fp =
+ (struct r700_fragment_program *)ctx->FragmentProgram._Current;
+ struct gl_program_parameter_list *paramList;
+
+ if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
+ return;
+
+ if (!ctx->FragmentProgram._Current || !fp)
+ return;
+
+ paramList = ctx->FragmentProgram._Current->Base.Parameters;
+
+ if (!paramList)
+ return;
+
+ _mesa_load_state_parameters(ctx, paramList);
+
+}
+
+/**
+ * Called by Mesa after an internal state update.
+ */
+static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //-------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ _swrast_InvalidateState(ctx, new_state);
+ _swsetup_InvalidateState(ctx, new_state);
+ _vbo_InvalidateState(ctx, new_state);
+ _tnl_InvalidateState(ctx, new_state);
+ _ae_invalidate_state(ctx, new_state);
+
+ if (new_state & _NEW_BUFFERS) {
+ _mesa_update_framebuffer(ctx);
+ /* this updates the DrawBuffer's Width/Height if it's a FBO */
+ _mesa_update_draw_buffer_bounds(ctx);
+
+ R600_STATECHANGE(context, cb_target);
+ R600_STATECHANGE(context, db_target);
+ }
+
+ r700UpdateStateParameters(ctx, new_state);
+
+ R600_STATECHANGE(context, cl);
+ R600_STATECHANGE(context, spi);
+
+ if(GL_TRUE == r700->bEnablePerspective)
+ {
+ /* Do scale XY and Z by 1/W0 for perspective correction on pos. For orthogonal case, set both to one. */
+ CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+ CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+ SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+ CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+ }
+ else
+ {
+ /* For orthogonal case. */
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+ CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+ SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+ }
+
+ context->radeon.NewGLState |= new_state;
+}
+
+static void r700SetDepthState(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ R600_STATECHANGE(context, db);
+
+ if (ctx->Depth.Test)
+ {
+ SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
+ if (ctx->Depth.Mask)
+ {
+ SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+ }
+
+ switch (ctx->Depth.Func)
+ {
+ case GL_NEVER:
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_NEVER,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_LESS:
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_LESS,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_EQUAL:
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_EQUAL,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_LEQUAL:
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_LEQUAL,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_GREATER:
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_GREATER,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_NOTEQUAL:
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_NOTEQUAL,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_GEQUAL:
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_GEQUAL,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_ALWAYS:
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ default:
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ }
+ }
+ else
+ {
+ CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
+ CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+ }
+}
+
+static void r700SetAlphaState(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ uint32_t alpha_func = REF_ALWAYS;
+ GLboolean really_enabled = ctx->Color.AlphaEnabled;
+
+ R600_STATECHANGE(context, sx);
+
+ switch (ctx->Color.AlphaFunc) {
+ case GL_NEVER:
+ alpha_func = REF_NEVER;
+ break;
+ case GL_LESS:
+ alpha_func = REF_LESS;
+ break;
+ case GL_EQUAL:
+ alpha_func = REF_EQUAL;
+ break;
+ case GL_LEQUAL:
+ alpha_func = REF_LEQUAL;
+ break;
+ case GL_GREATER:
+ alpha_func = REF_GREATER;
+ break;
+ case GL_NOTEQUAL:
+ alpha_func = REF_NOTEQUAL;
+ break;
+ case GL_GEQUAL:
+ alpha_func = REF_GEQUAL;
+ break;
+ case GL_ALWAYS:
+ /*alpha_func = REF_ALWAYS; */
+ really_enabled = GL_FALSE;
+ break;
+ }
+
+ if (really_enabled) {
+ SETfield(r700->SX_ALPHA_TEST_CONTROL.u32All, alpha_func,
+ ALPHA_FUNC_shift, ALPHA_FUNC_mask);
+ SETbit(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+ r700->SX_ALPHA_REF.f32All = ctx->Color.AlphaRef;
+ } else {
+ CLEARbit(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+ }
+
+}
+
+static void r700AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) //---------------
+{
+ (void)func;
+ (void)ref;
+ r700SetAlphaState(ctx);
+}
+
+
+static void r700BlendColor(GLcontext * ctx, const GLfloat cf[4]) //----------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ R600_STATECHANGE(context, blnd_clr);
+
+ r700->CB_BLEND_RED.f32All = cf[0];
+ r700->CB_BLEND_GREEN.f32All = cf[1];
+ r700->CB_BLEND_BLUE.f32All = cf[2];
+ r700->CB_BLEND_ALPHA.f32All = cf[3];
+}
+
+static int blend_factor(GLenum factor, GLboolean is_src)
+{
+ switch (factor) {
+ case GL_ZERO:
+ return BLEND_ZERO;
+ break;
+ case GL_ONE:
+ return BLEND_ONE;
+ break;
+ case GL_DST_COLOR:
+ return BLEND_DST_COLOR;
+ break;
+ case GL_ONE_MINUS_DST_COLOR:
+ return BLEND_ONE_MINUS_DST_COLOR;
+ break;
+ case GL_SRC_COLOR:
+ return BLEND_SRC_COLOR;
+ break;
+ case GL_ONE_MINUS_SRC_COLOR:
+ return BLEND_ONE_MINUS_SRC_COLOR;
+ break;
+ case GL_SRC_ALPHA:
+ return BLEND_SRC_ALPHA;
+ break;
+ case GL_ONE_MINUS_SRC_ALPHA:
+ return BLEND_ONE_MINUS_SRC_ALPHA;
+ break;
+ case GL_DST_ALPHA:
+ return BLEND_DST_ALPHA;
+ break;
+ case GL_ONE_MINUS_DST_ALPHA:
+ return BLEND_ONE_MINUS_DST_ALPHA;
+ break;
+ case GL_SRC_ALPHA_SATURATE:
+ return (is_src) ? BLEND_SRC_ALPHA_SATURATE : BLEND_ZERO;
+ break;
+ case GL_CONSTANT_COLOR:
+ return BLEND_CONSTANT_COLOR;
+ break;
+ case GL_ONE_MINUS_CONSTANT_COLOR:
+ return BLEND_ONE_MINUS_CONSTANT_COLOR;
+ break;
+ case GL_CONSTANT_ALPHA:
+ return BLEND_CONSTANT_ALPHA;
+ break;
+ case GL_ONE_MINUS_CONSTANT_ALPHA:
+ return BLEND_ONE_MINUS_CONSTANT_ALPHA;
+ break;
+ default:
+ fprintf(stderr, "unknown blend factor %x\n", factor);
+ return (is_src) ? BLEND_ONE : BLEND_ZERO;
+ break;
+ }
+}
+
+static void r700SetBlendState(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ int id = 0;
+ uint32_t blend_reg = 0, eqn, eqnA;
+
+ R600_STATECHANGE(context, blnd);
+
+ if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) {
+ SETfield(blend_reg,
+ BLEND_ONE, COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ZERO, COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+ SETfield(blend_reg,
+ COMB_DST_PLUS_SRC, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
+ SETfield(blend_reg,
+ BLEND_ONE, ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ZERO, ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+ SETfield(blend_reg,
+ COMB_DST_PLUS_SRC, ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask);
+ if (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_R600)
+ r700->CB_BLEND_CONTROL.u32All = blend_reg;
+ else
+ r700->render_target[id].CB_BLEND0_CONTROL.u32All = blend_reg;
+ return;
+ }
+
+ SETfield(blend_reg,
+ blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE),
+ COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ blend_factor(ctx->Color.BlendDstRGB, GL_FALSE),
+ COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+
+ switch (ctx->Color.BlendEquationRGB) {
+ case GL_FUNC_ADD:
+ eqn = COMB_DST_PLUS_SRC;
+ break;
+ case GL_FUNC_SUBTRACT:
+ eqn = COMB_SRC_MINUS_DST;
+ break;
+ case GL_FUNC_REVERSE_SUBTRACT:
+ eqn = COMB_DST_MINUS_SRC;
+ break;
+ case GL_MIN:
+ eqn = COMB_MIN_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+ break;
+ case GL_MAX:
+ eqn = COMB_MAX_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+ break;
+
+ default:
+ fprintf(stderr,
+ "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+ __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB);
+ return;
+ }
+ SETfield(blend_reg,
+ eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
+
+ SETfield(blend_reg,
+ blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE),
+ ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ blend_factor(ctx->Color.BlendDstRGB, GL_FALSE),
+ ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+
+ switch (ctx->Color.BlendEquationA) {
+ case GL_FUNC_ADD:
+ eqnA = COMB_DST_PLUS_SRC;
+ break;
+ case GL_FUNC_SUBTRACT:
+ eqnA = COMB_SRC_MINUS_DST;
+ break;
+ case GL_FUNC_REVERSE_SUBTRACT:
+ eqnA = COMB_DST_MINUS_SRC;
+ break;
+ case GL_MIN:
+ eqnA = COMB_MIN_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+ break;
+ case GL_MAX:
+ eqnA = COMB_MAX_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+ break;
+ default:
+ fprintf(stderr,
+ "[%s:%u] Invalid A blend equation (0x%04x).\n",
+ __FUNCTION__, __LINE__, ctx->Color.BlendEquationA);
+ return;
+ }
+
+ SETfield(blend_reg,
+ eqnA, ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask);
+
+ SETbit(blend_reg, SEPARATE_ALPHA_BLEND_bit);
+
+ if (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_R600)
+ r700->CB_BLEND_CONTROL.u32All = blend_reg;
+ else {
+ r700->render_target[id].CB_BLEND0_CONTROL.u32All = blend_reg;
+ SETbit(r700->CB_COLOR_CONTROL.u32All, PER_MRT_BLEND_bit);
+ }
+ SETfield(r700->CB_COLOR_CONTROL.u32All, (1 << id),
+ TARGET_BLEND_ENABLE_shift, TARGET_BLEND_ENABLE_mask);
+
+}
+
+static void r700BlendEquationSeparate(GLcontext * ctx,
+ GLenum modeRGB, GLenum modeA) //-----------------
+{
+ r700SetBlendState(ctx);
+}
+
+static void r700BlendFuncSeparate(GLcontext * ctx,
+ GLenum sfactorRGB, GLenum dfactorRGB,
+ GLenum sfactorA, GLenum dfactorA) //------------------------
+{
+ r700SetBlendState(ctx);
+}
+
+/**
+ * Translate LogicOp enums into hardware representation.
+ */
+static GLuint translate_logicop(GLenum logicop)
+{
+ switch (logicop) {
+ case GL_CLEAR:
+ return 0x00;
+ case GL_SET:
+ return 0xff;
+ case GL_COPY:
+ return 0xcc;
+ case GL_COPY_INVERTED:
+ return 0x33;
+ case GL_NOOP:
+ return 0xaa;
+ case GL_INVERT:
+ return 0x55;
+ case GL_AND:
+ return 0x88;
+ case GL_NAND:
+ return 0x77;
+ case GL_OR:
+ return 0xee;
+ case GL_NOR:
+ return 0x11;
+ case GL_XOR:
+ return 0x66;
+ case GL_EQUIV:
+ return 0xaa;
+ case GL_AND_REVERSE:
+ return 0x44;
+ case GL_AND_INVERTED:
+ return 0x22;
+ case GL_OR_REVERSE:
+ return 0xdd;
+ case GL_OR_INVERTED:
+ return 0xbb;
+ default:
+ fprintf(stderr, "unknown blend logic operation %x\n", logicop);
+ return 0xcc;
+ }
+}
+
+/**
+ * Used internally to update the r300->hw hardware state to match the
+ * current OpenGL state.
+ */
+static void r700SetLogicOpState(GLcontext *ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw);
+
+ R600_STATECHANGE(context, blnd);
+
+ if (RGBA_LOGICOP_ENABLED(ctx))
+ SETfield(r700->CB_COLOR_CONTROL.u32All,
+ translate_logicop(ctx->Color.LogicOp), ROP3_shift, ROP3_mask);
+ else
+ SETfield(r700->CB_COLOR_CONTROL.u32All, 0xCC, ROP3_shift, ROP3_mask);
+}
+
+/**
+ * Called by Mesa when an application program changes the LogicOp state
+ * via glLogicOp.
+ */
+static void r700LogicOpcode(GLcontext *ctx, GLenum logicop)
+{
+ if (RGBA_LOGICOP_ENABLED(ctx))
+ r700SetLogicOpState(ctx);
+}
+
+static void r700UpdateCulling(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw);
+
+ R600_STATECHANGE(context, su);
+
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+
+ if (ctx->Polygon.CullFlag)
+ {
+ switch (ctx->Polygon.CullFaceMode)
+ {
+ case GL_FRONT:
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+ break;
+ case GL_BACK:
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+ break;
+ case GL_FRONT_AND_BACK:
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+ break;
+ default:
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+ break;
+ }
+ }
+
+ switch (ctx->Polygon.FrontFace)
+ {
+ case GL_CW:
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
+ break;
+ case GL_CCW:
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
+ break;
+ default:
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */
+ break;
+ }
+}
+
+static void r700UpdateLineStipple(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw);
+
+ R600_STATECHANGE(context, sc);
+
+ if (ctx->Line.StippleFlag)
+ {
+ SETbit(r700->PA_SC_MODE_CNTL.u32All, LINE_STIPPLE_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(r700->PA_SC_MODE_CNTL.u32All, LINE_STIPPLE_ENABLE_bit);
+ }
+}
+
+static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+
+ switch (cap) {
+ case GL_TEXTURE_1D:
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_3D:
+ /* empty */
+ break;
+ case GL_FOG:
+ /* empty */
+ break;
+ case GL_ALPHA_TEST:
+ r700SetAlphaState(ctx);
+ break;
+ case GL_COLOR_LOGIC_OP:
+ r700SetLogicOpState(ctx);
+ /* fall-through, because logic op overrides blending */
+ case GL_BLEND:
+ r700SetBlendState(ctx);
+ break;
+ case GL_CLIP_PLANE0:
+ case GL_CLIP_PLANE1:
+ case GL_CLIP_PLANE2:
+ case GL_CLIP_PLANE3:
+ case GL_CLIP_PLANE4:
+ case GL_CLIP_PLANE5:
+ r700SetClipPlaneState(ctx, cap, state);
+ break;
+ case GL_DEPTH_TEST:
+ r700SetDepthState(ctx);
+ break;
+ case GL_STENCIL_TEST:
+ r700SetStencilState(ctx, state);
+ break;
+ case GL_CULL_FACE:
+ r700UpdateCulling(ctx);
+ break;
+ case GL_POLYGON_OFFSET_POINT:
+ case GL_POLYGON_OFFSET_LINE:
+ case GL_POLYGON_OFFSET_FILL:
+ r700SetPolygonOffsetState(ctx, state);
+ break;
+ case GL_SCISSOR_TEST:
+ radeon_firevertices(&context->radeon);
+ context->radeon.state.scissor.enabled = state;
+ radeonUpdateScissor(ctx);
+ break;
+ case GL_LINE_STIPPLE:
+ r700UpdateLineStipple(ctx);
+ break;
+ default:
+ break;
+ }
+
+}
+
+/**
+ * Handle glColorMask()
+ */
+static void r700ColorMask(GLcontext * ctx,
+ GLboolean r, GLboolean g, GLboolean b, GLboolean a) //------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw);
+ unsigned int mask = ((r ? 1 : 0) |
+ (g ? 2 : 0) |
+ (b ? 4 : 0) |
+ (a ? 8 : 0));
+
+ if (mask != r700->CB_SHADER_MASK.u32All) {
+ R600_STATECHANGE(context, cb);
+ SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask);
+ }
+}
+
+/**
+ * Change the depth testing function.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700DepthFunc(GLcontext * ctx, GLenum func) //--------------------
+{
+ r700SetDepthState(ctx);
+}
+
+/**
+ * Enable/Disable depth writing.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700DepthMask(GLcontext * ctx, GLboolean mask) //------------------
+{
+ r700SetDepthState(ctx);
+}
+
+/**
+ * Change the culling mode.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700CullFace(GLcontext * ctx, GLenum mode) //-----------------
+{
+ r700UpdateCulling(ctx);
+}
+
+/* =============================================================
+ * Fog
+ */
+static void r700Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) //--------------
+{
+}
+
+/**
+ * Change the polygon orientation.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700FrontFace(GLcontext * ctx, GLenum mode) //------------------
+{
+ r700UpdateCulling(ctx);
+ r700UpdatePolygonMode(ctx);
+}
+
+static void r700ShadeModel(GLcontext * ctx, GLenum mode) //--------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ R600_STATECHANGE(context, spi);
+
+ /* also need to set/clear FLAT_SHADE bit per param in SPI_PS_INPUT_CNTL_[0-31] */
+ switch (mode) {
+ case GL_FLAT:
+ SETbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
+ break;
+ case GL_SMOOTH:
+ CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
+ break;
+ default:
+ return;
+ }
+}
+
+/* =============================================================
+ * Point state
+ */
+static void r700PointSize(GLcontext * ctx, GLfloat size)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ R600_STATECHANGE(context, su);
+
+ /* We need to clamp to user defined range here, because
+ * the HW clamping happens only for per vertex point size. */
+ size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize);
+
+ /* same size limits for AA, non-AA points */
+ size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize);
+
+ /* format is 12.4 fixed point */
+ SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0),
+ PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask);
+ SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0),
+ PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask);
+
+}
+
+static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param) //---------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ R600_STATECHANGE(context, su);
+
+ /* format is 12.4 fixed point */
+ switch (pname) {
+ case GL_POINT_SIZE_MIN:
+ SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 8.0),
+ MIN_SIZE_shift, MIN_SIZE_mask);
+ break;
+ case GL_POINT_SIZE_MAX:
+ SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 8.0),
+ MAX_SIZE_shift, MAX_SIZE_mask);
+ break;
+ case GL_POINT_DISTANCE_ATTENUATION:
+ break;
+ case GL_POINT_FADE_THRESHOLD_SIZE:
+ break;
+ default:
+ break;
+ }
+}
+
+static int translate_stencil_func(int func)
+{
+ switch (func) {
+ case GL_NEVER:
+ return REF_NEVER;
+ case GL_LESS:
+ return REF_LESS;
+ case GL_EQUAL:
+ return REF_EQUAL;
+ case GL_LEQUAL:
+ return REF_LEQUAL;
+ case GL_GREATER:
+ return REF_GREATER;
+ case GL_NOTEQUAL:
+ return REF_NOTEQUAL;
+ case GL_GEQUAL:
+ return REF_GEQUAL;
+ case GL_ALWAYS:
+ return REF_ALWAYS;
+ }
+ return 0;
+}
+
+static int translate_stencil_op(int op)
+{
+ switch (op) {
+ case GL_KEEP:
+ return STENCIL_KEEP;
+ case GL_ZERO:
+ return STENCIL_ZERO;
+ case GL_REPLACE:
+ return STENCIL_REPLACE;
+ case GL_INCR:
+ return STENCIL_INCR_CLAMP;
+ case GL_DECR:
+ return STENCIL_DECR_CLAMP;
+ case GL_INCR_WRAP_EXT:
+ return STENCIL_INCR_WRAP;
+ case GL_DECR_WRAP_EXT:
+ return STENCIL_DECR_WRAP;
+ case GL_INVERT:
+ return STENCIL_INVERT;
+ default:
+ WARN_ONCE("Do not know how to translate stencil op");
+ return STENCIL_KEEP;
+ }
+ return 0;
+}
+
+static void r700SetStencilState(GLcontext * ctx, GLboolean state)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ GLboolean hw_stencil = GL_FALSE;
+
+ if (ctx->DrawBuffer) {
+ struct radeon_renderbuffer *rrbStencil
+ = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+ hw_stencil = (rrbStencil && rrbStencil->bo);
+ }
+
+ if (hw_stencil) {
+ R600_STATECHANGE(context, db);
+ if (state) {
+ SETbit(r700->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit);
+ SETbit(r700->DB_DEPTH_CONTROL.u32All, BACKFACE_ENABLE_bit);
+ } else
+ CLEARbit(r700->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit);
+ }
+}
+
+static void r700StencilFuncSeparate(GLcontext * ctx, GLenum face,
+ GLenum func, GLint ref, GLuint mask) //---------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ R600_STATECHANGE(context, stencil);
+ R600_STATECHANGE(context, db);
+
+ //front
+ SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.Ref[0],
+ STENCILREF_shift, STENCILREF_mask);
+ SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.ValueMask[0],
+ STENCILMASK_shift, STENCILMASK_mask);
+
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_func(ctx->Stencil.Function[0]),
+ STENCILFUNC_shift, STENCILFUNC_mask);
+
+ //back
+ SETfield(r700->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.Ref[back],
+ STENCILREF_BF_shift, STENCILREF_BF_mask);
+ SETfield(r700->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.ValueMask[back],
+ STENCILMASK_BF_shift, STENCILMASK_BF_mask);
+
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_func(ctx->Stencil.Function[back]),
+ STENCILFUNC_BF_shift, STENCILFUNC_BF_mask);
+
+}
+
+static void r700StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) //--------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ R600_STATECHANGE(context, stencil);
+
+ // front
+ SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.WriteMask[0],
+ STENCILWRITEMASK_shift, STENCILWRITEMASK_mask);
+
+ // back
+ SETfield(r700->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.WriteMask[back],
+ STENCILWRITEMASK_BF_shift, STENCILWRITEMASK_BF_mask);
+
+}
+
+static void r700StencilOpSeparate(GLcontext * ctx, GLenum face,
+ GLenum fail, GLenum zfail, GLenum zpass) //--------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ R600_STATECHANGE(context, db);
+
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.FailFunc[0]),
+ STENCILFAIL_shift, STENCILFAIL_mask);
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.ZFailFunc[0]),
+ STENCILZFAIL_shift, STENCILZFAIL_mask);
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.ZPassFunc[0]),
+ STENCILZPASS_shift, STENCILZPASS_mask);
+
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.FailFunc[back]),
+ STENCILFAIL_BF_shift, STENCILFAIL_BF_mask);
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.ZFailFunc[back]),
+ STENCILZFAIL_BF_shift, STENCILZFAIL_BF_mask);
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.ZPassFunc[back]),
+ STENCILZPASS_BF_shift, STENCILZPASS_BF_mask);
+}
+
+static void r700UpdateWindow(GLcontext * ctx, int id) //--------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon);
+ GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+ GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+ const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+ const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+ GLfloat y_scale, y_bias;
+
+ if (render_to_fbo) {
+ y_scale = 1.0;
+ y_bias = 0;
+ } else {
+ y_scale = -1.0;
+ y_bias = yoffset;
+ }
+
+ GLfloat sx = v[MAT_SX];
+ GLfloat tx = v[MAT_TX] + xoffset;
+ GLfloat sy = v[MAT_SY] * y_scale;
+ GLfloat ty = (v[MAT_TY] * y_scale) + y_bias;
+ GLfloat sz = v[MAT_SZ] * depthScale;
+ GLfloat tz = v[MAT_TZ] * depthScale;
+
+ R600_STATECHANGE(context, vpt);
+
+ r700->viewport[id].PA_CL_VPORT_XSCALE.f32All = sx;
+ r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx;
+
+ r700->viewport[id].PA_CL_VPORT_YSCALE.f32All = sy;
+ r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty;
+
+ r700->viewport[id].PA_CL_VPORT_ZSCALE.f32All = sz;
+ r700->viewport[id].PA_CL_VPORT_ZOFFSET.f32All = tz;
+
+ r700->viewport[id].enabled = GL_TRUE;
+
+ r700SetScissor(context);
+}
+
+
+static void r700Viewport(GLcontext * ctx,
+ GLint x,
+ GLint y,
+ GLsizei width,
+ GLsizei height) //--------------------
+{
+ r700UpdateWindow(ctx, 0);
+
+ radeon_viewport(ctx, x, y, width, height);
+}
+
+static void r700DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) //-------------
+{
+ r700UpdateWindow(ctx, 0);
+}
+
+static void r700LineWidth(GLcontext * ctx, GLfloat widthf) //---------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ uint32_t lineWidth = (uint32_t)((widthf * 0.5) * (1 << 4));
+
+ R600_STATECHANGE(context, su);
+
+ if (lineWidth > 0xFFFF)
+ lineWidth = 0xFFFF;
+ SETfield(r700->PA_SU_LINE_CNTL.u32All,(uint16_t)lineWidth,
+ PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask);
+}
+
+static void r700LineStipple(GLcontext *ctx, GLint factor, GLushort pattern)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ R600_STATECHANGE(context, sc);
+
+ SETfield(r700->PA_SC_LINE_STIPPLE.u32All, pattern, LINE_PATTERN_shift, LINE_PATTERN_mask);
+ SETfield(r700->PA_SC_LINE_STIPPLE.u32All, (factor-1), REPEAT_COUNT_shift, REPEAT_COUNT_mask);
+ SETfield(r700->PA_SC_LINE_STIPPLE.u32All, 1, AUTO_RESET_CNTL_shift, AUTO_RESET_CNTL_mask);
+}
+
+static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ R600_STATECHANGE(context, su);
+
+ if (state) {
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit);
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit);
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_PARA_ENABLE_bit);
+ } else {
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit);
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit);
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_PARA_ENABLE_bit);
+ }
+}
+
+static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) //--------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ GLfloat constant = units;
+ GLchar depth = 0;
+
+ R600_STATECHANGE(context, poly);
+
+ switch (ctx->Visual.depthBits) {
+ case 16:
+ constant *= 4.0;
+ depth = -16;
+ break;
+ case 24:
+ constant *= 2.0;
+ depth = -24;
+ break;
+ }
+
+ factor *= 12.0;
+ SETfield(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All, depth,
+ POLY_OFFSET_NEG_NUM_DB_BITS_shift, POLY_OFFSET_NEG_NUM_DB_BITS_mask);
+ //r700->PA_SU_POLY_OFFSET_CLAMP.f32All = constant; //???
+ r700->PA_SU_POLY_OFFSET_FRONT_SCALE.f32All = factor;
+ r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.f32All = constant;
+ r700->PA_SU_POLY_OFFSET_BACK_SCALE.f32All = factor;
+ r700->PA_SU_POLY_OFFSET_BACK_OFFSET.f32All = constant;
+}
+
+static void r700UpdatePolygonMode(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ R600_STATECHANGE(context, su);
+
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DISABLE_POLY_MODE, POLY_MODE_shift, POLY_MODE_mask);
+
+ /* Only do something if a polygon mode is wanted, default is GL_FILL */
+ if (ctx->Polygon.FrontMode != GL_FILL ||
+ ctx->Polygon.BackMode != GL_FILL) {
+ GLenum f, b;
+
+ /* Handle GL_CW (clock wise and GL_CCW (counter clock wise)
+ * correctly by selecting the correct front and back face
+ */
+ if (ctx->Polygon.FrontFace == GL_CCW) {
+ f = ctx->Polygon.FrontMode;
+ b = ctx->Polygon.BackMode;
+ } else {
+ f = ctx->Polygon.BackMode;
+ b = ctx->Polygon.FrontMode;
+ }
+
+ /* Enable polygon mode */
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DUAL_MODE, POLY_MODE_shift, POLY_MODE_mask);
+
+ switch (f) {
+ case GL_LINE:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_LINES,
+ POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+ break;
+ case GL_POINT:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_POINTS,
+ POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+ break;
+ case GL_FILL:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
+ POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+ break;
+ }
+
+ switch (b) {
+ case GL_LINE:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_LINES,
+ POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+ break;
+ case GL_POINT:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_POINTS,
+ POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+ break;
+ case GL_FILL:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
+ POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+ break;
+ }
+ }
+}
+
+static void r700PolygonMode(GLcontext * ctx, GLenum face, GLenum mode) //------------------
+{
+ (void)face;
+ (void)mode;
+
+ r700UpdatePolygonMode(ctx);
+}
+
+static void r700RenderMode(GLcontext * ctx, GLenum mode) //---------------------
+{
+}
+
+static void r700ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ GLint p;
+ GLint *ip;
+
+ p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+ ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+ R600_STATECHANGE(context, ucp);
+
+ r700->ucp[p].PA_CL_UCP_0_X.u32All = ip[0];
+ r700->ucp[p].PA_CL_UCP_0_Y.u32All = ip[1];
+ r700->ucp[p].PA_CL_UCP_0_Z.u32All = ip[2];
+ r700->ucp[p].PA_CL_UCP_0_W.u32All = ip[3];
+}
+
+static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ GLuint p;
+
+ p = cap - GL_CLIP_PLANE0;
+
+ R600_STATECHANGE(context, cl);
+
+ if (state) {
+ r700->PA_CL_CLIP_CNTL.u32All |= (UCP_ENA_0_bit << p);
+ r700->ucp[p].enabled = GL_TRUE;
+ r700ClipPlane(ctx, cap, NULL);
+ } else {
+ r700->PA_CL_CLIP_CNTL.u32All &= ~(UCP_ENA_0_bit << p);
+ r700->ucp[p].enabled = GL_FALSE;
+ }
+}
+
+void r700SetScissor(context_t *context) //---------------
+{
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ unsigned x1, y1, x2, y2;
+ int id = 0;
+ struct radeon_renderbuffer *rrb;
+
+ rrb = radeon_get_colorbuffer(&context->radeon);
+ if (!rrb || !rrb->bo) {
+ return;
+ }
+ if (context->radeon.state.scissor.enabled) {
+ /* r600 has exclusive scissors */
+ x1 = context->radeon.state.scissor.rect.x1;
+ y1 = context->radeon.state.scissor.rect.y1;
+ x2 = context->radeon.state.scissor.rect.x2 + 1;
+ y2 = context->radeon.state.scissor.rect.y2 + 1;
+ } else {
+ if (context->radeon.radeonScreen->driScreen->dri2.enabled) {
+ x1 = 0;
+ y1 = 0;
+ x2 = rrb->base.Width;
+ y2 = rrb->base.Height;
+ } else {
+ x1 = rrb->dPriv->x;
+ y1 = rrb->dPriv->y;
+ x2 = rrb->dPriv->x + rrb->dPriv->w;
+ y2 = rrb->dPriv->y + rrb->dPriv->h;
+ }
+ }
+
+ R600_STATECHANGE(context, scissor);
+
+ /* screen */
+ SETbit(r700->PA_SC_SCREEN_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+ SETfield(r700->PA_SC_SCREEN_SCISSOR_TL.u32All, x1,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_shift, PA_SC_SCREEN_SCISSOR_TL__TL_X_mask);
+ SETfield(r700->PA_SC_SCREEN_SCISSOR_TL.u32All, y1,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift, PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask);
+
+ SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, x2,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_shift, PA_SC_SCREEN_SCISSOR_BR__BR_X_mask);
+ SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, y2,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift, PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask);
+
+ /* window */
+ SETbit(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+ SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, x1,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_shift, PA_SC_WINDOW_SCISSOR_TL__TL_X_mask);
+ SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, y1,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift, PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask);
+
+ SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, x2,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_shift, PA_SC_WINDOW_SCISSOR_BR__BR_X_mask);
+ SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, y2,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift, PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask);
+
+
+ SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, x1,
+ PA_SC_CLIPRECT_0_TL__TL_X_shift, PA_SC_CLIPRECT_0_TL__TL_X_mask);
+ SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, y1,
+ PA_SC_CLIPRECT_0_TL__TL_Y_shift, PA_SC_CLIPRECT_0_TL__TL_Y_mask);
+ SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, x2,
+ PA_SC_CLIPRECT_0_BR__BR_X_shift, PA_SC_CLIPRECT_0_BR__BR_X_mask);
+ SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, y2,
+ PA_SC_CLIPRECT_0_BR__BR_Y_shift, PA_SC_CLIPRECT_0_BR__BR_Y_mask);
+
+ r700->PA_SC_CLIPRECT_1_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
+ r700->PA_SC_CLIPRECT_1_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
+ r700->PA_SC_CLIPRECT_2_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
+ r700->PA_SC_CLIPRECT_2_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
+ r700->PA_SC_CLIPRECT_3_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
+ r700->PA_SC_CLIPRECT_3_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
+
+ /* more....2d clip */
+ SETbit(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+ SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, x1,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_shift, PA_SC_GENERIC_SCISSOR_TL__TL_X_mask);
+ SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, y1,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift, PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask);
+ SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, x2,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_shift, PA_SC_GENERIC_SCISSOR_BR__BR_X_mask);
+ SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, y2,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift, PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask);
+
+ SETbit(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+ SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, x1,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask);
+ SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, y1,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask);
+ SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, x2,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask);
+ SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, y2,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask);
+
+ r700->viewport[id].PA_SC_VPORT_ZMIN_0.u32All = 0;
+ r700->viewport[id].PA_SC_VPORT_ZMAX_0.u32All = 0x3F800000;
+ r700->viewport[id].enabled = GL_TRUE;
+}
+
+static void r700InitSQConfig(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ int ps_prio;
+ int vs_prio;
+ int gs_prio;
+ int es_prio;
+ int num_ps_gprs;
+ int num_vs_gprs;
+ int num_gs_gprs;
+ int num_es_gprs;
+ int num_temp_gprs;
+ int num_ps_threads;
+ int num_vs_threads;
+ int num_gs_threads;
+ int num_es_threads;
+ int num_ps_stack_entries;
+ int num_vs_stack_entries;
+ int num_gs_stack_entries;
+ int num_es_stack_entries;
+
+ R600_STATECHANGE(context, sq);
+
+ // SQ
+ ps_prio = 0;
+ vs_prio = 1;
+ gs_prio = 2;
+ es_prio = 3;
+ switch (context->radeon.radeonScreen->chip_family) {
+ case CHIP_FAMILY_R600:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV630:
+ case CHIP_FAMILY_RV635:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 40;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV610:
+ case CHIP_FAMILY_RV620:
+ case CHIP_FAMILY_RS780:
+ case CHIP_FAMILY_RS880:
+ default:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV670:
+ num_ps_gprs = 144;
+ num_vs_gprs = 40;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV770:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 256;
+ num_vs_stack_entries = 256;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV730:
+ case CHIP_FAMILY_RV740:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV710:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 48;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ }
+
+ r700->sq_config.SQ_CONFIG.u32All = 0;
+ if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+ CLEARbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit);
+ else
+ SETbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit);
+ SETbit(r700->sq_config.SQ_CONFIG.u32All, DX9_CONSTS_bit);
+ SETbit(r700->sq_config.SQ_CONFIG.u32All, ALU_INST_PREFER_VECTOR_bit);
+ SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, PS_PRIO_shift, PS_PRIO_mask);
+ SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, VS_PRIO_shift, VS_PRIO_mask);
+ SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, GS_PRIO_shift, GS_PRIO_mask);
+ SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, ES_PRIO_shift, ES_PRIO_mask);
+
+ r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All = 0;
+ SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, num_ps_gprs, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
+ SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, num_vs_gprs, NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
+ SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, num_temp_gprs,
+ NUM_CLAUSE_TEMP_GPRS_shift, NUM_CLAUSE_TEMP_GPRS_mask);
+
+ r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All = 0;
+ SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All, num_gs_gprs, NUM_GS_GPRS_shift, NUM_GS_GPRS_mask);
+ SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All, num_es_gprs, NUM_ES_GPRS_shift, NUM_ES_GPRS_mask);
+
+ r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All = 0;
+ SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_ps_threads,
+ NUM_PS_THREADS_shift, NUM_PS_THREADS_mask);
+ SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_vs_threads,
+ NUM_VS_THREADS_shift, NUM_VS_THREADS_mask);
+ SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_gs_threads,
+ NUM_GS_THREADS_shift, NUM_GS_THREADS_mask);
+ SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_es_threads,
+ NUM_ES_THREADS_shift, NUM_ES_THREADS_mask);
+
+ r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All = 0;
+ SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All, num_ps_stack_entries,
+ NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask);
+ SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All, num_vs_stack_entries,
+ NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask);
+
+ r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All = 0;
+ SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All, num_gs_stack_entries,
+ NUM_GS_STACK_ENTRIES_shift, NUM_GS_STACK_ENTRIES_mask);
+ SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All, num_es_stack_entries,
+ NUM_ES_STACK_ENTRIES_shift, NUM_ES_STACK_ENTRIES_mask);
+
+}
+
+/**
+ * Calculate initial hardware state and register state functions.
+ * Assumes that the command buffer and state atoms have been
+ * initialized already.
+ */
+void r700InitState(GLcontext * ctx) //-------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ int id = 0;
+
+ radeon_firevertices(&context->radeon);
+
+ r700->TA_CNTL_AUX.u32All = 0;
+ SETfield(r700->TA_CNTL_AUX.u32All, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask);
+ r700->VC_ENHANCE.u32All = 0;
+ r700->DB_WATERMARKS.u32All = 0;
+ SETfield(r700->DB_WATERMARKS.u32All, 4, DEPTH_FREE_shift, DEPTH_FREE_mask);
+ SETfield(r700->DB_WATERMARKS.u32All, 16, DEPTH_FLUSH_shift, DEPTH_FLUSH_mask);
+ SETfield(r700->DB_WATERMARKS.u32All, 0, FORCE_SUMMARIZE_shift, FORCE_SUMMARIZE_mask);
+ SETfield(r700->DB_WATERMARKS.u32All, 4, DEPTH_PENDING_FREE_shift, DEPTH_PENDING_FREE_mask);
+ r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All = 0;
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+ SETfield(r700->TA_CNTL_AUX.u32All, 3, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
+ r700->DB_DEBUG.u32All = 0x82000000;
+ SETfield(r700->DB_WATERMARKS.u32All, 16, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
+ } else {
+ SETfield(r700->TA_CNTL_AUX.u32All, 2, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
+ SETfield(r700->DB_WATERMARKS.u32All, 4, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
+ SETbit(r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All, VS_PC_LIMIT_ENABLE_bit);
+ }
+
+ /* Turn off vgt reuse */
+ r700->VGT_REUSE_OFF.u32All = 0;
+ SETbit(r700->VGT_REUSE_OFF.u32All, REUSE_OFF_bit);
+
+ /* Specify offsetting and clamp values for vertices */
+ r700->VGT_MAX_VTX_INDX.u32All = 0xFFFFFF;
+ r700->VGT_MIN_VTX_INDX.u32All = 0;
+ r700->VGT_INDX_OFFSET.u32All = 0;
+
+ /* default shader connections. */
+ r700->SPI_VS_OUT_ID_0.u32All = 0x03020100;
+ r700->SPI_VS_OUT_ID_1.u32All = 0x07060504;
+ r700->SPI_VS_OUT_ID_2.u32All = 0x0b0a0908;
+ r700->SPI_VS_OUT_ID_3.u32All = 0x0f0e0d0c;
+
+ r700->SPI_THREAD_GROUPING.u32All = 0;
+ if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+ SETfield(r700->SPI_THREAD_GROUPING.u32All, 1, PS_GROUPING_shift, PS_GROUPING_mask);
+
+ /* 4 clip rectangles */ /* TODO : set these clip rects according to context->currentDraw->numClipRects */
+ r700->PA_SC_CLIPRECT_RULE.u32All = 0;
+ SETfield(r700->PA_SC_CLIPRECT_RULE.u32All, CLIP_RULE_mask, CLIP_RULE_shift, CLIP_RULE_mask);
+
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+ r700->PA_SC_EDGERULE.u32All = 0;
+ else
+ r700->PA_SC_EDGERULE.u32All = 0xAAAAAAAA;
+
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+ r700->PA_SC_MODE_CNTL.u32All = 0;
+ SETbit(r700->PA_SC_MODE_CNTL.u32All, WALK_ORDER_ENABLE_bit);
+ SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_CNTDWN_ENABLE_bit);
+ } else {
+ r700->PA_SC_MODE_CNTL.u32All = 0x00500000;
+ SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_REZ_ENABLE_bit);
+ SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_CNTDWN_ENABLE_bit);
+ }
+
+ /* Do scale XY and Z by 1/W0. */
+ r700->bEnablePerspective = GL_TRUE;
+ CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+ CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+ /* Enable viewport scaling for all three axis */
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_X_SCALE_ENA_bit);
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_X_OFFSET_ENA_bit);
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Y_SCALE_ENA_bit);
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Y_OFFSET_ENA_bit);
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_SCALE_ENA_bit);
+ SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_OFFSET_ENA_bit);
+
+ /* GL uses last vtx for flat shading components */
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+
+ /* Set up vertex control */
+ r700->PA_SU_VTX_CNTL.u32All = 0;
+ CLEARfield(r700->PA_SU_VTX_CNTL.u32All, QUANT_MODE_mask);
+ SETbit(r700->PA_SU_VTX_CNTL.u32All, PIX_CENTER_bit);
+ SETfield(r700->PA_SU_VTX_CNTL.u32All, X_ROUND_TO_EVEN,
+ PA_SU_VTX_CNTL__ROUND_MODE_shift, PA_SU_VTX_CNTL__ROUND_MODE_mask);
+
+ /* to 1.0 = no guard band */
+ r700->PA_CL_GB_VERT_CLIP_ADJ.u32All = 0x3F800000; /* 1.0 */
+ r700->PA_CL_GB_VERT_DISC_ADJ.u32All = 0x3F800000;
+ r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All = 0x3F800000;
+ r700->PA_CL_GB_HORZ_DISC_ADJ.u32All = 0x3F800000;
+
+ /* Enable all samples for multi-sample anti-aliasing */
+ r700->PA_SC_AA_MASK.u32All = 0xFFFFFFFF;
+ /* Turn off AA */
+ r700->PA_SC_AA_CONFIG.u32All = 0;
+
+ r700->SX_MISC.u32All = 0;
+
+ r700InitSQConfig(ctx);
+
+ r700ColorMask(ctx,
+ ctx->Color.ColorMask[RCOMP],
+ ctx->Color.ColorMask[GCOMP],
+ ctx->Color.ColorMask[BCOMP],
+ ctx->Color.ColorMask[ACOMP]);
+
+ r700Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
+ r700DepthMask(ctx, ctx->Depth.Mask);
+ r700DepthFunc(ctx, ctx->Depth.Func);
+ SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
+
+ r700->DB_DEPTH_CLEAR.u32All = 0x3F800000;
+
+ r700->DB_RENDER_CONTROL.u32All = 0;
+ SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit);
+ SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit);
+ r700->DB_RENDER_OVERRIDE.u32All = 0;
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+ SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit);
+ SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
+ SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
+ SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);
+
+ r700->DB_ALPHA_TO_MASK.u32All = 0;
+ SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask);
+ SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET1_shift, ALPHA_TO_MASK_OFFSET1_mask);
+ SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET2_shift, ALPHA_TO_MASK_OFFSET2_mask);
+ SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET3_shift, ALPHA_TO_MASK_OFFSET3_mask);
+
+ /* stencil */
+ r700Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled);
+ r700StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]);
+ r700StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0],
+ ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]);
+ r700StencilOpSeparate(ctx, 0, ctx->Stencil.FailFunc[0],
+ ctx->Stencil.ZFailFunc[0],
+ ctx->Stencil.ZPassFunc[0]);
+
+ r700UpdateCulling(ctx);
+
+ r700SetBlendState(ctx);
+ r700SetLogicOpState(ctx);
+
+ r700AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
+ r700Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
+
+ r700PointSize(ctx, 1.0);
+
+ CLEARfield(r700->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask);
+ SETfield(r700->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask);
+
+ r700LineWidth(ctx, 1.0);
+
+ r700->PA_SC_LINE_CNTL.u32All = 0;
+ CLEARbit(r700->PA_SC_LINE_CNTL.u32All, EXPAND_LINE_WIDTH_bit);
+ SETbit(r700->PA_SC_LINE_CNTL.u32All, LAST_PIXEL_bit);
+
+ r700ShadeModel(ctx, ctx->Light.ShadeModel);
+ r700PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode);
+ r700PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode);
+ r700PolygonOffset(ctx, ctx->Polygon.OffsetFactor,
+ ctx->Polygon.OffsetUnits);
+ r700Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint);
+ r700Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine);
+ r700Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill);
+
+ /* CB */
+ r700BlendColor(ctx, ctx->Color.BlendColor);
+
+ r700->CB_CLEAR_RED_R6XX.f32All = 1.0; //r6xx only
+ r700->CB_CLEAR_GREEN_R6XX.f32All = 0.0; //r6xx only
+ r700->CB_CLEAR_BLUE_R6XX.f32All = 1.0; //r6xx only
+ r700->CB_CLEAR_ALPHA_R6XX.f32All = 1.0; //r6xx only
+ r700->CB_FOG_RED_R6XX.u32All = 0; //r6xx only
+ r700->CB_FOG_GREEN_R6XX.u32All = 0; //r6xx only
+ r700->CB_FOG_BLUE_R6XX.u32All = 0; //r6xx only
+
+ /* Disable color compares */
+ SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
+ CLRCMP_FCN_SRC_shift, CLRCMP_FCN_SRC_mask);
+ SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
+ CLRCMP_FCN_DST_shift, CLRCMP_FCN_DST_mask);
+ SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_SEL_SRC,
+ CLRCMP_FCN_SEL_shift, CLRCMP_FCN_SEL_mask);
+
+ /* Zero out source */
+ r700->CB_CLRCMP_SRC.u32All = 0x00000000;
+
+ /* Put a compare color in for error checking */
+ r700->CB_CLRCMP_DST.u32All = 0x000000FF;
+
+ /* Set up color compare mask */
+ r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF;
+
+ /* screen/window/view */
+ SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask);
+
+ context->radeon.hw.all_dirty = GL_TRUE;
+
+}
+
+void r700InitStateFuncs(struct dd_function_table *functions) //-----------------
+{
+ functions->UpdateState = r700InvalidateState;
+ functions->AlphaFunc = r700AlphaFunc;
+ functions->BlendColor = r700BlendColor;
+ functions->BlendEquationSeparate = r700BlendEquationSeparate;
+ functions->BlendFuncSeparate = r700BlendFuncSeparate;
+ functions->Enable = r700Enable;
+ functions->ColorMask = r700ColorMask;
+ functions->DepthFunc = r700DepthFunc;
+ functions->DepthMask = r700DepthMask;
+ functions->CullFace = r700CullFace;
+ functions->Fogfv = r700Fogfv;
+ functions->FrontFace = r700FrontFace;
+ functions->ShadeModel = r700ShadeModel;
+ functions->LogicOpcode = r700LogicOpcode;
+
+ /* ARB_point_parameters */
+ functions->PointParameterfv = r700PointParameter;
+
+ /* Stencil related */
+ functions->StencilFuncSeparate = r700StencilFuncSeparate;
+ functions->StencilMaskSeparate = r700StencilMaskSeparate;
+ functions->StencilOpSeparate = r700StencilOpSeparate;
+
+ /* Viewport related */
+ functions->Viewport = r700Viewport;
+ functions->DepthRange = r700DepthRange;
+ functions->PointSize = r700PointSize;
+ functions->LineWidth = r700LineWidth;
+ functions->LineStipple = r700LineStipple;
+
+ functions->PolygonOffset = r700PolygonOffset;
+ functions->PolygonMode = r700PolygonMode;
+
+ functions->RenderMode = r700RenderMode;
+
+ functions->ClipPlane = r700ClipPlane;
+
+ functions->Scissor = radeonScissor;
+
+ functions->DrawBuffer = radeonDrawBuffer;
+ functions->ReadBuffer = radeonReadBuffer;
+
+}
+
diff --git a/r600/r700_state.h b/r600/r700_state.h
new file mode 100644
index 0000000..0f53d5b
--- /dev/null
+++ b/r600/r700_state.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_STATE_H
+#define _R700_STATE_H
+
+#include "main/mtypes.h"
+
+#include "r600_context.h"
+
+#include "r700_chip.h"
+
+extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state);
+extern void r700UpdateShaders (GLcontext * ctx);
+
+extern void r700UpdateViewportOffset(GLcontext * ctx);
+
+extern void r700InitState (GLcontext * ctx);
+extern void r700InitStateFuncs (struct dd_function_table *functions);
+
+extern void r700SetScissor(context_t *context);
+
+#endif /* _R600_SCREEN_H */
diff --git a/r600/r700_vertprog.c b/r600/r700_vertprog.c
new file mode 100644
index 0000000..04726ec
--- /dev/null
+++ b/r600/r700_vertprog.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "tnl/t_context.h"
+#include "shader/program.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+
+#include "radeon_debug.h"
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+#include "shader/programopt.h"
+
+#include "r700_debug.h"
+#include "r700_vertprog.h"
+
+unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
+ struct gl_vertex_program *mesa_vp,
+ unsigned int unStart)
+{
+ unsigned int i;
+ unsigned int unBit;
+ unsigned int unTotal = unStart;
+
+ //!!!!!!! THE ORDER MATCH FS INPUT
+
+ unBit = 1 << VERT_RESULT_HPOS;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++;
+ }
+
+ unBit = 1 << VERT_RESULT_COL0;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
+ }
+
+ unBit = 1 << VERT_RESULT_COL1;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
+ }
+
+ //TODO : dealing back face.
+ unBit = 1 << VERT_RESULT_BFC0;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++;
+ }
+
+ unBit = 1 << VERT_RESULT_BFC1;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++;
+ }
+
+ //TODO : dealing fog.
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
+ }
+
+ //TODO : dealing point size.
+ unBit = 1 << VERT_RESULT_PSIZ;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
+ }
+
+ for(i=0; i<8; i++)
+ {
+ unBit = 1 << (VERT_RESULT_TEX0 + i);
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++;
+ }
+ }
+
+ return (unTotal - unStart);
+}
+
+unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm,
+ struct gl_vertex_program *mesa_vp,
+ unsigned int unStart)
+{
+ int i;
+ unsigned int unBit;
+ unsigned int unTotal = unStart;
+ for(i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(mesa_vp->Base.InputsRead & unBit)
+ {
+ pAsm->ucVP_AttributeMap[i] = unTotal++;
+ }
+ }
+ return (unTotal - unStart);
+}
+
+GLboolean Process_Vertex_Program_Vfetch_Instructions(
+ struct r700_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp)
+{
+ int i;
+ unsigned int unBit;
+ VTX_FETCH_METHOD vtxFetchMethod;
+ vtxFetchMethod.bEnableMini = GL_FALSE;
+ vtxFetchMethod.mega_fetch_remainder = 0;
+
+ for(i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(mesa_vp->Base.InputsRead & unBit)
+ {
+ assemble_vfetch_instruction(&vp->r700AsmCode,
+ i,
+ vp->r700AsmCode.ucVP_AttributeMap[i],
+ vp->aos_desc[i].size,
+ vp->aos_desc[i].type,
+ &vtxFetchMethod);
+ }
+ }
+
+ return GL_TRUE;
+}
+
+void Map_Vertex_Program(struct r700_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp)
+{
+ GLuint ui;
+ r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
+ unsigned int num_inputs;
+
+ // R0 will always be used for index into vertex buffer
+ pAsm->number_used_registers = 1;
+ pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
+
+ // Map Inputs: Add 1 to mapping since R0 is used for index
+ num_inputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
+ pAsm->number_used_registers += num_inputs;
+
+ // Create VFETCH instructions for inputs
+ if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) )
+ {
+ radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n");
+ return; //error
+ }
+
+ // Map Outputs
+ pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
+
+ pAsm->starting_export_register_number = pAsm->number_used_registers;
+
+ pAsm->number_used_registers += pAsm->number_of_exports;
+
+ pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
+
+ for(ui=0; ui<pAsm->number_of_exports; ui++)
+ {
+ pAsm->pucOutMask[ui] = 0x0;
+ }
+
+ /* Map temporary registers (GPRs) */
+ pAsm->starting_temp_register_number = pAsm->number_used_registers;
+
+ if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
+ { /* arb uses NumNativeTemporaries */
+ pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
+ }
+ else
+ { /* fix func t_vp uses NumTemporaries */
+ pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
+ }
+
+ pAsm->uFirstHelpReg = pAsm->number_used_registers;
+}
+
+GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp)
+{
+ GLuint i, j;
+ GLint * puiTEMPwrites;
+ struct prog_instruction *pILInst;
+ InstDeps *pInstDeps;
+
+ puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries);
+ for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
+ {
+ puiTEMPwrites[i] = -1;
+ }
+
+ pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
+
+ for(i=0; i<mesa_vp->Base.NumInstructions; i++)
+ {
+ pInstDeps[i].nDstDep = -1;
+ pILInst = &(mesa_vp->Base.Instructions[i]);
+
+ //Dst
+ if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
+ {
+ //Set lastwrite for the temp
+ puiTEMPwrites[pILInst->DstReg.Index] = i;
+ }
+
+ //Src
+ for(j=0; j<3; j++)
+ {
+ if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
+ {
+ //Set dep.
+ pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
+ }
+ else
+ {
+ pInstDeps[i].nSrcDeps[j] = -1;
+ }
+ }
+ }
+
+ vp->r700AsmCode.pInstDeps = pInstDeps;
+
+ FREE(puiTEMPwrites);
+
+ return GL_TRUE;
+}
+
+struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
+ struct gl_vertex_program *mesa_vp)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct r700_vertex_program *vp;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
+ unsigned int unBit;
+ unsigned int i;
+
+ vp = _mesa_calloc(sizeof(*vp));
+ vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base);
+
+ if (mesa_vp->IsPositionInvariant)
+ {
+ _mesa_insert_mvp_code(ctx, vp->mesa_program);
+ }
+
+ for(i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(vp->mesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */
+ {
+ vp->aos_desc[i].size = vb->AttribPtr[i]->size;
+ vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/
+ vp->aos_desc[i].type = GL_FLOAT;
+ }
+ }
+
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+ {
+ vp->r700AsmCode.bR6xx = 1;
+ }
+
+ //Init_Program
+ Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
+ Map_Vertex_Program( vp, vp->mesa_program );
+
+ if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
+ {
+ return NULL;
+ }
+
+ if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions,
+ &(vp->mesa_program->Base.Instructions[0]),
+ &(vp->r700AsmCode)) )
+ {
+ return NULL;
+ }
+
+ if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
+ {
+ return NULL;
+ }
+
+ vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
+ : (vp->r700AsmCode.number_used_registers - 1);
+
+ vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;
+
+ vp->translated = GL_TRUE;
+
+ return vp;
+}
+
+void r700SelectVertexShader(GLcontext *ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct r700_vertex_program_cont *vpc;
+ struct r700_vertex_program *vp;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
+ unsigned int unBit;
+ unsigned int i;
+ GLboolean match;
+ GLbitfield InputsRead;
+
+ vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+
+ InputsRead = vpc->mesa_program.Base.InputsRead;
+ if (vpc->mesa_program.IsPositionInvariant)
+ {
+ InputsRead |= VERT_BIT_POS;
+ }
+
+ for (vp = vpc->progs; vp; vp = vp->next)
+ {
+ match = GL_TRUE;
+ for(i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(InputsRead & unBit)
+ {
+ if (vp->aos_desc[i].size != vb->AttribPtr[i]->size)
+ match = GL_FALSE;
+ break;
+ }
+ }
+ if (match)
+ {
+ context->selected_vp = vp;
+ return;
+ }
+ }
+
+ vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) );
+ if(!vp)
+ {
+ radeon_error("Failed to translate vertex shader. \n");
+ return;
+ }
+ vp->next = vpc->progs;
+ vpc->progs = vp;
+ context->selected_vp = vp;
+ return;
+}
+
+void * r700GetActiveVpShaderBo(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct r700_vertex_program *vp = context->selected_vp;;
+
+ if (vp)
+ return vp->shaderbo;
+ else
+ return NULL;
+}
+
+GLboolean r700SetupVertexProgram(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ struct r700_vertex_program *vp = context->selected_vp;
+
+ struct gl_program_parameter_list *paramList;
+ unsigned int unNumParamData;
+ unsigned int ui;
+
+ if(GL_FALSE == vp->loaded)
+ {
+ if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
+ {
+ Assemble( &(vp->r700Shader) );
+ }
+
+ /* Load vp to gpu */
+ r600EmitShader(ctx,
+ &(vp->shaderbo),
+ (GLvoid *)(vp->r700Shader.pProgram),
+ vp->r700Shader.uShaderBinaryDWORDSize,
+ "VS");
+
+ vp->loaded = GL_TRUE;
+ }
+
+ DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
+ vp->r700Shader.uShaderBinaryDWORDSize);
+
+ /* TODO : enable this after MemUse fixed *=
+ (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
+ */
+
+ R600_STATECHANGE(context, vs);
+ R600_STATECHANGE(context, fs); /* hack */
+
+ r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
+ SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
+
+ r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */
+
+ SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
+ NUM_GPRS_shift, NUM_GPRS_mask);
+
+ if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
+ {
+ SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
+ STACK_SIZE_shift, STACK_SIZE_mask);
+ }
+
+ R600_STATECHANGE(context, spi);
+
+ SETfield(r700->SPI_VS_OUT_CONFIG.u32All,
+ vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
+ VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
+ SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
+ NUM_INTERP_shift, NUM_INTERP_mask);
+
+ /*
+ SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+ CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+ */
+
+ /* sent out shader constants. */
+ paramList = vp->mesa_program->Base.Parameters;
+
+ if(NULL != paramList) {
+ _mesa_load_state_parameters(ctx, paramList);
+
+ if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
+ return GL_FALSE;
+
+ R600_STATECHANGE(context, vs_consts);
+
+ r700->vs.num_consts = paramList->NumParameters;
+
+ unNumParamData = paramList->NumParameters;
+
+ for(ui=0; ui<unNumParamData; ui++) {
+ r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
+ r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
+ r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
+ r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ }
+ } else
+ r700->vs.num_consts = 0;
+
+ return GL_TRUE;
+}
diff --git a/r600/r700_vertprog.h b/r600/r700_vertprog.h
new file mode 100644
index 0000000..c48764c
--- /dev/null
+++ b/r600/r700_vertprog.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef _R700_VERTPROG_H_
+#define _R700_VERTPROG_H_
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+
+#include "r700_shader.h"
+#include "r700_assembler.h"
+
+typedef struct ArrayDesc //TEMP
+{
+ GLint size; //number of data element
+ GLenum type; //data element type
+ GLsizei stride;
+} ArrayDesc;
+
+struct r700_vertex_program
+{
+ struct gl_vertex_program *mesa_program; /* Must be first */
+
+ struct r700_vertex_program *next;
+
+ r700_AssemblerBase r700AsmCode;
+ R700_Shader r700Shader;
+
+ GLboolean translated;
+ GLboolean loaded;
+ GLboolean needUpdateVF;
+
+ void * shaderbo;
+
+ ArrayDesc aos_desc[VERT_ATTRIB_MAX];
+};
+
+struct r700_vertex_program_cont
+{
+ struct gl_vertex_program mesa_program;
+
+ struct r700_vertex_program *progs;
+};
+
+//Internal
+unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
+ struct gl_vertex_program *mesa_vp,
+ unsigned int unStart);
+unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm,
+ struct gl_vertex_program *mesa_vp,
+ unsigned int unStart);
+GLboolean Process_Vertex_Program_Vfetch_Instructions(
+ struct r700_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp);
+void Map_Vertex_Program(struct r700_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp);
+GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp);
+
+struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
+ struct gl_vertex_program *mesa_vp);
+
+/* Interface */
+extern void r700SelectVertexShader(GLcontext *ctx);
+
+extern GLboolean r700SetupVertexProgram(GLcontext * ctx);
+
+extern void * r700GetActiveVpShaderBo(GLcontext * ctx);
+
+#endif /* _R700_VERTPROG_H_ */
diff --git a/r600/sq_micro_reg.h b/r600/sq_micro_reg.h
new file mode 100644
index 0000000..bfd21ce
--- /dev/null
+++ b/r600/sq_micro_reg.h
@@ -0,0 +1,2008 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Contacts:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#if !defined (_SQ_MICRO_REG_H)
+#define _SQ_MICRO_REG_H
+
+#if defined(LITTLEENDIAN_CPU)
+#elif defined(BIGENDIAN_CPU)
+#else
+#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
+#endif
+
+/*
+ * SQ_ALU_SRC_GPR_BASE value
+ */
+
+#define SQ_ALU_SRC_GPR_BASE 0x00000000
+
+/*
+ * SQ_ALU_SRC_GPR_SIZE value
+ */
+
+#define SQ_ALU_SRC_GPR_SIZE 0x00000080
+
+/*
+ * SQ_ALU_SRC_KCACHE0_BASE value
+ */
+
+#define SQ_ALU_SRC_KCACHE0_BASE 0x00000080
+
+/*
+ * SQ_ALU_SRC_KCACHE0_SIZE value
+ */
+
+#define SQ_ALU_SRC_KCACHE0_SIZE 0x00000020
+
+/*
+ * SQ_ALU_SRC_KCACHE1_BASE value
+ */
+
+#define SQ_ALU_SRC_KCACHE1_BASE 0x000000a0
+
+/*
+ * SQ_ALU_SRC_KCACHE1_SIZE value
+ */
+
+#define SQ_ALU_SRC_KCACHE1_SIZE 0x00000020
+
+/*
+ * SQ_ALU_SRC_CFILE_BASE value
+ */
+
+#define SQ_ALU_SRC_CFILE_BASE 0x00000100
+
+/*
+ * SQ_ALU_SRC_CFILE_SIZE value
+ */
+
+#define SQ_ALU_SRC_CFILE_SIZE 0x00000100
+
+/*
+ * SQ_SP_OP_REDUC_BEGIN value
+ */
+
+#define SQ_SP_OP_REDUC_BEGIN 0x00000050
+
+/*
+ * SQ_SP_OP_REDUC_END value
+ */
+
+#define SQ_SP_OP_REDUC_END 0x00000053
+
+/*
+ * SQ_SP_OP_TRANS_BEGIN value
+ */
+
+#define SQ_SP_OP_TRANS_BEGIN 0x00000060
+
+/*
+ * SQ_SP_OP_TRANS_END value
+ */
+
+#define SQ_SP_OP_TRANS_END 0x0000007f
+
+/*
+ * SQ_CF_WORD0 struct
+ */
+
+#define SQ_CF_WORD0_ADDR_SIZE 32
+
+#define SQ_CF_WORD0_ADDR_SHIFT 0
+
+#define SQ_CF_WORD0_ADDR_MASK 0xffffffff
+
+#define SQ_CF_WORD0_MASK \
+ (SQ_CF_WORD0_ADDR_MASK)
+
+#define SQ_CF_WORD0_DEFAULT 0xcdcdcdcd
+
+#define SQ_CF_WORD0_GET_ADDR(sq_cf_word0) \
+ ((sq_cf_word0 & SQ_CF_WORD0_ADDR_MASK) >> SQ_CF_WORD0_ADDR_SHIFT)
+
+#define SQ_CF_WORD0_SET_ADDR(sq_cf_word0_reg, addr) \
+ sq_cf_word0_reg = (sq_cf_word0_reg & ~SQ_CF_WORD0_ADDR_MASK) | (addr << SQ_CF_WORD0_ADDR_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_cf_word0_t {
+ unsigned int addr : SQ_CF_WORD0_ADDR_SIZE;
+ } sq_cf_word0_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_cf_word0_t {
+ unsigned int addr : SQ_CF_WORD0_ADDR_SIZE;
+ } sq_cf_word0_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_cf_word0_t f;
+} sq_cf_word0_u;
+
+
+/*
+ * SQ_CF_WORD1 struct
+ */
+
+#define SQ_CF_WORD1_POP_COUNT_SIZE 3
+#define SQ_CF_WORD1_CF_CONST_SIZE 5
+#define SQ_CF_WORD1_COND_SIZE 2
+#define SQ_CF_WORD1_COUNT_SIZE 3
+#define SQ_CF_WORD1_CALL_COUNT_SIZE 6
+#define SQ_CF_WORD1_COUNT_3_SIZE 1
+#define SQ_CF_WORD1_END_OF_PROGRAM_SIZE 1
+#define SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE 1
+#define SQ_CF_WORD1_CF_INST_SIZE 7
+#define SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE 1
+#define SQ_CF_WORD1_BARRIER_SIZE 1
+
+#define SQ_CF_WORD1_POP_COUNT_SHIFT 0
+#define SQ_CF_WORD1_CF_CONST_SHIFT 3
+#define SQ_CF_WORD1_COND_SHIFT 8
+#define SQ_CF_WORD1_COUNT_SHIFT 10
+#define SQ_CF_WORD1_CALL_COUNT_SHIFT 13
+#define SQ_CF_WORD1_COUNT_3_SHIFT 19
+#define SQ_CF_WORD1_END_OF_PROGRAM_SHIFT 21
+#define SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT 22
+#define SQ_CF_WORD1_CF_INST_SHIFT 23
+#define SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT 30
+#define SQ_CF_WORD1_BARRIER_SHIFT 31
+
+#define SQ_CF_WORD1_POP_COUNT_MASK 0x00000007
+#define SQ_CF_WORD1_CF_CONST_MASK 0x000000f8
+#define SQ_CF_WORD1_COND_MASK 0x00000300
+#define SQ_CF_WORD1_COUNT_MASK 0x00001c00
+#define SQ_CF_WORD1_CALL_COUNT_MASK 0x0007e000
+#define SQ_CF_WORD1_COUNT_3_MASK 0x00080000
+#define SQ_CF_WORD1_END_OF_PROGRAM_MASK 0x00200000
+#define SQ_CF_WORD1_VALID_PIXEL_MODE_MASK 0x00400000
+#define SQ_CF_WORD1_CF_INST_MASK 0x3f800000
+#define SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000
+#define SQ_CF_WORD1_BARRIER_MASK 0x80000000
+
+#define SQ_CF_WORD1_MASK \
+ (SQ_CF_WORD1_POP_COUNT_MASK | \
+ SQ_CF_WORD1_CF_CONST_MASK | \
+ SQ_CF_WORD1_COND_MASK | \
+ SQ_CF_WORD1_COUNT_MASK | \
+ SQ_CF_WORD1_CALL_COUNT_MASK | \
+ SQ_CF_WORD1_COUNT_3_MASK | \
+ SQ_CF_WORD1_END_OF_PROGRAM_MASK | \
+ SQ_CF_WORD1_VALID_PIXEL_MODE_MASK | \
+ SQ_CF_WORD1_CF_INST_MASK | \
+ SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK | \
+ SQ_CF_WORD1_BARRIER_MASK)
+
+#define SQ_CF_WORD1_DEFAULT 0xcdcdcdcd
+
+#define SQ_CF_WORD1_GET_POP_COUNT(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_POP_COUNT_MASK) >> SQ_CF_WORD1_POP_COUNT_SHIFT)
+#define SQ_CF_WORD1_GET_CF_CONST(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_CF_CONST_MASK) >> SQ_CF_WORD1_CF_CONST_SHIFT)
+#define SQ_CF_WORD1_GET_COND(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_COND_MASK) >> SQ_CF_WORD1_COND_SHIFT)
+#define SQ_CF_WORD1_GET_COUNT(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_COUNT_MASK) >> SQ_CF_WORD1_COUNT_SHIFT)
+#define SQ_CF_WORD1_GET_CALL_COUNT(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_CALL_COUNT_MASK) >> SQ_CF_WORD1_CALL_COUNT_SHIFT)
+#define SQ_CF_WORD1_GET_COUNT_3(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_COUNT_3_MASK) >> SQ_CF_WORD1_COUNT_3_SHIFT)
+#define SQ_CF_WORD1_GET_END_OF_PROGRAM(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_END_OF_PROGRAM_MASK) >> SQ_CF_WORD1_END_OF_PROGRAM_SHIFT)
+#define SQ_CF_WORD1_GET_VALID_PIXEL_MODE(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_VALID_PIXEL_MODE_MASK) >> SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT)
+#define SQ_CF_WORD1_GET_CF_INST(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_CF_INST_MASK) >> SQ_CF_WORD1_CF_INST_SHIFT)
+#define SQ_CF_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_WORD1_GET_BARRIER(sq_cf_word1) \
+ ((sq_cf_word1 & SQ_CF_WORD1_BARRIER_MASK) >> SQ_CF_WORD1_BARRIER_SHIFT)
+
+#define SQ_CF_WORD1_SET_POP_COUNT(sq_cf_word1_reg, pop_count) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_POP_COUNT_MASK) | (pop_count << SQ_CF_WORD1_POP_COUNT_SHIFT)
+#define SQ_CF_WORD1_SET_CF_CONST(sq_cf_word1_reg, cf_const) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_CF_CONST_MASK) | (cf_const << SQ_CF_WORD1_CF_CONST_SHIFT)
+#define SQ_CF_WORD1_SET_COND(sq_cf_word1_reg, cond) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_COND_MASK) | (cond << SQ_CF_WORD1_COND_SHIFT)
+#define SQ_CF_WORD1_SET_COUNT(sq_cf_word1_reg, count) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_COUNT_MASK) | (count << SQ_CF_WORD1_COUNT_SHIFT)
+#define SQ_CF_WORD1_SET_CALL_COUNT(sq_cf_word1_reg, call_count) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_CALL_COUNT_MASK) | (call_count << SQ_CF_WORD1_CALL_COUNT_SHIFT)
+#define SQ_CF_WORD1_SET_COUNT_3(sq_cf_word1_reg, count_3) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_COUNT_3_MASK) | (count_3 << SQ_CF_WORD1_COUNT_3_SHIFT)
+#define SQ_CF_WORD1_SET_END_OF_PROGRAM(sq_cf_word1_reg, end_of_program) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_END_OF_PROGRAM_MASK) | (end_of_program << SQ_CF_WORD1_END_OF_PROGRAM_SHIFT)
+#define SQ_CF_WORD1_SET_VALID_PIXEL_MODE(sq_cf_word1_reg, valid_pixel_mode) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_VALID_PIXEL_MODE_MASK) | (valid_pixel_mode << SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT)
+#define SQ_CF_WORD1_SET_CF_INST(sq_cf_word1_reg, cf_inst) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_CF_INST_MASK) | (cf_inst << SQ_CF_WORD1_CF_INST_SHIFT)
+#define SQ_CF_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_word1_reg, whole_quad_mode) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK) | (whole_quad_mode << SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_WORD1_SET_BARRIER(sq_cf_word1_reg, barrier) \
+ sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_BARRIER_MASK) | (barrier << SQ_CF_WORD1_BARRIER_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_cf_word1_t {
+ unsigned int pop_count : SQ_CF_WORD1_POP_COUNT_SIZE;
+ unsigned int cf_const : SQ_CF_WORD1_CF_CONST_SIZE;
+ unsigned int cond : SQ_CF_WORD1_COND_SIZE;
+ unsigned int count : SQ_CF_WORD1_COUNT_SIZE;
+ unsigned int call_count : SQ_CF_WORD1_CALL_COUNT_SIZE;
+ unsigned int count_3 : SQ_CF_WORD1_COUNT_3_SIZE;
+ unsigned int : 1;
+ unsigned int end_of_program : SQ_CF_WORD1_END_OF_PROGRAM_SIZE;
+ unsigned int valid_pixel_mode : SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE;
+ unsigned int cf_inst : SQ_CF_WORD1_CF_INST_SIZE;
+ unsigned int whole_quad_mode : SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE;
+ unsigned int barrier : SQ_CF_WORD1_BARRIER_SIZE;
+ } sq_cf_word1_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_cf_word1_t {
+ unsigned int barrier : SQ_CF_WORD1_BARRIER_SIZE;
+ unsigned int whole_quad_mode : SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE;
+ unsigned int cf_inst : SQ_CF_WORD1_CF_INST_SIZE;
+ unsigned int valid_pixel_mode : SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE;
+ unsigned int end_of_program : SQ_CF_WORD1_END_OF_PROGRAM_SIZE;
+ unsigned int : 1;
+ unsigned int count_3 : SQ_CF_WORD1_COUNT_3_SIZE;
+ unsigned int call_count : SQ_CF_WORD1_CALL_COUNT_SIZE;
+ unsigned int count : SQ_CF_WORD1_COUNT_SIZE;
+ unsigned int cond : SQ_CF_WORD1_COND_SIZE;
+ unsigned int cf_const : SQ_CF_WORD1_CF_CONST_SIZE;
+ unsigned int pop_count : SQ_CF_WORD1_POP_COUNT_SIZE;
+ } sq_cf_word1_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_cf_word1_t f;
+} sq_cf_word1_u;
+
+
+/*
+ * SQ_CF_ALU_WORD0 struct
+ */
+
+#define SQ_CF_ALU_WORD0_ADDR_SIZE 22
+#define SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE 4
+#define SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE 4
+#define SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE 2
+
+#define SQ_CF_ALU_WORD0_ADDR_SHIFT 0
+#define SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT 22
+#define SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT 26
+#define SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT 30
+
+#define SQ_CF_ALU_WORD0_ADDR_MASK 0x003fffff
+#define SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK 0x03c00000
+#define SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK 0x3c000000
+#define SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK 0xc0000000
+
+#define SQ_CF_ALU_WORD0_MASK \
+ (SQ_CF_ALU_WORD0_ADDR_MASK | \
+ SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK | \
+ SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK | \
+ SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK)
+
+#define SQ_CF_ALU_WORD0_DEFAULT 0xcdcdcdcd
+
+#define SQ_CF_ALU_WORD0_GET_ADDR(sq_cf_alu_word0) \
+ ((sq_cf_alu_word0 & SQ_CF_ALU_WORD0_ADDR_MASK) >> SQ_CF_ALU_WORD0_ADDR_SHIFT)
+#define SQ_CF_ALU_WORD0_GET_KCACHE_BANK0(sq_cf_alu_word0) \
+ ((sq_cf_alu_word0 & SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK) >> SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT)
+#define SQ_CF_ALU_WORD0_GET_KCACHE_BANK1(sq_cf_alu_word0) \
+ ((sq_cf_alu_word0 & SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK) >> SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT)
+#define SQ_CF_ALU_WORD0_GET_KCACHE_MODE0(sq_cf_alu_word0) \
+ ((sq_cf_alu_word0 & SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK) >> SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT)
+
+#define SQ_CF_ALU_WORD0_SET_ADDR(sq_cf_alu_word0_reg, addr) \
+ sq_cf_alu_word0_reg = (sq_cf_alu_word0_reg & ~SQ_CF_ALU_WORD0_ADDR_MASK) | (addr << SQ_CF_ALU_WORD0_ADDR_SHIFT)
+#define SQ_CF_ALU_WORD0_SET_KCACHE_BANK0(sq_cf_alu_word0_reg, kcache_bank0) \
+ sq_cf_alu_word0_reg = (sq_cf_alu_word0_reg & ~SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK) | (kcache_bank0 << SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT)
+#define SQ_CF_ALU_WORD0_SET_KCACHE_BANK1(sq_cf_alu_word0_reg, kcache_bank1) \
+ sq_cf_alu_word0_reg = (sq_cf_alu_word0_reg & ~SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK) | (kcache_bank1 << SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT)
+#define SQ_CF_ALU_WORD0_SET_KCACHE_MODE0(sq_cf_alu_word0_reg, kcache_mode0) \
+ sq_cf_alu_word0_reg = (sq_cf_alu_word0_reg & ~SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK) | (kcache_mode0 << SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_cf_alu_word0_t {
+ unsigned int addr : SQ_CF_ALU_WORD0_ADDR_SIZE;
+ unsigned int kcache_bank0 : SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE;
+ unsigned int kcache_bank1 : SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE;
+ unsigned int kcache_mode0 : SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE;
+ } sq_cf_alu_word0_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_cf_alu_word0_t {
+ unsigned int kcache_mode0 : SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE;
+ unsigned int kcache_bank1 : SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE;
+ unsigned int kcache_bank0 : SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE;
+ unsigned int addr : SQ_CF_ALU_WORD0_ADDR_SIZE;
+ } sq_cf_alu_word0_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_cf_alu_word0_t f;
+} sq_cf_alu_word0_u;
+
+
+/*
+ * SQ_CF_ALU_WORD1 struct
+ */
+
+#define SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE 2
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE 8
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE 8
+#define SQ_CF_ALU_WORD1_COUNT_SIZE 7
+#define SQ_CF_ALU_WORD1_ALT_CONST_SIZE 1
+#define SQ_CF_ALU_WORD1_CF_INST_SIZE 4
+#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE 1
+#define SQ_CF_ALU_WORD1_BARRIER_SIZE 1
+
+#define SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT 0
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT 2
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT 10
+#define SQ_CF_ALU_WORD1_COUNT_SHIFT 18
+#define SQ_CF_ALU_WORD1_ALT_CONST_SHIFT 25
+#define SQ_CF_ALU_WORD1_CF_INST_SHIFT 26
+#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT 30
+#define SQ_CF_ALU_WORD1_BARRIER_SHIFT 31
+
+#define SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK 0x00000003
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK 0x000003fc
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK 0x0003fc00
+#define SQ_CF_ALU_WORD1_COUNT_MASK 0x01fc0000
+#define SQ_CF_ALU_WORD1_ALT_CONST_MASK 0x02000000
+#define SQ_CF_ALU_WORD1_CF_INST_MASK 0x3c000000
+#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000
+#define SQ_CF_ALU_WORD1_BARRIER_MASK 0x80000000
+
+#define SQ_CF_ALU_WORD1_MASK \
+ (SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK | \
+ SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK | \
+ SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK | \
+ SQ_CF_ALU_WORD1_COUNT_MASK | \
+ SQ_CF_ALU_WORD1_ALT_CONST_MASK | \
+ SQ_CF_ALU_WORD1_CF_INST_MASK | \
+ SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK | \
+ SQ_CF_ALU_WORD1_BARRIER_MASK)
+
+#define SQ_CF_ALU_WORD1_DEFAULT 0xcdcdcdcd
+
+#define SQ_CF_ALU_WORD1_GET_KCACHE_MODE1(sq_cf_alu_word1) \
+ ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK) >> SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_KCACHE_ADDR0(sq_cf_alu_word1) \
+ ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK) >> SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_KCACHE_ADDR1(sq_cf_alu_word1) \
+ ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK) >> SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_COUNT(sq_cf_alu_word1) \
+ ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_COUNT_MASK) >> SQ_CF_ALU_WORD1_COUNT_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_ALT_CONST(sq_cf_alu_word1) \
+ ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_ALT_CONST_MASK) >> SQ_CF_ALU_WORD1_ALT_CONST_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_CF_INST(sq_cf_alu_word1) \
+ ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_CF_INST_MASK) >> SQ_CF_ALU_WORD1_CF_INST_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_alu_word1) \
+ ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_BARRIER(sq_cf_alu_word1) \
+ ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_BARRIER_MASK) >> SQ_CF_ALU_WORD1_BARRIER_SHIFT)
+
+#define SQ_CF_ALU_WORD1_SET_KCACHE_MODE1(sq_cf_alu_word1_reg, kcache_mode1) \
+ sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK) | (kcache_mode1 << SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT)
+#define SQ_CF_ALU_WORD1_SET_KCACHE_ADDR0(sq_cf_alu_word1_reg, kcache_addr0) \
+ sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK) | (kcache_addr0 << SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT)
+#define SQ_CF_ALU_WORD1_SET_KCACHE_ADDR1(sq_cf_alu_word1_reg, kcache_addr1) \
+ sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK) | (kcache_addr1 << SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT)
+#define SQ_CF_ALU_WORD1_SET_COUNT(sq_cf_alu_word1_reg, count) \
+ sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_COUNT_MASK) | (count << SQ_CF_ALU_WORD1_COUNT_SHIFT)
+#define SQ_CF_ALU_WORD1_SET_ALT_CONST(sq_cf_alu_word1_reg, alt_const) \
+ sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_ALT_CONST_MASK) | (alt_const << SQ_CF_ALU_WORD1_ALT_CONST_SHIFT)
+#define SQ_CF_ALU_WORD1_SET_CF_INST(sq_cf_alu_word1_reg, cf_inst) \
+ sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_CF_INST_MASK) | (cf_inst << SQ_CF_ALU_WORD1_CF_INST_SHIFT)
+#define SQ_CF_ALU_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_alu_word1_reg, whole_quad_mode) \
+ sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK) | (whole_quad_mode << SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_ALU_WORD1_SET_BARRIER(sq_cf_alu_word1_reg, barrier) \
+ sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_BARRIER_MASK) | (barrier << SQ_CF_ALU_WORD1_BARRIER_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_cf_alu_word1_t {
+ unsigned int kcache_mode1 : SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE;
+ unsigned int kcache_addr0 : SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE;
+ unsigned int kcache_addr1 : SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE;
+ unsigned int count : SQ_CF_ALU_WORD1_COUNT_SIZE;
+ unsigned int alt_const : SQ_CF_ALU_WORD1_ALT_CONST_SIZE;
+ unsigned int cf_inst : SQ_CF_ALU_WORD1_CF_INST_SIZE;
+ unsigned int whole_quad_mode : SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE;
+ unsigned int barrier : SQ_CF_ALU_WORD1_BARRIER_SIZE;
+ } sq_cf_alu_word1_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_cf_alu_word1_t {
+ unsigned int barrier : SQ_CF_ALU_WORD1_BARRIER_SIZE;
+ unsigned int whole_quad_mode : SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE;
+ unsigned int cf_inst : SQ_CF_ALU_WORD1_CF_INST_SIZE;
+ unsigned int alt_const : SQ_CF_ALU_WORD1_ALT_CONST_SIZE;
+ unsigned int count : SQ_CF_ALU_WORD1_COUNT_SIZE;
+ unsigned int kcache_addr1 : SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE;
+ unsigned int kcache_addr0 : SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE;
+ unsigned int kcache_mode1 : SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE;
+ } sq_cf_alu_word1_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_cf_alu_word1_t f;
+} sq_cf_alu_word1_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD0 struct
+ */
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE 13
+#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE 2
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE 7
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE 7
+#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE 2
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT 0
+#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT 13
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT 15
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT 22
+#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT 23
+#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT 30
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK 0x00001fff
+#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK 0x00006000
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK 0x003f8000
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK 0x00400000
+#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK 0x3f800000
+#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK 0xc0000000
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_MASK \
+ (SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK)
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_DEFAULT 0xcdcdcdcd
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_ARRAY_BASE(sq_cf_alloc_export_word0) \
+ ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_TYPE(sq_cf_alloc_export_word0) \
+ ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_RW_GPR(sq_cf_alloc_export_word0) \
+ ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_RW_REL(sq_cf_alloc_export_word0) \
+ ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_INDEX_GPR(sq_cf_alloc_export_word0) \
+ ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_ELEM_SIZE(sq_cf_alloc_export_word0) \
+ ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT)
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_ARRAY_BASE(sq_cf_alloc_export_word0_reg, array_base) \
+ sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK) | (array_base << SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_TYPE(sq_cf_alloc_export_word0_reg, type) \
+ sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK) | (type << SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_RW_GPR(sq_cf_alloc_export_word0_reg, rw_gpr) \
+ sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK) | (rw_gpr << SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_RW_REL(sq_cf_alloc_export_word0_reg, rw_rel) \
+ sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK) | (rw_rel << SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_INDEX_GPR(sq_cf_alloc_export_word0_reg, index_gpr) \
+ sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK) | (index_gpr << SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_ELEM_SIZE(sq_cf_alloc_export_word0_reg, elem_size) \
+ sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK) | (elem_size << SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_cf_alloc_export_word0_t {
+ unsigned int array_base : SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE;
+ unsigned int type : SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE;
+ unsigned int rw_gpr : SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE;
+ unsigned int rw_rel : SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE;
+ unsigned int index_gpr : SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE;
+ unsigned int elem_size : SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE;
+ } sq_cf_alloc_export_word0_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_cf_alloc_export_word0_t {
+ unsigned int elem_size : SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE;
+ unsigned int index_gpr : SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE;
+ unsigned int rw_rel : SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE;
+ unsigned int rw_gpr : SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE;
+ unsigned int type : SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE;
+ unsigned int array_base : SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE;
+ } sq_cf_alloc_export_word0_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_cf_alloc_export_word0_t f;
+} sq_cf_alloc_export_word0_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1 struct
+ */
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE 4
+#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE 7
+#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE 1
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT 17
+#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT 21
+#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT 22
+#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT 23
+#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT 30
+#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT 31
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK 0x001e0000
+#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK 0x00200000
+#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK 0x00400000
+#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK 0x3f800000
+#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000
+#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK 0x80000000
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_MASK \
+ (SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_DEFAULT 0xcdcc0000
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_BURST_COUNT(sq_cf_alloc_export_word1) \
+ ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_END_OF_PROGRAM(sq_cf_alloc_export_word1) \
+ ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_VALID_PIXEL_MODE(sq_cf_alloc_export_word1) \
+ ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_CF_INST(sq_cf_alloc_export_word1) \
+ ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_alloc_export_word1) \
+ ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_BARRIER(sq_cf_alloc_export_word1) \
+ ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_BURST_COUNT(sq_cf_alloc_export_word1_reg, burst_count) \
+ sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK) | (burst_count << SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_END_OF_PROGRAM(sq_cf_alloc_export_word1_reg, end_of_program) \
+ sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK) | (end_of_program << SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_VALID_PIXEL_MODE(sq_cf_alloc_export_word1_reg, valid_pixel_mode) \
+ sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK) | (valid_pixel_mode << SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_CF_INST(sq_cf_alloc_export_word1_reg, cf_inst) \
+ sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK) | (cf_inst << SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_alloc_export_word1_reg, whole_quad_mode) \
+ sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK) | (whole_quad_mode << SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_BARRIER(sq_cf_alloc_export_word1_reg, barrier) \
+ sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK) | (barrier << SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_cf_alloc_export_word1_t {
+ unsigned int : 17;
+ unsigned int burst_count : SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE;
+ unsigned int end_of_program : SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE;
+ unsigned int valid_pixel_mode : SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE;
+ unsigned int cf_inst : SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE;
+ unsigned int whole_quad_mode : SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE;
+ unsigned int barrier : SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE;
+ } sq_cf_alloc_export_word1_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_cf_alloc_export_word1_t {
+ unsigned int barrier : SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE;
+ unsigned int whole_quad_mode : SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE;
+ unsigned int cf_inst : SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE;
+ unsigned int valid_pixel_mode : SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE;
+ unsigned int end_of_program : SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE;
+ unsigned int burst_count : SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE;
+ unsigned int : 17;
+ } sq_cf_alloc_export_word1_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_cf_alloc_export_word1_t f;
+} sq_cf_alloc_export_word1_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1_BUF struct
+ */
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE 12
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE 4
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT 0
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT 12
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK 0x00000fff
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK 0x0000f000
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_MASK \
+ (SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_DEFAULT 0x0000cdcd
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_GET_ARRAY_SIZE(sq_cf_alloc_export_word1_buf) \
+ ((sq_cf_alloc_export_word1_buf & SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_GET_COMP_MASK(sq_cf_alloc_export_word1_buf) \
+ ((sq_cf_alloc_export_word1_buf & SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_SET_ARRAY_SIZE(sq_cf_alloc_export_word1_buf_reg, array_size) \
+ sq_cf_alloc_export_word1_buf_reg = (sq_cf_alloc_export_word1_buf_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK) | (array_size << SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_SET_COMP_MASK(sq_cf_alloc_export_word1_buf_reg, comp_mask) \
+ sq_cf_alloc_export_word1_buf_reg = (sq_cf_alloc_export_word1_buf_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK) | (comp_mask << SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_cf_alloc_export_word1_buf_t {
+ unsigned int array_size : SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE;
+ unsigned int comp_mask : SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE;
+ unsigned int : 16;
+ } sq_cf_alloc_export_word1_buf_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_cf_alloc_export_word1_buf_t {
+ unsigned int : 16;
+ unsigned int comp_mask : SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE;
+ unsigned int array_size : SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE;
+ } sq_cf_alloc_export_word1_buf_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_cf_alloc_export_word1_buf_t f;
+} sq_cf_alloc_export_word1_buf_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1_SWIZ struct
+ */
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE 3
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT 0
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT 6
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT 9
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK 0x00000007
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK 0x00000038
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK 0x000001c0
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK 0x00000e00
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_MASK \
+ (SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK | \
+ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_DEFAULT 0x00000dcd
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_X(sq_cf_alloc_export_word1_swiz) \
+ ((sq_cf_alloc_export_word1_swiz & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_Y(sq_cf_alloc_export_word1_swiz) \
+ ((sq_cf_alloc_export_word1_swiz & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_Z(sq_cf_alloc_export_word1_swiz) \
+ ((sq_cf_alloc_export_word1_swiz & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_W(sq_cf_alloc_export_word1_swiz) \
+ ((sq_cf_alloc_export_word1_swiz & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_X(sq_cf_alloc_export_word1_swiz_reg, sel_x) \
+ sq_cf_alloc_export_word1_swiz_reg = (sq_cf_alloc_export_word1_swiz_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK) | (sel_x << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_Y(sq_cf_alloc_export_word1_swiz_reg, sel_y) \
+ sq_cf_alloc_export_word1_swiz_reg = (sq_cf_alloc_export_word1_swiz_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK) | (sel_y << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_Z(sq_cf_alloc_export_word1_swiz_reg, sel_z) \
+ sq_cf_alloc_export_word1_swiz_reg = (sq_cf_alloc_export_word1_swiz_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK) | (sel_z << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_W(sq_cf_alloc_export_word1_swiz_reg, sel_w) \
+ sq_cf_alloc_export_word1_swiz_reg = (sq_cf_alloc_export_word1_swiz_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK) | (sel_w << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_cf_alloc_export_word1_swiz_t {
+ unsigned int sel_x : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE;
+ unsigned int sel_y : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE;
+ unsigned int sel_z : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE;
+ unsigned int sel_w : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE;
+ unsigned int : 20;
+ } sq_cf_alloc_export_word1_swiz_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_cf_alloc_export_word1_swiz_t {
+ unsigned int : 20;
+ unsigned int sel_w : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE;
+ unsigned int sel_z : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE;
+ unsigned int sel_y : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE;
+ unsigned int sel_x : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE;
+ } sq_cf_alloc_export_word1_swiz_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_cf_alloc_export_word1_swiz_t f;
+} sq_cf_alloc_export_word1_swiz_u;
+
+
+/*
+ * SQ_ALU_WORD0 struct
+ */
+
+#define SQ_ALU_WORD0_SRC0_SEL_SIZE 9
+#define SQ_ALU_WORD0_SRC0_REL_SIZE 1
+#define SQ_ALU_WORD0_SRC0_CHAN_SIZE 2
+#define SQ_ALU_WORD0_SRC0_NEG_SIZE 1
+#define SQ_ALU_WORD0_SRC1_SEL_SIZE 9
+#define SQ_ALU_WORD0_SRC1_REL_SIZE 1
+#define SQ_ALU_WORD0_SRC1_CHAN_SIZE 2
+#define SQ_ALU_WORD0_SRC1_NEG_SIZE 1
+#define SQ_ALU_WORD0_INDEX_MODE_SIZE 3
+#define SQ_ALU_WORD0_PRED_SEL_SIZE 2
+#define SQ_ALU_WORD0_LAST_SIZE 1
+
+#define SQ_ALU_WORD0_SRC0_SEL_SHIFT 0
+#define SQ_ALU_WORD0_SRC0_REL_SHIFT 9
+#define SQ_ALU_WORD0_SRC0_CHAN_SHIFT 10
+#define SQ_ALU_WORD0_SRC0_NEG_SHIFT 12
+#define SQ_ALU_WORD0_SRC1_SEL_SHIFT 13
+#define SQ_ALU_WORD0_SRC1_REL_SHIFT 22
+#define SQ_ALU_WORD0_SRC1_CHAN_SHIFT 23
+#define SQ_ALU_WORD0_SRC1_NEG_SHIFT 25
+#define SQ_ALU_WORD0_INDEX_MODE_SHIFT 26
+#define SQ_ALU_WORD0_PRED_SEL_SHIFT 29
+#define SQ_ALU_WORD0_LAST_SHIFT 31
+
+#define SQ_ALU_WORD0_SRC0_SEL_MASK 0x000001ff
+#define SQ_ALU_WORD0_SRC0_REL_MASK 0x00000200
+#define SQ_ALU_WORD0_SRC0_CHAN_MASK 0x00000c00
+#define SQ_ALU_WORD0_SRC0_NEG_MASK 0x00001000
+#define SQ_ALU_WORD0_SRC1_SEL_MASK 0x003fe000
+#define SQ_ALU_WORD0_SRC1_REL_MASK 0x00400000
+#define SQ_ALU_WORD0_SRC1_CHAN_MASK 0x01800000
+#define SQ_ALU_WORD0_SRC1_NEG_MASK 0x02000000
+#define SQ_ALU_WORD0_INDEX_MODE_MASK 0x1c000000
+#define SQ_ALU_WORD0_PRED_SEL_MASK 0x60000000
+#define SQ_ALU_WORD0_LAST_MASK 0x80000000
+
+#define SQ_ALU_WORD0_MASK \
+ (SQ_ALU_WORD0_SRC0_SEL_MASK | \
+ SQ_ALU_WORD0_SRC0_REL_MASK | \
+ SQ_ALU_WORD0_SRC0_CHAN_MASK | \
+ SQ_ALU_WORD0_SRC0_NEG_MASK | \
+ SQ_ALU_WORD0_SRC1_SEL_MASK | \
+ SQ_ALU_WORD0_SRC1_REL_MASK | \
+ SQ_ALU_WORD0_SRC1_CHAN_MASK | \
+ SQ_ALU_WORD0_SRC1_NEG_MASK | \
+ SQ_ALU_WORD0_INDEX_MODE_MASK | \
+ SQ_ALU_WORD0_PRED_SEL_MASK | \
+ SQ_ALU_WORD0_LAST_MASK)
+
+#define SQ_ALU_WORD0_DEFAULT 0xcdcdcdcd
+
+#define SQ_ALU_WORD0_GET_SRC0_SEL(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_SRC0_SEL_MASK) >> SQ_ALU_WORD0_SRC0_SEL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC0_REL(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_SRC0_REL_MASK) >> SQ_ALU_WORD0_SRC0_REL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC0_CHAN(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_SRC0_CHAN_MASK) >> SQ_ALU_WORD0_SRC0_CHAN_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC0_NEG(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_SRC0_NEG_MASK) >> SQ_ALU_WORD0_SRC0_NEG_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_SEL(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_SRC1_SEL_MASK) >> SQ_ALU_WORD0_SRC1_SEL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_REL(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_SRC1_REL_MASK) >> SQ_ALU_WORD0_SRC1_REL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_CHAN(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_SRC1_CHAN_MASK) >> SQ_ALU_WORD0_SRC1_CHAN_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_NEG(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_SRC1_NEG_MASK) >> SQ_ALU_WORD0_SRC1_NEG_SHIFT)
+#define SQ_ALU_WORD0_GET_INDEX_MODE(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_INDEX_MODE_MASK) >> SQ_ALU_WORD0_INDEX_MODE_SHIFT)
+#define SQ_ALU_WORD0_GET_PRED_SEL(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_PRED_SEL_MASK) >> SQ_ALU_WORD0_PRED_SEL_SHIFT)
+#define SQ_ALU_WORD0_GET_LAST(sq_alu_word0) \
+ ((sq_alu_word0 & SQ_ALU_WORD0_LAST_MASK) >> SQ_ALU_WORD0_LAST_SHIFT)
+
+#define SQ_ALU_WORD0_SET_SRC0_SEL(sq_alu_word0_reg, src0_sel) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC0_SEL_MASK) | (src0_sel << SQ_ALU_WORD0_SRC0_SEL_SHIFT)
+#define SQ_ALU_WORD0_SET_SRC0_REL(sq_alu_word0_reg, src0_rel) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC0_REL_MASK) | (src0_rel << SQ_ALU_WORD0_SRC0_REL_SHIFT)
+#define SQ_ALU_WORD0_SET_SRC0_CHAN(sq_alu_word0_reg, src0_chan) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC0_CHAN_MASK) | (src0_chan << SQ_ALU_WORD0_SRC0_CHAN_SHIFT)
+#define SQ_ALU_WORD0_SET_SRC0_NEG(sq_alu_word0_reg, src0_neg) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC0_NEG_MASK) | (src0_neg << SQ_ALU_WORD0_SRC0_NEG_SHIFT)
+#define SQ_ALU_WORD0_SET_SRC1_SEL(sq_alu_word0_reg, src1_sel) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC1_SEL_MASK) | (src1_sel << SQ_ALU_WORD0_SRC1_SEL_SHIFT)
+#define SQ_ALU_WORD0_SET_SRC1_REL(sq_alu_word0_reg, src1_rel) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC1_REL_MASK) | (src1_rel << SQ_ALU_WORD0_SRC1_REL_SHIFT)
+#define SQ_ALU_WORD0_SET_SRC1_CHAN(sq_alu_word0_reg, src1_chan) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC1_CHAN_MASK) | (src1_chan << SQ_ALU_WORD0_SRC1_CHAN_SHIFT)
+#define SQ_ALU_WORD0_SET_SRC1_NEG(sq_alu_word0_reg, src1_neg) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC1_NEG_MASK) | (src1_neg << SQ_ALU_WORD0_SRC1_NEG_SHIFT)
+#define SQ_ALU_WORD0_SET_INDEX_MODE(sq_alu_word0_reg, index_mode) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_INDEX_MODE_MASK) | (index_mode << SQ_ALU_WORD0_INDEX_MODE_SHIFT)
+#define SQ_ALU_WORD0_SET_PRED_SEL(sq_alu_word0_reg, pred_sel) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_PRED_SEL_MASK) | (pred_sel << SQ_ALU_WORD0_PRED_SEL_SHIFT)
+#define SQ_ALU_WORD0_SET_LAST(sq_alu_word0_reg, last) \
+ sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_LAST_MASK) | (last << SQ_ALU_WORD0_LAST_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_alu_word0_t {
+ unsigned int src0_sel : SQ_ALU_WORD0_SRC0_SEL_SIZE;
+ unsigned int src0_rel : SQ_ALU_WORD0_SRC0_REL_SIZE;
+ unsigned int src0_chan : SQ_ALU_WORD0_SRC0_CHAN_SIZE;
+ unsigned int src0_neg : SQ_ALU_WORD0_SRC0_NEG_SIZE;
+ unsigned int src1_sel : SQ_ALU_WORD0_SRC1_SEL_SIZE;
+ unsigned int src1_rel : SQ_ALU_WORD0_SRC1_REL_SIZE;
+ unsigned int src1_chan : SQ_ALU_WORD0_SRC1_CHAN_SIZE;
+ unsigned int src1_neg : SQ_ALU_WORD0_SRC1_NEG_SIZE;
+ unsigned int index_mode : SQ_ALU_WORD0_INDEX_MODE_SIZE;
+ unsigned int pred_sel : SQ_ALU_WORD0_PRED_SEL_SIZE;
+ unsigned int last : SQ_ALU_WORD0_LAST_SIZE;
+ } sq_alu_word0_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_alu_word0_t {
+ unsigned int last : SQ_ALU_WORD0_LAST_SIZE;
+ unsigned int pred_sel : SQ_ALU_WORD0_PRED_SEL_SIZE;
+ unsigned int index_mode : SQ_ALU_WORD0_INDEX_MODE_SIZE;
+ unsigned int src1_neg : SQ_ALU_WORD0_SRC1_NEG_SIZE;
+ unsigned int src1_chan : SQ_ALU_WORD0_SRC1_CHAN_SIZE;
+ unsigned int src1_rel : SQ_ALU_WORD0_SRC1_REL_SIZE;
+ unsigned int src1_sel : SQ_ALU_WORD0_SRC1_SEL_SIZE;
+ unsigned int src0_neg : SQ_ALU_WORD0_SRC0_NEG_SIZE;
+ unsigned int src0_chan : SQ_ALU_WORD0_SRC0_CHAN_SIZE;
+ unsigned int src0_rel : SQ_ALU_WORD0_SRC0_REL_SIZE;
+ unsigned int src0_sel : SQ_ALU_WORD0_SRC0_SEL_SIZE;
+ } sq_alu_word0_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_alu_word0_t f;
+} sq_alu_word0_u;
+
+
+/*
+ * SQ_ALU_WORD1 struct
+ */
+
+#define SQ_ALU_WORD1_ENCODING_SIZE 3
+#define SQ_ALU_WORD1_BANK_SWIZZLE_SIZE 3
+#define SQ_ALU_WORD1_DST_GPR_SIZE 7
+#define SQ_ALU_WORD1_DST_REL_SIZE 1
+#define SQ_ALU_WORD1_DST_CHAN_SIZE 2
+#define SQ_ALU_WORD1_CLAMP_SIZE 1
+
+#define SQ_ALU_WORD1_ENCODING_SHIFT 15
+#define SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT 18
+#define SQ_ALU_WORD1_DST_GPR_SHIFT 21
+#define SQ_ALU_WORD1_DST_REL_SHIFT 28
+#define SQ_ALU_WORD1_DST_CHAN_SHIFT 29
+#define SQ_ALU_WORD1_CLAMP_SHIFT 31
+
+#define SQ_ALU_WORD1_ENCODING_MASK 0x00038000
+#define SQ_ALU_WORD1_BANK_SWIZZLE_MASK 0x001c0000
+#define SQ_ALU_WORD1_DST_GPR_MASK 0x0fe00000
+#define SQ_ALU_WORD1_DST_REL_MASK 0x10000000
+#define SQ_ALU_WORD1_DST_CHAN_MASK 0x60000000
+#define SQ_ALU_WORD1_CLAMP_MASK 0x80000000
+
+#define SQ_ALU_WORD1_MASK \
+ (SQ_ALU_WORD1_ENCODING_MASK | \
+ SQ_ALU_WORD1_BANK_SWIZZLE_MASK | \
+ SQ_ALU_WORD1_DST_GPR_MASK | \
+ SQ_ALU_WORD1_DST_REL_MASK | \
+ SQ_ALU_WORD1_DST_CHAN_MASK | \
+ SQ_ALU_WORD1_CLAMP_MASK)
+
+#define SQ_ALU_WORD1_DEFAULT 0xcdcd8000
+
+#define SQ_ALU_WORD1_GET_ENCODING(sq_alu_word1) \
+ ((sq_alu_word1 & SQ_ALU_WORD1_ENCODING_MASK) >> SQ_ALU_WORD1_ENCODING_SHIFT)
+#define SQ_ALU_WORD1_GET_BANK_SWIZZLE(sq_alu_word1) \
+ ((sq_alu_word1 & SQ_ALU_WORD1_BANK_SWIZZLE_MASK) >> SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT)
+#define SQ_ALU_WORD1_GET_DST_GPR(sq_alu_word1) \
+ ((sq_alu_word1 & SQ_ALU_WORD1_DST_GPR_MASK) >> SQ_ALU_WORD1_DST_GPR_SHIFT)
+#define SQ_ALU_WORD1_GET_DST_REL(sq_alu_word1) \
+ ((sq_alu_word1 & SQ_ALU_WORD1_DST_REL_MASK) >> SQ_ALU_WORD1_DST_REL_SHIFT)
+#define SQ_ALU_WORD1_GET_DST_CHAN(sq_alu_word1) \
+ ((sq_alu_word1 & SQ_ALU_WORD1_DST_CHAN_MASK) >> SQ_ALU_WORD1_DST_CHAN_SHIFT)
+#define SQ_ALU_WORD1_GET_CLAMP(sq_alu_word1) \
+ ((sq_alu_word1 & SQ_ALU_WORD1_CLAMP_MASK) >> SQ_ALU_WORD1_CLAMP_SHIFT)
+
+#define SQ_ALU_WORD1_SET_ENCODING(sq_alu_word1_reg, encoding) \
+ sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_ENCODING_MASK) | (encoding << SQ_ALU_WORD1_ENCODING_SHIFT)
+#define SQ_ALU_WORD1_SET_BANK_SWIZZLE(sq_alu_word1_reg, bank_swizzle) \
+ sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_BANK_SWIZZLE_MASK) | (bank_swizzle << SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT)
+#define SQ_ALU_WORD1_SET_DST_GPR(sq_alu_word1_reg, dst_gpr) \
+ sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_DST_GPR_MASK) | (dst_gpr << SQ_ALU_WORD1_DST_GPR_SHIFT)
+#define SQ_ALU_WORD1_SET_DST_REL(sq_alu_word1_reg, dst_rel) \
+ sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_DST_REL_MASK) | (dst_rel << SQ_ALU_WORD1_DST_REL_SHIFT)
+#define SQ_ALU_WORD1_SET_DST_CHAN(sq_alu_word1_reg, dst_chan) \
+ sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_DST_CHAN_MASK) | (dst_chan << SQ_ALU_WORD1_DST_CHAN_SHIFT)
+#define SQ_ALU_WORD1_SET_CLAMP(sq_alu_word1_reg, clamp) \
+ sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_CLAMP_MASK) | (clamp << SQ_ALU_WORD1_CLAMP_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_alu_word1_t {
+ unsigned int : 15;
+ unsigned int encoding : SQ_ALU_WORD1_ENCODING_SIZE;
+ unsigned int bank_swizzle : SQ_ALU_WORD1_BANK_SWIZZLE_SIZE;
+ unsigned int dst_gpr : SQ_ALU_WORD1_DST_GPR_SIZE;
+ unsigned int dst_rel : SQ_ALU_WORD1_DST_REL_SIZE;
+ unsigned int dst_chan : SQ_ALU_WORD1_DST_CHAN_SIZE;
+ unsigned int clamp : SQ_ALU_WORD1_CLAMP_SIZE;
+ } sq_alu_word1_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_alu_word1_t {
+ unsigned int clamp : SQ_ALU_WORD1_CLAMP_SIZE;
+ unsigned int dst_chan : SQ_ALU_WORD1_DST_CHAN_SIZE;
+ unsigned int dst_rel : SQ_ALU_WORD1_DST_REL_SIZE;
+ unsigned int dst_gpr : SQ_ALU_WORD1_DST_GPR_SIZE;
+ unsigned int bank_swizzle : SQ_ALU_WORD1_BANK_SWIZZLE_SIZE;
+ unsigned int encoding : SQ_ALU_WORD1_ENCODING_SIZE;
+ unsigned int : 15;
+ } sq_alu_word1_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_alu_word1_t f;
+} sq_alu_word1_u;
+
+
+/*
+ * SQ_ALU_WORD1_OP2_V2 struct
+ */
+
+#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_OMOD_SIZE 2
+#define SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE 11
+
+#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT 0
+#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT 1
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT 2
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT 3
+#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT 4
+#define SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT 5
+#define SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT 7
+
+#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK 0x00000001
+#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK 0x00000002
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK 0x00000004
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK 0x00000008
+#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK 0x00000010
+#define SQ_ALU_WORD1_OP2_V2_OMOD_MASK 0x00000060
+#define SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK 0x0003ff80
+
+#define SQ_ALU_WORD1_OP2_V2_MASK \
+ (SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK | \
+ SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK | \
+ SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK | \
+ SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK | \
+ SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK | \
+ SQ_ALU_WORD1_OP2_V2_OMOD_MASK | \
+ SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK)
+
+#define SQ_ALU_WORD1_OP2_V2_DEFAULT 0x0001cdcd
+
+#define SQ_ALU_WORD1_OP2_V2_GET_SRC0_ABS(sq_alu_word1_op2_v2) \
+ ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK) >> SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_SRC1_ABS(sq_alu_word1_op2_v2) \
+ ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK) >> SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_UPDATE_EXECUTE_MASK(sq_alu_word1_op2_v2) \
+ ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK) >> SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_UPDATE_PRED(sq_alu_word1_op2_v2) \
+ ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK) >> SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_WRITE_MASK(sq_alu_word1_op2_v2) \
+ ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK) >> SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_OMOD(sq_alu_word1_op2_v2) \
+ ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_OMOD_MASK) >> SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_ALU_INST(sq_alu_word1_op2_v2) \
+ ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK) >> SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT)
+
+#define SQ_ALU_WORD1_OP2_V2_SET_SRC0_ABS(sq_alu_word1_op2_v2_reg, src0_abs) \
+ sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK) | (src0_abs << SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_SET_SRC1_ABS(sq_alu_word1_op2_v2_reg, src1_abs) \
+ sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK) | (src1_abs << SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_SET_UPDATE_EXECUTE_MASK(sq_alu_word1_op2_v2_reg, update_execute_mask) \
+ sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK) | (update_execute_mask << SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_SET_UPDATE_PRED(sq_alu_word1_op2_v2_reg, update_pred) \
+ sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK) | (update_pred << SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_SET_WRITE_MASK(sq_alu_word1_op2_v2_reg, write_mask) \
+ sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK) | (write_mask << SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_SET_OMOD(sq_alu_word1_op2_v2_reg, omod) \
+ sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_OMOD_MASK) | (omod << SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_SET_ALU_INST(sq_alu_word1_op2_v2_reg, alu_inst) \
+ sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK) | (alu_inst << SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_alu_word1_op2_v2_t {
+ unsigned int src0_abs : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+ unsigned int src1_abs : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+ unsigned int update_execute_mask : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+ unsigned int update_pred : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+ unsigned int write_mask : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+ unsigned int omod : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+ unsigned int alu_inst : SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE;
+ unsigned int : 14;
+ } sq_alu_word1_op2_v2_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_alu_word1_op2_v2_t {
+ unsigned int : 14;
+ unsigned int alu_inst : SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE;
+ unsigned int omod : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+ unsigned int write_mask : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+ unsigned int update_pred : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+ unsigned int update_execute_mask : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+ unsigned int src1_abs : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+ unsigned int src0_abs : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+ } sq_alu_word1_op2_v2_t;
+
+#endif
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_alu_word1_op2_r6xx_t {
+ unsigned int src0_abs : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+ unsigned int src1_abs : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+ unsigned int update_execute_mask : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+ unsigned int update_pred : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+ unsigned int write_mask : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+ unsigned int fog_export : 1;
+ unsigned int omod : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+ unsigned int alu_inst : 10;
+ unsigned int : 14;
+ } sq_alu_word1_op2_v1_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_alu_word1_op2_r6xx_t {
+ unsigned int : 14;
+ unsigned int alu_inst : 10;
+ unsigned int omod : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+ unsigned int fog_export : 1;
+ unsigned int write_mask : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+ unsigned int update_pred : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+ unsigned int update_execute_mask : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+ unsigned int src1_abs : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+ unsigned int src0_abs : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+ } sq_alu_word1_op2_v1_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_alu_word1_op2_v2_t f;
+ sq_alu_word1_op2_v1_t f6;
+} sq_alu_word1_op2_v2_u;
+
+
+/*
+ * SQ_ALU_WORD1_OP3 struct
+ */
+
+#define SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE 9
+#define SQ_ALU_WORD1_OP3_SRC2_REL_SIZE 1
+#define SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE 2
+#define SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE 1
+#define SQ_ALU_WORD1_OP3_ALU_INST_SIZE 5
+
+#define SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT 0
+#define SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT 9
+#define SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT 10
+#define SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT 12
+#define SQ_ALU_WORD1_OP3_ALU_INST_SHIFT 13
+
+#define SQ_ALU_WORD1_OP3_SRC2_SEL_MASK 0x000001ff
+#define SQ_ALU_WORD1_OP3_SRC2_REL_MASK 0x00000200
+#define SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK 0x00000c00
+#define SQ_ALU_WORD1_OP3_SRC2_NEG_MASK 0x00001000
+#define SQ_ALU_WORD1_OP3_ALU_INST_MASK 0x0003e000
+
+#define SQ_ALU_WORD1_OP3_MASK \
+ (SQ_ALU_WORD1_OP3_SRC2_SEL_MASK | \
+ SQ_ALU_WORD1_OP3_SRC2_REL_MASK | \
+ SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK | \
+ SQ_ALU_WORD1_OP3_SRC2_NEG_MASK | \
+ SQ_ALU_WORD1_OP3_ALU_INST_MASK)
+
+#define SQ_ALU_WORD1_OP3_DEFAULT 0x0001cdcd
+
+#define SQ_ALU_WORD1_OP3_GET_SRC2_SEL(sq_alu_word1_op3) \
+ ((sq_alu_word1_op3 & SQ_ALU_WORD1_OP3_SRC2_SEL_MASK) >> SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_SRC2_REL(sq_alu_word1_op3) \
+ ((sq_alu_word1_op3 & SQ_ALU_WORD1_OP3_SRC2_REL_MASK) >> SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_SRC2_CHAN(sq_alu_word1_op3) \
+ ((sq_alu_word1_op3 & SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK) >> SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_SRC2_NEG(sq_alu_word1_op3) \
+ ((sq_alu_word1_op3 & SQ_ALU_WORD1_OP3_SRC2_NEG_MASK) >> SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_ALU_INST(sq_alu_word1_op3) \
+ ((sq_alu_word1_op3 & SQ_ALU_WORD1_OP3_ALU_INST_MASK) >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT)
+
+#define SQ_ALU_WORD1_OP3_SET_SRC2_SEL(sq_alu_word1_op3_reg, src2_sel) \
+ sq_alu_word1_op3_reg = (sq_alu_word1_op3_reg & ~SQ_ALU_WORD1_OP3_SRC2_SEL_MASK) | (src2_sel << SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT)
+#define SQ_ALU_WORD1_OP3_SET_SRC2_REL(sq_alu_word1_op3_reg, src2_rel) \
+ sq_alu_word1_op3_reg = (sq_alu_word1_op3_reg & ~SQ_ALU_WORD1_OP3_SRC2_REL_MASK) | (src2_rel << SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT)
+#define SQ_ALU_WORD1_OP3_SET_SRC2_CHAN(sq_alu_word1_op3_reg, src2_chan) \
+ sq_alu_word1_op3_reg = (sq_alu_word1_op3_reg & ~SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK) | (src2_chan << SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT)
+#define SQ_ALU_WORD1_OP3_SET_SRC2_NEG(sq_alu_word1_op3_reg, src2_neg) \
+ sq_alu_word1_op3_reg = (sq_alu_word1_op3_reg & ~SQ_ALU_WORD1_OP3_SRC2_NEG_MASK) | (src2_neg << SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT)
+#define SQ_ALU_WORD1_OP3_SET_ALU_INST(sq_alu_word1_op3_reg, alu_inst) \
+ sq_alu_word1_op3_reg = (sq_alu_word1_op3_reg & ~SQ_ALU_WORD1_OP3_ALU_INST_MASK) | (alu_inst << SQ_ALU_WORD1_OP3_ALU_INST_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_alu_word1_op3_t {
+ unsigned int src2_sel : SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE;
+ unsigned int src2_rel : SQ_ALU_WORD1_OP3_SRC2_REL_SIZE;
+ unsigned int src2_chan : SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE;
+ unsigned int src2_neg : SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE;
+ unsigned int alu_inst : SQ_ALU_WORD1_OP3_ALU_INST_SIZE;
+ unsigned int : 14;
+ } sq_alu_word1_op3_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_alu_word1_op3_t {
+ unsigned int : 14;
+ unsigned int alu_inst : SQ_ALU_WORD1_OP3_ALU_INST_SIZE;
+ unsigned int src2_neg : SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE;
+ unsigned int src2_chan : SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE;
+ unsigned int src2_rel : SQ_ALU_WORD1_OP3_SRC2_REL_SIZE;
+ unsigned int src2_sel : SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE;
+ } sq_alu_word1_op3_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_alu_word1_op3_t f;
+} sq_alu_word1_op3_u;
+
+
+/*
+ * SQ_TEX_WORD0 struct
+ */
+
+#define SQ_TEX_WORD0_TEX_INST_SIZE 5
+#define SQ_TEX_WORD0_BC_FRAC_MODE_SIZE 1
+#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE 1
+#define SQ_TEX_WORD0_RESOURCE_ID_SIZE 8
+#define SQ_TEX_WORD0_SRC_GPR_SIZE 7
+#define SQ_TEX_WORD0_SRC_REL_SIZE 1
+#define SQ_TEX_WORD0_ALT_CONST_SIZE 1
+
+#define SQ_TEX_WORD0_TEX_INST_SHIFT 0
+#define SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT 5
+#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT 7
+#define SQ_TEX_WORD0_RESOURCE_ID_SHIFT 8
+#define SQ_TEX_WORD0_SRC_GPR_SHIFT 16
+#define SQ_TEX_WORD0_SRC_REL_SHIFT 23
+#define SQ_TEX_WORD0_ALT_CONST_SHIFT 24
+
+#define SQ_TEX_WORD0_TEX_INST_MASK 0x0000001f
+#define SQ_TEX_WORD0_BC_FRAC_MODE_MASK 0x00000020
+#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK 0x00000080
+#define SQ_TEX_WORD0_RESOURCE_ID_MASK 0x0000ff00
+#define SQ_TEX_WORD0_SRC_GPR_MASK 0x007f0000
+#define SQ_TEX_WORD0_SRC_REL_MASK 0x00800000
+#define SQ_TEX_WORD0_ALT_CONST_MASK 0x01000000
+
+#define SQ_TEX_WORD0_MASK \
+ (SQ_TEX_WORD0_TEX_INST_MASK | \
+ SQ_TEX_WORD0_BC_FRAC_MODE_MASK | \
+ SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK | \
+ SQ_TEX_WORD0_RESOURCE_ID_MASK | \
+ SQ_TEX_WORD0_SRC_GPR_MASK | \
+ SQ_TEX_WORD0_SRC_REL_MASK | \
+ SQ_TEX_WORD0_ALT_CONST_MASK)
+
+#define SQ_TEX_WORD0_DEFAULT 0x01cdcd8d
+
+#define SQ_TEX_WORD0_GET_TEX_INST(sq_tex_word0) \
+ ((sq_tex_word0 & SQ_TEX_WORD0_TEX_INST_MASK) >> SQ_TEX_WORD0_TEX_INST_SHIFT)
+#define SQ_TEX_WORD0_GET_BC_FRAC_MODE(sq_tex_word0) \
+ ((sq_tex_word0 & SQ_TEX_WORD0_BC_FRAC_MODE_MASK) >> SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT)
+#define SQ_TEX_WORD0_GET_FETCH_WHOLE_QUAD(sq_tex_word0) \
+ ((sq_tex_word0 & SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK) >> SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT)
+#define SQ_TEX_WORD0_GET_RESOURCE_ID(sq_tex_word0) \
+ ((sq_tex_word0 & SQ_TEX_WORD0_RESOURCE_ID_MASK) >> SQ_TEX_WORD0_RESOURCE_ID_SHIFT)
+#define SQ_TEX_WORD0_GET_SRC_GPR(sq_tex_word0) \
+ ((sq_tex_word0 & SQ_TEX_WORD0_SRC_GPR_MASK) >> SQ_TEX_WORD0_SRC_GPR_SHIFT)
+#define SQ_TEX_WORD0_GET_SRC_REL(sq_tex_word0) \
+ ((sq_tex_word0 & SQ_TEX_WORD0_SRC_REL_MASK) >> SQ_TEX_WORD0_SRC_REL_SHIFT)
+#define SQ_TEX_WORD0_GET_ALT_CONST(sq_tex_word0) \
+ ((sq_tex_word0 & SQ_TEX_WORD0_ALT_CONST_MASK) >> SQ_TEX_WORD0_ALT_CONST_SHIFT)
+
+#define SQ_TEX_WORD0_SET_TEX_INST(sq_tex_word0_reg, tex_inst) \
+ sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_TEX_INST_MASK) | (tex_inst << SQ_TEX_WORD0_TEX_INST_SHIFT)
+#define SQ_TEX_WORD0_SET_BC_FRAC_MODE(sq_tex_word0_reg, bc_frac_mode) \
+ sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_BC_FRAC_MODE_MASK) | (bc_frac_mode << SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT)
+#define SQ_TEX_WORD0_SET_FETCH_WHOLE_QUAD(sq_tex_word0_reg, fetch_whole_quad) \
+ sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK) | (fetch_whole_quad << SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT)
+#define SQ_TEX_WORD0_SET_RESOURCE_ID(sq_tex_word0_reg, resource_id) \
+ sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_RESOURCE_ID_MASK) | (resource_id << SQ_TEX_WORD0_RESOURCE_ID_SHIFT)
+#define SQ_TEX_WORD0_SET_SRC_GPR(sq_tex_word0_reg, src_gpr) \
+ sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_SRC_GPR_MASK) | (src_gpr << SQ_TEX_WORD0_SRC_GPR_SHIFT)
+#define SQ_TEX_WORD0_SET_SRC_REL(sq_tex_word0_reg, src_rel) \
+ sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_SRC_REL_MASK) | (src_rel << SQ_TEX_WORD0_SRC_REL_SHIFT)
+#define SQ_TEX_WORD0_SET_ALT_CONST(sq_tex_word0_reg, alt_const) \
+ sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_ALT_CONST_MASK) | (alt_const << SQ_TEX_WORD0_ALT_CONST_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_tex_word0_t {
+ unsigned int tex_inst : SQ_TEX_WORD0_TEX_INST_SIZE;
+ unsigned int bc_frac_mode : SQ_TEX_WORD0_BC_FRAC_MODE_SIZE;
+ unsigned int : 1;
+ unsigned int fetch_whole_quad : SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+ unsigned int resource_id : SQ_TEX_WORD0_RESOURCE_ID_SIZE;
+ unsigned int src_gpr : SQ_TEX_WORD0_SRC_GPR_SIZE;
+ unsigned int src_rel : SQ_TEX_WORD0_SRC_REL_SIZE;
+ unsigned int alt_const : SQ_TEX_WORD0_ALT_CONST_SIZE;
+ unsigned int : 7;
+ } sq_tex_word0_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_tex_word0_t {
+ unsigned int : 7;
+ unsigned int alt_const : SQ_TEX_WORD0_ALT_CONST_SIZE;
+ unsigned int src_rel : SQ_TEX_WORD0_SRC_REL_SIZE;
+ unsigned int src_gpr : SQ_TEX_WORD0_SRC_GPR_SIZE;
+ unsigned int resource_id : SQ_TEX_WORD0_RESOURCE_ID_SIZE;
+ unsigned int fetch_whole_quad : SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+ unsigned int : 1;
+ unsigned int bc_frac_mode : SQ_TEX_WORD0_BC_FRAC_MODE_SIZE;
+ unsigned int tex_inst : SQ_TEX_WORD0_TEX_INST_SIZE;
+ } sq_tex_word0_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_tex_word0_t f;
+} sq_tex_word0_u;
+
+
+/*
+ * SQ_TEX_WORD1 struct
+ */
+
+#define SQ_TEX_WORD1_DST_GPR_SIZE 7
+#define SQ_TEX_WORD1_DST_REL_SIZE 1
+#define SQ_TEX_WORD1_DST_SEL_X_SIZE 3
+#define SQ_TEX_WORD1_DST_SEL_Y_SIZE 3
+#define SQ_TEX_WORD1_DST_SEL_Z_SIZE 3
+#define SQ_TEX_WORD1_DST_SEL_W_SIZE 3
+#define SQ_TEX_WORD1_LOD_BIAS_SIZE 7
+#define SQ_TEX_WORD1_COORD_TYPE_X_SIZE 1
+#define SQ_TEX_WORD1_COORD_TYPE_Y_SIZE 1
+#define SQ_TEX_WORD1_COORD_TYPE_Z_SIZE 1
+#define SQ_TEX_WORD1_COORD_TYPE_W_SIZE 1
+
+#define SQ_TEX_WORD1_DST_GPR_SHIFT 0
+#define SQ_TEX_WORD1_DST_REL_SHIFT 7
+#define SQ_TEX_WORD1_DST_SEL_X_SHIFT 9
+#define SQ_TEX_WORD1_DST_SEL_Y_SHIFT 12
+#define SQ_TEX_WORD1_DST_SEL_Z_SHIFT 15
+#define SQ_TEX_WORD1_DST_SEL_W_SHIFT 18
+#define SQ_TEX_WORD1_LOD_BIAS_SHIFT 21
+#define SQ_TEX_WORD1_COORD_TYPE_X_SHIFT 28
+#define SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT 29
+#define SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT 30
+#define SQ_TEX_WORD1_COORD_TYPE_W_SHIFT 31
+
+#define SQ_TEX_WORD1_DST_GPR_MASK 0x0000007f
+#define SQ_TEX_WORD1_DST_REL_MASK 0x00000080
+#define SQ_TEX_WORD1_DST_SEL_X_MASK 0x00000e00
+#define SQ_TEX_WORD1_DST_SEL_Y_MASK 0x00007000
+#define SQ_TEX_WORD1_DST_SEL_Z_MASK 0x00038000
+#define SQ_TEX_WORD1_DST_SEL_W_MASK 0x001c0000
+#define SQ_TEX_WORD1_LOD_BIAS_MASK 0x0fe00000
+#define SQ_TEX_WORD1_COORD_TYPE_X_MASK 0x10000000
+#define SQ_TEX_WORD1_COORD_TYPE_Y_MASK 0x20000000
+#define SQ_TEX_WORD1_COORD_TYPE_Z_MASK 0x40000000
+#define SQ_TEX_WORD1_COORD_TYPE_W_MASK 0x80000000
+
+#define SQ_TEX_WORD1_MASK \
+ (SQ_TEX_WORD1_DST_GPR_MASK | \
+ SQ_TEX_WORD1_DST_REL_MASK | \
+ SQ_TEX_WORD1_DST_SEL_X_MASK | \
+ SQ_TEX_WORD1_DST_SEL_Y_MASK | \
+ SQ_TEX_WORD1_DST_SEL_Z_MASK | \
+ SQ_TEX_WORD1_DST_SEL_W_MASK | \
+ SQ_TEX_WORD1_LOD_BIAS_MASK | \
+ SQ_TEX_WORD1_COORD_TYPE_X_MASK | \
+ SQ_TEX_WORD1_COORD_TYPE_Y_MASK | \
+ SQ_TEX_WORD1_COORD_TYPE_Z_MASK | \
+ SQ_TEX_WORD1_COORD_TYPE_W_MASK)
+
+#define SQ_TEX_WORD1_DEFAULT 0xcdcdcccd
+
+#define SQ_TEX_WORD1_GET_DST_GPR(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_DST_GPR_MASK) >> SQ_TEX_WORD1_DST_GPR_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_REL(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_DST_REL_MASK) >> SQ_TEX_WORD1_DST_REL_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_X(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_DST_SEL_X_MASK) >> SQ_TEX_WORD1_DST_SEL_X_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_Y(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_DST_SEL_Y_MASK) >> SQ_TEX_WORD1_DST_SEL_Y_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_Z(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_DST_SEL_Z_MASK) >> SQ_TEX_WORD1_DST_SEL_Z_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_W(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_DST_SEL_W_MASK) >> SQ_TEX_WORD1_DST_SEL_W_SHIFT)
+#define SQ_TEX_WORD1_GET_LOD_BIAS(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_LOD_BIAS_MASK) >> SQ_TEX_WORD1_LOD_BIAS_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_X(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_COORD_TYPE_X_MASK) >> SQ_TEX_WORD1_COORD_TYPE_X_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_Y(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_COORD_TYPE_Y_MASK) >> SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_Z(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_COORD_TYPE_Z_MASK) >> SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_W(sq_tex_word1) \
+ ((sq_tex_word1 & SQ_TEX_WORD1_COORD_TYPE_W_MASK) >> SQ_TEX_WORD1_COORD_TYPE_W_SHIFT)
+
+#define SQ_TEX_WORD1_SET_DST_GPR(sq_tex_word1_reg, dst_gpr) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_GPR_MASK) | (dst_gpr << SQ_TEX_WORD1_DST_GPR_SHIFT)
+#define SQ_TEX_WORD1_SET_DST_REL(sq_tex_word1_reg, dst_rel) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_REL_MASK) | (dst_rel << SQ_TEX_WORD1_DST_REL_SHIFT)
+#define SQ_TEX_WORD1_SET_DST_SEL_X(sq_tex_word1_reg, dst_sel_x) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_SEL_X_MASK) | (dst_sel_x << SQ_TEX_WORD1_DST_SEL_X_SHIFT)
+#define SQ_TEX_WORD1_SET_DST_SEL_Y(sq_tex_word1_reg, dst_sel_y) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_SEL_Y_MASK) | (dst_sel_y << SQ_TEX_WORD1_DST_SEL_Y_SHIFT)
+#define SQ_TEX_WORD1_SET_DST_SEL_Z(sq_tex_word1_reg, dst_sel_z) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_SEL_Z_MASK) | (dst_sel_z << SQ_TEX_WORD1_DST_SEL_Z_SHIFT)
+#define SQ_TEX_WORD1_SET_DST_SEL_W(sq_tex_word1_reg, dst_sel_w) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_SEL_W_MASK) | (dst_sel_w << SQ_TEX_WORD1_DST_SEL_W_SHIFT)
+#define SQ_TEX_WORD1_SET_LOD_BIAS(sq_tex_word1_reg, lod_bias) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_LOD_BIAS_MASK) | (lod_bias << SQ_TEX_WORD1_LOD_BIAS_SHIFT)
+#define SQ_TEX_WORD1_SET_COORD_TYPE_X(sq_tex_word1_reg, coord_type_x) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_COORD_TYPE_X_MASK) | (coord_type_x << SQ_TEX_WORD1_COORD_TYPE_X_SHIFT)
+#define SQ_TEX_WORD1_SET_COORD_TYPE_Y(sq_tex_word1_reg, coord_type_y) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_COORD_TYPE_Y_MASK) | (coord_type_y << SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT)
+#define SQ_TEX_WORD1_SET_COORD_TYPE_Z(sq_tex_word1_reg, coord_type_z) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_COORD_TYPE_Z_MASK) | (coord_type_z << SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT)
+#define SQ_TEX_WORD1_SET_COORD_TYPE_W(sq_tex_word1_reg, coord_type_w) \
+ sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_COORD_TYPE_W_MASK) | (coord_type_w << SQ_TEX_WORD1_COORD_TYPE_W_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_tex_word1_t {
+ unsigned int dst_gpr : SQ_TEX_WORD1_DST_GPR_SIZE;
+ unsigned int dst_rel : SQ_TEX_WORD1_DST_REL_SIZE;
+ unsigned int : 1;
+ unsigned int dst_sel_x : SQ_TEX_WORD1_DST_SEL_X_SIZE;
+ unsigned int dst_sel_y : SQ_TEX_WORD1_DST_SEL_Y_SIZE;
+ unsigned int dst_sel_z : SQ_TEX_WORD1_DST_SEL_Z_SIZE;
+ unsigned int dst_sel_w : SQ_TEX_WORD1_DST_SEL_W_SIZE;
+ unsigned int lod_bias : SQ_TEX_WORD1_LOD_BIAS_SIZE;
+ unsigned int coord_type_x : SQ_TEX_WORD1_COORD_TYPE_X_SIZE;
+ unsigned int coord_type_y : SQ_TEX_WORD1_COORD_TYPE_Y_SIZE;
+ unsigned int coord_type_z : SQ_TEX_WORD1_COORD_TYPE_Z_SIZE;
+ unsigned int coord_type_w : SQ_TEX_WORD1_COORD_TYPE_W_SIZE;
+ } sq_tex_word1_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_tex_word1_t {
+ unsigned int coord_type_w : SQ_TEX_WORD1_COORD_TYPE_W_SIZE;
+ unsigned int coord_type_z : SQ_TEX_WORD1_COORD_TYPE_Z_SIZE;
+ unsigned int coord_type_y : SQ_TEX_WORD1_COORD_TYPE_Y_SIZE;
+ unsigned int coord_type_x : SQ_TEX_WORD1_COORD_TYPE_X_SIZE;
+ unsigned int lod_bias : SQ_TEX_WORD1_LOD_BIAS_SIZE;
+ unsigned int dst_sel_w : SQ_TEX_WORD1_DST_SEL_W_SIZE;
+ unsigned int dst_sel_z : SQ_TEX_WORD1_DST_SEL_Z_SIZE;
+ unsigned int dst_sel_y : SQ_TEX_WORD1_DST_SEL_Y_SIZE;
+ unsigned int dst_sel_x : SQ_TEX_WORD1_DST_SEL_X_SIZE;
+ unsigned int : 1;
+ unsigned int dst_rel : SQ_TEX_WORD1_DST_REL_SIZE;
+ unsigned int dst_gpr : SQ_TEX_WORD1_DST_GPR_SIZE;
+ } sq_tex_word1_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_tex_word1_t f;
+} sq_tex_word1_u;
+
+
+/*
+ * SQ_TEX_WORD2 struct
+ */
+
+#define SQ_TEX_WORD2_OFFSET_X_SIZE 5
+#define SQ_TEX_WORD2_OFFSET_Y_SIZE 5
+#define SQ_TEX_WORD2_OFFSET_Z_SIZE 5
+#define SQ_TEX_WORD2_SAMPLER_ID_SIZE 5
+#define SQ_TEX_WORD2_SRC_SEL_X_SIZE 3
+#define SQ_TEX_WORD2_SRC_SEL_Y_SIZE 3
+#define SQ_TEX_WORD2_SRC_SEL_Z_SIZE 3
+#define SQ_TEX_WORD2_SRC_SEL_W_SIZE 3
+
+#define SQ_TEX_WORD2_OFFSET_X_SHIFT 0
+#define SQ_TEX_WORD2_OFFSET_Y_SHIFT 5
+#define SQ_TEX_WORD2_OFFSET_Z_SHIFT 10
+#define SQ_TEX_WORD2_SAMPLER_ID_SHIFT 15
+#define SQ_TEX_WORD2_SRC_SEL_X_SHIFT 20
+#define SQ_TEX_WORD2_SRC_SEL_Y_SHIFT 23
+#define SQ_TEX_WORD2_SRC_SEL_Z_SHIFT 26
+#define SQ_TEX_WORD2_SRC_SEL_W_SHIFT 29
+
+#define SQ_TEX_WORD2_OFFSET_X_MASK 0x0000001f
+#define SQ_TEX_WORD2_OFFSET_Y_MASK 0x000003e0
+#define SQ_TEX_WORD2_OFFSET_Z_MASK 0x00007c00
+#define SQ_TEX_WORD2_SAMPLER_ID_MASK 0x000f8000
+#define SQ_TEX_WORD2_SRC_SEL_X_MASK 0x00700000
+#define SQ_TEX_WORD2_SRC_SEL_Y_MASK 0x03800000
+#define SQ_TEX_WORD2_SRC_SEL_Z_MASK 0x1c000000
+#define SQ_TEX_WORD2_SRC_SEL_W_MASK 0xe0000000
+
+#define SQ_TEX_WORD2_MASK \
+ (SQ_TEX_WORD2_OFFSET_X_MASK | \
+ SQ_TEX_WORD2_OFFSET_Y_MASK | \
+ SQ_TEX_WORD2_OFFSET_Z_MASK | \
+ SQ_TEX_WORD2_SAMPLER_ID_MASK | \
+ SQ_TEX_WORD2_SRC_SEL_X_MASK | \
+ SQ_TEX_WORD2_SRC_SEL_Y_MASK | \
+ SQ_TEX_WORD2_SRC_SEL_Z_MASK | \
+ SQ_TEX_WORD2_SRC_SEL_W_MASK)
+
+#define SQ_TEX_WORD2_DEFAULT 0xcdcdcdcd
+
+#define SQ_TEX_WORD2_GET_OFFSET_X(sq_tex_word2) \
+ ((sq_tex_word2 & SQ_TEX_WORD2_OFFSET_X_MASK) >> SQ_TEX_WORD2_OFFSET_X_SHIFT)
+#define SQ_TEX_WORD2_GET_OFFSET_Y(sq_tex_word2) \
+ ((sq_tex_word2 & SQ_TEX_WORD2_OFFSET_Y_MASK) >> SQ_TEX_WORD2_OFFSET_Y_SHIFT)
+#define SQ_TEX_WORD2_GET_OFFSET_Z(sq_tex_word2) \
+ ((sq_tex_word2 & SQ_TEX_WORD2_OFFSET_Z_MASK) >> SQ_TEX_WORD2_OFFSET_Z_SHIFT)
+#define SQ_TEX_WORD2_GET_SAMPLER_ID(sq_tex_word2) \
+ ((sq_tex_word2 & SQ_TEX_WORD2_SAMPLER_ID_MASK) >> SQ_TEX_WORD2_SAMPLER_ID_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_X(sq_tex_word2) \
+ ((sq_tex_word2 & SQ_TEX_WORD2_SRC_SEL_X_MASK) >> SQ_TEX_WORD2_SRC_SEL_X_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_Y(sq_tex_word2) \
+ ((sq_tex_word2 & SQ_TEX_WORD2_SRC_SEL_Y_MASK) >> SQ_TEX_WORD2_SRC_SEL_Y_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_Z(sq_tex_word2) \
+ ((sq_tex_word2 & SQ_TEX_WORD2_SRC_SEL_Z_MASK) >> SQ_TEX_WORD2_SRC_SEL_Z_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_W(sq_tex_word2) \
+ ((sq_tex_word2 & SQ_TEX_WORD2_SRC_SEL_W_MASK) >> SQ_TEX_WORD2_SRC_SEL_W_SHIFT)
+
+#define SQ_TEX_WORD2_SET_OFFSET_X(sq_tex_word2_reg, offset_x) \
+ sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_OFFSET_X_MASK) | (offset_x << SQ_TEX_WORD2_OFFSET_X_SHIFT)
+#define SQ_TEX_WORD2_SET_OFFSET_Y(sq_tex_word2_reg, offset_y) \
+ sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_OFFSET_Y_MASK) | (offset_y << SQ_TEX_WORD2_OFFSET_Y_SHIFT)
+#define SQ_TEX_WORD2_SET_OFFSET_Z(sq_tex_word2_reg, offset_z) \
+ sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_OFFSET_Z_MASK) | (offset_z << SQ_TEX_WORD2_OFFSET_Z_SHIFT)
+#define SQ_TEX_WORD2_SET_SAMPLER_ID(sq_tex_word2_reg, sampler_id) \
+ sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_SAMPLER_ID_MASK) | (sampler_id << SQ_TEX_WORD2_SAMPLER_ID_SHIFT)
+#define SQ_TEX_WORD2_SET_SRC_SEL_X(sq_tex_word2_reg, src_sel_x) \
+ sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_SRC_SEL_X_MASK) | (src_sel_x << SQ_TEX_WORD2_SRC_SEL_X_SHIFT)
+#define SQ_TEX_WORD2_SET_SRC_SEL_Y(sq_tex_word2_reg, src_sel_y) \
+ sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_SRC_SEL_Y_MASK) | (src_sel_y << SQ_TEX_WORD2_SRC_SEL_Y_SHIFT)
+#define SQ_TEX_WORD2_SET_SRC_SEL_Z(sq_tex_word2_reg, src_sel_z) \
+ sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_SRC_SEL_Z_MASK) | (src_sel_z << SQ_TEX_WORD2_SRC_SEL_Z_SHIFT)
+#define SQ_TEX_WORD2_SET_SRC_SEL_W(sq_tex_word2_reg, src_sel_w) \
+ sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_SRC_SEL_W_MASK) | (src_sel_w << SQ_TEX_WORD2_SRC_SEL_W_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_tex_word2_t {
+ unsigned int offset_x : SQ_TEX_WORD2_OFFSET_X_SIZE;
+ unsigned int offset_y : SQ_TEX_WORD2_OFFSET_Y_SIZE;
+ unsigned int offset_z : SQ_TEX_WORD2_OFFSET_Z_SIZE;
+ unsigned int sampler_id : SQ_TEX_WORD2_SAMPLER_ID_SIZE;
+ unsigned int src_sel_x : SQ_TEX_WORD2_SRC_SEL_X_SIZE;
+ unsigned int src_sel_y : SQ_TEX_WORD2_SRC_SEL_Y_SIZE;
+ unsigned int src_sel_z : SQ_TEX_WORD2_SRC_SEL_Z_SIZE;
+ unsigned int src_sel_w : SQ_TEX_WORD2_SRC_SEL_W_SIZE;
+ } sq_tex_word2_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_tex_word2_t {
+ unsigned int src_sel_w : SQ_TEX_WORD2_SRC_SEL_W_SIZE;
+ unsigned int src_sel_z : SQ_TEX_WORD2_SRC_SEL_Z_SIZE;
+ unsigned int src_sel_y : SQ_TEX_WORD2_SRC_SEL_Y_SIZE;
+ unsigned int src_sel_x : SQ_TEX_WORD2_SRC_SEL_X_SIZE;
+ unsigned int sampler_id : SQ_TEX_WORD2_SAMPLER_ID_SIZE;
+ unsigned int offset_z : SQ_TEX_WORD2_OFFSET_Z_SIZE;
+ unsigned int offset_y : SQ_TEX_WORD2_OFFSET_Y_SIZE;
+ unsigned int offset_x : SQ_TEX_WORD2_OFFSET_X_SIZE;
+ } sq_tex_word2_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_tex_word2_t f;
+} sq_tex_word2_u;
+
+
+/*
+ * SQ_VTX_WORD0 struct
+ */
+
+#define SQ_VTX_WORD0_VTX_INST_SIZE 5
+#define SQ_VTX_WORD0_FETCH_TYPE_SIZE 2
+#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE 1
+#define SQ_VTX_WORD0_BUFFER_ID_SIZE 8
+#define SQ_VTX_WORD0_SRC_GPR_SIZE 7
+#define SQ_VTX_WORD0_SRC_REL_SIZE 1
+#define SQ_VTX_WORD0_SRC_SEL_X_SIZE 2
+#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE 6
+
+#define SQ_VTX_WORD0_VTX_INST_SHIFT 0
+#define SQ_VTX_WORD0_FETCH_TYPE_SHIFT 5
+#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT 7
+#define SQ_VTX_WORD0_BUFFER_ID_SHIFT 8
+#define SQ_VTX_WORD0_SRC_GPR_SHIFT 16
+#define SQ_VTX_WORD0_SRC_REL_SHIFT 23
+#define SQ_VTX_WORD0_SRC_SEL_X_SHIFT 24
+#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT 26
+
+#define SQ_VTX_WORD0_VTX_INST_MASK 0x0000001f
+#define SQ_VTX_WORD0_FETCH_TYPE_MASK 0x00000060
+#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK 0x00000080
+#define SQ_VTX_WORD0_BUFFER_ID_MASK 0x0000ff00
+#define SQ_VTX_WORD0_SRC_GPR_MASK 0x007f0000
+#define SQ_VTX_WORD0_SRC_REL_MASK 0x00800000
+#define SQ_VTX_WORD0_SRC_SEL_X_MASK 0x03000000
+#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK 0xfc000000
+
+#define SQ_VTX_WORD0_MASK \
+ (SQ_VTX_WORD0_VTX_INST_MASK | \
+ SQ_VTX_WORD0_FETCH_TYPE_MASK | \
+ SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK | \
+ SQ_VTX_WORD0_BUFFER_ID_MASK | \
+ SQ_VTX_WORD0_SRC_GPR_MASK | \
+ SQ_VTX_WORD0_SRC_REL_MASK | \
+ SQ_VTX_WORD0_SRC_SEL_X_MASK | \
+ SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK)
+
+#define SQ_VTX_WORD0_DEFAULT 0xcdcdcdcd
+
+#define SQ_VTX_WORD0_GET_VTX_INST(sq_vtx_word0) \
+ ((sq_vtx_word0 & SQ_VTX_WORD0_VTX_INST_MASK) >> SQ_VTX_WORD0_VTX_INST_SHIFT)
+#define SQ_VTX_WORD0_GET_FETCH_TYPE(sq_vtx_word0) \
+ ((sq_vtx_word0 & SQ_VTX_WORD0_FETCH_TYPE_MASK) >> SQ_VTX_WORD0_FETCH_TYPE_SHIFT)
+#define SQ_VTX_WORD0_GET_FETCH_WHOLE_QUAD(sq_vtx_word0) \
+ ((sq_vtx_word0 & SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK) >> SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT)
+#define SQ_VTX_WORD0_GET_BUFFER_ID(sq_vtx_word0) \
+ ((sq_vtx_word0 & SQ_VTX_WORD0_BUFFER_ID_MASK) >> SQ_VTX_WORD0_BUFFER_ID_SHIFT)
+#define SQ_VTX_WORD0_GET_SRC_GPR(sq_vtx_word0) \
+ ((sq_vtx_word0 & SQ_VTX_WORD0_SRC_GPR_MASK) >> SQ_VTX_WORD0_SRC_GPR_SHIFT)
+#define SQ_VTX_WORD0_GET_SRC_REL(sq_vtx_word0) \
+ ((sq_vtx_word0 & SQ_VTX_WORD0_SRC_REL_MASK) >> SQ_VTX_WORD0_SRC_REL_SHIFT)
+#define SQ_VTX_WORD0_GET_SRC_SEL_X(sq_vtx_word0) \
+ ((sq_vtx_word0 & SQ_VTX_WORD0_SRC_SEL_X_MASK) >> SQ_VTX_WORD0_SRC_SEL_X_SHIFT)
+#define SQ_VTX_WORD0_GET_MEGA_FETCH_COUNT(sq_vtx_word0) \
+ ((sq_vtx_word0 & SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK) >> SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT)
+
+#define SQ_VTX_WORD0_SET_VTX_INST(sq_vtx_word0_reg, vtx_inst) \
+ sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_VTX_INST_MASK) | (vtx_inst << SQ_VTX_WORD0_VTX_INST_SHIFT)
+#define SQ_VTX_WORD0_SET_FETCH_TYPE(sq_vtx_word0_reg, fetch_type) \
+ sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_FETCH_TYPE_MASK) | (fetch_type << SQ_VTX_WORD0_FETCH_TYPE_SHIFT)
+#define SQ_VTX_WORD0_SET_FETCH_WHOLE_QUAD(sq_vtx_word0_reg, fetch_whole_quad) \
+ sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK) | (fetch_whole_quad << SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT)
+#define SQ_VTX_WORD0_SET_BUFFER_ID(sq_vtx_word0_reg, buffer_id) \
+ sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_BUFFER_ID_MASK) | (buffer_id << SQ_VTX_WORD0_BUFFER_ID_SHIFT)
+#define SQ_VTX_WORD0_SET_SRC_GPR(sq_vtx_word0_reg, src_gpr) \
+ sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_SRC_GPR_MASK) | (src_gpr << SQ_VTX_WORD0_SRC_GPR_SHIFT)
+#define SQ_VTX_WORD0_SET_SRC_REL(sq_vtx_word0_reg, src_rel) \
+ sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_SRC_REL_MASK) | (src_rel << SQ_VTX_WORD0_SRC_REL_SHIFT)
+#define SQ_VTX_WORD0_SET_SRC_SEL_X(sq_vtx_word0_reg, src_sel_x) \
+ sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_SRC_SEL_X_MASK) | (src_sel_x << SQ_VTX_WORD0_SRC_SEL_X_SHIFT)
+#define SQ_VTX_WORD0_SET_MEGA_FETCH_COUNT(sq_vtx_word0_reg, mega_fetch_count) \
+ sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK) | (mega_fetch_count << SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_vtx_word0_t {
+ unsigned int vtx_inst : SQ_VTX_WORD0_VTX_INST_SIZE;
+ unsigned int fetch_type : SQ_VTX_WORD0_FETCH_TYPE_SIZE;
+ unsigned int fetch_whole_quad : SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+ unsigned int buffer_id : SQ_VTX_WORD0_BUFFER_ID_SIZE;
+ unsigned int src_gpr : SQ_VTX_WORD0_SRC_GPR_SIZE;
+ unsigned int src_rel : SQ_VTX_WORD0_SRC_REL_SIZE;
+ unsigned int src_sel_x : SQ_VTX_WORD0_SRC_SEL_X_SIZE;
+ unsigned int mega_fetch_count : SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE;
+ } sq_vtx_word0_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_vtx_word0_t {
+ unsigned int mega_fetch_count : SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE;
+ unsigned int src_sel_x : SQ_VTX_WORD0_SRC_SEL_X_SIZE;
+ unsigned int src_rel : SQ_VTX_WORD0_SRC_REL_SIZE;
+ unsigned int src_gpr : SQ_VTX_WORD0_SRC_GPR_SIZE;
+ unsigned int buffer_id : SQ_VTX_WORD0_BUFFER_ID_SIZE;
+ unsigned int fetch_whole_quad : SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+ unsigned int fetch_type : SQ_VTX_WORD0_FETCH_TYPE_SIZE;
+ unsigned int vtx_inst : SQ_VTX_WORD0_VTX_INST_SIZE;
+ } sq_vtx_word0_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_vtx_word0_t f;
+} sq_vtx_word0_u;
+
+
+/*
+ * SQ_VTX_WORD1 struct
+ */
+
+#define SQ_VTX_WORD1_DST_SEL_X_SIZE 3
+#define SQ_VTX_WORD1_DST_SEL_Y_SIZE 3
+#define SQ_VTX_WORD1_DST_SEL_Z_SIZE 3
+#define SQ_VTX_WORD1_DST_SEL_W_SIZE 3
+#define SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE 1
+#define SQ_VTX_WORD1_DATA_FORMAT_SIZE 6
+#define SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE 2
+#define SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE 1
+#define SQ_VTX_WORD1_SRF_MODE_ALL_SIZE 1
+
+#define SQ_VTX_WORD1_DST_SEL_X_SHIFT 9
+#define SQ_VTX_WORD1_DST_SEL_Y_SHIFT 12
+#define SQ_VTX_WORD1_DST_SEL_Z_SHIFT 15
+#define SQ_VTX_WORD1_DST_SEL_W_SHIFT 18
+#define SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT 21
+#define SQ_VTX_WORD1_DATA_FORMAT_SHIFT 22
+#define SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT 28
+#define SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT 30
+#define SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT 31
+
+#define SQ_VTX_WORD1_DST_SEL_X_MASK 0x00000e00
+#define SQ_VTX_WORD1_DST_SEL_Y_MASK 0x00007000
+#define SQ_VTX_WORD1_DST_SEL_Z_MASK 0x00038000
+#define SQ_VTX_WORD1_DST_SEL_W_MASK 0x001c0000
+#define SQ_VTX_WORD1_USE_CONST_FIELDS_MASK 0x00200000
+#define SQ_VTX_WORD1_DATA_FORMAT_MASK 0x0fc00000
+#define SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK 0x30000000
+#define SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK 0x40000000
+#define SQ_VTX_WORD1_SRF_MODE_ALL_MASK 0x80000000
+
+#define SQ_VTX_WORD1_MASK \
+ (SQ_VTX_WORD1_DST_SEL_X_MASK | \
+ SQ_VTX_WORD1_DST_SEL_Y_MASK | \
+ SQ_VTX_WORD1_DST_SEL_Z_MASK | \
+ SQ_VTX_WORD1_DST_SEL_W_MASK | \
+ SQ_VTX_WORD1_USE_CONST_FIELDS_MASK | \
+ SQ_VTX_WORD1_DATA_FORMAT_MASK | \
+ SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK | \
+ SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK | \
+ SQ_VTX_WORD1_SRF_MODE_ALL_MASK)
+
+#define SQ_VTX_WORD1_DEFAULT 0xcdcdcc00
+
+#define SQ_VTX_WORD1_GET_DST_SEL_X(sq_vtx_word1) \
+ ((sq_vtx_word1 & SQ_VTX_WORD1_DST_SEL_X_MASK) >> SQ_VTX_WORD1_DST_SEL_X_SHIFT)
+#define SQ_VTX_WORD1_GET_DST_SEL_Y(sq_vtx_word1) \
+ ((sq_vtx_word1 & SQ_VTX_WORD1_DST_SEL_Y_MASK) >> SQ_VTX_WORD1_DST_SEL_Y_SHIFT)
+#define SQ_VTX_WORD1_GET_DST_SEL_Z(sq_vtx_word1) \
+ ((sq_vtx_word1 & SQ_VTX_WORD1_DST_SEL_Z_MASK) >> SQ_VTX_WORD1_DST_SEL_Z_SHIFT)
+#define SQ_VTX_WORD1_GET_DST_SEL_W(sq_vtx_word1) \
+ ((sq_vtx_word1 & SQ_VTX_WORD1_DST_SEL_W_MASK) >> SQ_VTX_WORD1_DST_SEL_W_SHIFT)
+#define SQ_VTX_WORD1_GET_USE_CONST_FIELDS(sq_vtx_word1) \
+ ((sq_vtx_word1 & SQ_VTX_WORD1_USE_CONST_FIELDS_MASK) >> SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT)
+#define SQ_VTX_WORD1_GET_DATA_FORMAT(sq_vtx_word1) \
+ ((sq_vtx_word1 & SQ_VTX_WORD1_DATA_FORMAT_MASK) >> SQ_VTX_WORD1_DATA_FORMAT_SHIFT)
+#define SQ_VTX_WORD1_GET_NUM_FORMAT_ALL(sq_vtx_word1) \
+ ((sq_vtx_word1 & SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK) >> SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT)
+#define SQ_VTX_WORD1_GET_FORMAT_COMP_ALL(sq_vtx_word1) \
+ ((sq_vtx_word1 & SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK) >> SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT)
+#define SQ_VTX_WORD1_GET_SRF_MODE_ALL(sq_vtx_word1) \
+ ((sq_vtx_word1 & SQ_VTX_WORD1_SRF_MODE_ALL_MASK) >> SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT)
+
+#define SQ_VTX_WORD1_SET_DST_SEL_X(sq_vtx_word1_reg, dst_sel_x) \
+ sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_DST_SEL_X_MASK) | (dst_sel_x << SQ_VTX_WORD1_DST_SEL_X_SHIFT)
+#define SQ_VTX_WORD1_SET_DST_SEL_Y(sq_vtx_word1_reg, dst_sel_y) \
+ sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_DST_SEL_Y_MASK) | (dst_sel_y << SQ_VTX_WORD1_DST_SEL_Y_SHIFT)
+#define SQ_VTX_WORD1_SET_DST_SEL_Z(sq_vtx_word1_reg, dst_sel_z) \
+ sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_DST_SEL_Z_MASK) | (dst_sel_z << SQ_VTX_WORD1_DST_SEL_Z_SHIFT)
+#define SQ_VTX_WORD1_SET_DST_SEL_W(sq_vtx_word1_reg, dst_sel_w) \
+ sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_DST_SEL_W_MASK) | (dst_sel_w << SQ_VTX_WORD1_DST_SEL_W_SHIFT)
+#define SQ_VTX_WORD1_SET_USE_CONST_FIELDS(sq_vtx_word1_reg, use_const_fields) \
+ sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_USE_CONST_FIELDS_MASK) | (use_const_fields << SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT)
+#define SQ_VTX_WORD1_SET_DATA_FORMAT(sq_vtx_word1_reg, data_format) \
+ sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_DATA_FORMAT_MASK) | (data_format << SQ_VTX_WORD1_DATA_FORMAT_SHIFT)
+#define SQ_VTX_WORD1_SET_NUM_FORMAT_ALL(sq_vtx_word1_reg, num_format_all) \
+ sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK) | (num_format_all << SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT)
+#define SQ_VTX_WORD1_SET_FORMAT_COMP_ALL(sq_vtx_word1_reg, format_comp_all) \
+ sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK) | (format_comp_all << SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT)
+#define SQ_VTX_WORD1_SET_SRF_MODE_ALL(sq_vtx_word1_reg, srf_mode_all) \
+ sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_SRF_MODE_ALL_MASK) | (srf_mode_all << SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_vtx_word1_t {
+ unsigned int : 9;
+ unsigned int dst_sel_x : SQ_VTX_WORD1_DST_SEL_X_SIZE;
+ unsigned int dst_sel_y : SQ_VTX_WORD1_DST_SEL_Y_SIZE;
+ unsigned int dst_sel_z : SQ_VTX_WORD1_DST_SEL_Z_SIZE;
+ unsigned int dst_sel_w : SQ_VTX_WORD1_DST_SEL_W_SIZE;
+ unsigned int use_const_fields : SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE;
+ unsigned int data_format : SQ_VTX_WORD1_DATA_FORMAT_SIZE;
+ unsigned int num_format_all : SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE;
+ unsigned int format_comp_all : SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE;
+ unsigned int srf_mode_all : SQ_VTX_WORD1_SRF_MODE_ALL_SIZE;
+ } sq_vtx_word1_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_vtx_word1_t {
+ unsigned int srf_mode_all : SQ_VTX_WORD1_SRF_MODE_ALL_SIZE;
+ unsigned int format_comp_all : SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE;
+ unsigned int num_format_all : SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE;
+ unsigned int data_format : SQ_VTX_WORD1_DATA_FORMAT_SIZE;
+ unsigned int use_const_fields : SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE;
+ unsigned int dst_sel_w : SQ_VTX_WORD1_DST_SEL_W_SIZE;
+ unsigned int dst_sel_z : SQ_VTX_WORD1_DST_SEL_Z_SIZE;
+ unsigned int dst_sel_y : SQ_VTX_WORD1_DST_SEL_Y_SIZE;
+ unsigned int dst_sel_x : SQ_VTX_WORD1_DST_SEL_X_SIZE;
+ unsigned int : 9;
+ } sq_vtx_word1_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_vtx_word1_t f;
+} sq_vtx_word1_u;
+
+
+/*
+ * SQ_VTX_WORD1_GPR struct
+ */
+
+#define SQ_VTX_WORD1_GPR_DST_GPR_SIZE 7
+#define SQ_VTX_WORD1_GPR_DST_REL_SIZE 1
+
+#define SQ_VTX_WORD1_GPR_DST_GPR_SHIFT 0
+#define SQ_VTX_WORD1_GPR_DST_REL_SHIFT 7
+
+#define SQ_VTX_WORD1_GPR_DST_GPR_MASK 0x0000007f
+#define SQ_VTX_WORD1_GPR_DST_REL_MASK 0x00000080
+
+#define SQ_VTX_WORD1_GPR_MASK \
+ (SQ_VTX_WORD1_GPR_DST_GPR_MASK | \
+ SQ_VTX_WORD1_GPR_DST_REL_MASK)
+
+#define SQ_VTX_WORD1_GPR_DEFAULT 0x000000cd
+
+#define SQ_VTX_WORD1_GPR_GET_DST_GPR(sq_vtx_word1_gpr) \
+ ((sq_vtx_word1_gpr & SQ_VTX_WORD1_GPR_DST_GPR_MASK) >> SQ_VTX_WORD1_GPR_DST_GPR_SHIFT)
+#define SQ_VTX_WORD1_GPR_GET_DST_REL(sq_vtx_word1_gpr) \
+ ((sq_vtx_word1_gpr & SQ_VTX_WORD1_GPR_DST_REL_MASK) >> SQ_VTX_WORD1_GPR_DST_REL_SHIFT)
+
+#define SQ_VTX_WORD1_GPR_SET_DST_GPR(sq_vtx_word1_gpr_reg, dst_gpr) \
+ sq_vtx_word1_gpr_reg = (sq_vtx_word1_gpr_reg & ~SQ_VTX_WORD1_GPR_DST_GPR_MASK) | (dst_gpr << SQ_VTX_WORD1_GPR_DST_GPR_SHIFT)
+#define SQ_VTX_WORD1_GPR_SET_DST_REL(sq_vtx_word1_gpr_reg, dst_rel) \
+ sq_vtx_word1_gpr_reg = (sq_vtx_word1_gpr_reg & ~SQ_VTX_WORD1_GPR_DST_REL_MASK) | (dst_rel << SQ_VTX_WORD1_GPR_DST_REL_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_vtx_word1_gpr_t {
+ unsigned int dst_gpr : SQ_VTX_WORD1_GPR_DST_GPR_SIZE;
+ unsigned int dst_rel : SQ_VTX_WORD1_GPR_DST_REL_SIZE;
+ unsigned int : 24;
+ } sq_vtx_word1_gpr_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_vtx_word1_gpr_t {
+ unsigned int : 24;
+ unsigned int dst_rel : SQ_VTX_WORD1_GPR_DST_REL_SIZE;
+ unsigned int dst_gpr : SQ_VTX_WORD1_GPR_DST_GPR_SIZE;
+ } sq_vtx_word1_gpr_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_vtx_word1_gpr_t f;
+} sq_vtx_word1_gpr_u;
+
+
+/*
+ * SQ_VTX_WORD1_SEM struct
+ */
+
+#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE 8
+
+#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT 0
+
+#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK 0x000000ff
+
+#define SQ_VTX_WORD1_SEM_MASK \
+ (SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK)
+
+#define SQ_VTX_WORD1_SEM_DEFAULT 0x000000cd
+
+#define SQ_VTX_WORD1_SEM_GET_SEMANTIC_ID(sq_vtx_word1_sem) \
+ ((sq_vtx_word1_sem & SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK) >> SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT)
+
+#define SQ_VTX_WORD1_SEM_SET_SEMANTIC_ID(sq_vtx_word1_sem_reg, semantic_id) \
+ sq_vtx_word1_sem_reg = (sq_vtx_word1_sem_reg & ~SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK) | (semantic_id << SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_vtx_word1_sem_t {
+ unsigned int semantic_id : SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE;
+ unsigned int : 24;
+ } sq_vtx_word1_sem_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_vtx_word1_sem_t {
+ unsigned int : 24;
+ unsigned int semantic_id : SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE;
+ } sq_vtx_word1_sem_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_vtx_word1_sem_t f;
+} sq_vtx_word1_sem_u;
+
+
+/*
+ * SQ_VTX_WORD2 struct
+ */
+
+#define SQ_VTX_WORD2_OFFSET_SIZE 16
+#define SQ_VTX_WORD2_ENDIAN_SWAP_SIZE 2
+#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE 1
+#define SQ_VTX_WORD2_MEGA_FETCH_SIZE 1
+#define SQ_VTX_WORD2_ALT_CONST_SIZE 1
+
+#define SQ_VTX_WORD2_OFFSET_SHIFT 0
+#define SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT 16
+#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT 18
+#define SQ_VTX_WORD2_MEGA_FETCH_SHIFT 19
+#define SQ_VTX_WORD2_ALT_CONST_SHIFT 20
+
+#define SQ_VTX_WORD2_OFFSET_MASK 0x0000ffff
+#define SQ_VTX_WORD2_ENDIAN_SWAP_MASK 0x00030000
+#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK 0x00040000
+#define SQ_VTX_WORD2_MEGA_FETCH_MASK 0x00080000
+#define SQ_VTX_WORD2_ALT_CONST_MASK 0x00100000
+
+#define SQ_VTX_WORD2_MASK \
+ (SQ_VTX_WORD2_OFFSET_MASK | \
+ SQ_VTX_WORD2_ENDIAN_SWAP_MASK | \
+ SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK | \
+ SQ_VTX_WORD2_MEGA_FETCH_MASK | \
+ SQ_VTX_WORD2_ALT_CONST_MASK)
+
+#define SQ_VTX_WORD2_DEFAULT 0x000dcdcd
+
+#define SQ_VTX_WORD2_GET_OFFSET(sq_vtx_word2) \
+ ((sq_vtx_word2 & SQ_VTX_WORD2_OFFSET_MASK) >> SQ_VTX_WORD2_OFFSET_SHIFT)
+#define SQ_VTX_WORD2_GET_ENDIAN_SWAP(sq_vtx_word2) \
+ ((sq_vtx_word2 & SQ_VTX_WORD2_ENDIAN_SWAP_MASK) >> SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT)
+#define SQ_VTX_WORD2_GET_CONST_BUF_NO_STRIDE(sq_vtx_word2) \
+ ((sq_vtx_word2 & SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK) >> SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT)
+#define SQ_VTX_WORD2_GET_MEGA_FETCH(sq_vtx_word2) \
+ ((sq_vtx_word2 & SQ_VTX_WORD2_MEGA_FETCH_MASK) >> SQ_VTX_WORD2_MEGA_FETCH_SHIFT)
+#define SQ_VTX_WORD2_GET_ALT_CONST(sq_vtx_word2) \
+ ((sq_vtx_word2 & SQ_VTX_WORD2_ALT_CONST_MASK) >> SQ_VTX_WORD2_ALT_CONST_SHIFT)
+
+#define SQ_VTX_WORD2_SET_OFFSET(sq_vtx_word2_reg, offset) \
+ sq_vtx_word2_reg = (sq_vtx_word2_reg & ~SQ_VTX_WORD2_OFFSET_MASK) | (offset << SQ_VTX_WORD2_OFFSET_SHIFT)
+#define SQ_VTX_WORD2_SET_ENDIAN_SWAP(sq_vtx_word2_reg, endian_swap) \
+ sq_vtx_word2_reg = (sq_vtx_word2_reg & ~SQ_VTX_WORD2_ENDIAN_SWAP_MASK) | (endian_swap << SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT)
+#define SQ_VTX_WORD2_SET_CONST_BUF_NO_STRIDE(sq_vtx_word2_reg, const_buf_no_stride) \
+ sq_vtx_word2_reg = (sq_vtx_word2_reg & ~SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK) | (const_buf_no_stride << SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT)
+#define SQ_VTX_WORD2_SET_MEGA_FETCH(sq_vtx_word2_reg, mega_fetch) \
+ sq_vtx_word2_reg = (sq_vtx_word2_reg & ~SQ_VTX_WORD2_MEGA_FETCH_MASK) | (mega_fetch << SQ_VTX_WORD2_MEGA_FETCH_SHIFT)
+#define SQ_VTX_WORD2_SET_ALT_CONST(sq_vtx_word2_reg, alt_const) \
+ sq_vtx_word2_reg = (sq_vtx_word2_reg & ~SQ_VTX_WORD2_ALT_CONST_MASK) | (alt_const << SQ_VTX_WORD2_ALT_CONST_SHIFT)
+
+#if defined(LITTLEENDIAN_CPU)
+
+ typedef struct _sq_vtx_word2_t {
+ unsigned int offset : SQ_VTX_WORD2_OFFSET_SIZE;
+ unsigned int endian_swap : SQ_VTX_WORD2_ENDIAN_SWAP_SIZE;
+ unsigned int const_buf_no_stride : SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE;
+ unsigned int mega_fetch : SQ_VTX_WORD2_MEGA_FETCH_SIZE;
+ unsigned int alt_const : SQ_VTX_WORD2_ALT_CONST_SIZE;
+ unsigned int : 11;
+ } sq_vtx_word2_t;
+
+#elif defined(BIGENDIAN_CPU)
+
+ typedef struct _sq_vtx_word2_t {
+ unsigned int : 11;
+ unsigned int alt_const : SQ_VTX_WORD2_ALT_CONST_SIZE;
+ unsigned int mega_fetch : SQ_VTX_WORD2_MEGA_FETCH_SIZE;
+ unsigned int const_buf_no_stride : SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE;
+ unsigned int endian_swap : SQ_VTX_WORD2_ENDIAN_SWAP_SIZE;
+ unsigned int offset : SQ_VTX_WORD2_OFFSET_SIZE;
+ } sq_vtx_word2_t;
+
+#endif
+
+typedef union {
+ unsigned int val : 32;
+ sq_vtx_word2_t f;
+} sq_vtx_word2_u;
+
+#endif /* _SQ_MICRO_REG_H */
+
+
diff --git a/radeon/Makefile.am b/radeon/Makefile.am
index 9b2fbcf..554f67e 100644
--- a/radeon/Makefile.am
+++ b/radeon/Makefile.am
@@ -8,17 +8,33 @@ radeon_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
$(DRM_LIBS) $(DRI_LIBS)
radeon_dri_ladir = @libdir@/dri
radeon_dri_la_SOURCES = \
+ radeon_bo_legacy.c \
+ radeon_common_context.c \
+ radeon_common.c \
+ radeon_cs_legacy.c \
+ radeon_dma.c \
+ radeon_debug.c \
+ radeon_fbo.c \
+ radeon_lock.c \
+ radeon_mipmap_tree.c \
+ radeon_queryobj.c \
+ radeon_span.c \
+ radeon_texture.c \
radeon_context.c \
radeon_ioctl.c \
- radeon_lock.c \
radeon_screen.c \
radeon_state.c \
radeon_state_init.c \
radeon_tex.c \
- radeon_texmem.c \
radeon_texstate.c \
radeon_tcl.c \
radeon_swtcl.c \
- radeon_span.c \
radeon_maos.c \
radeon_sanity.c
+
+if HAVE_LIBDRM_RADEON
+radeon_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
+radeon_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
+radeon_dri_la_SOURCES += \
+ radeon_cs_space_drm.c
+endif
diff --git a/radeon/radeon_bo_drm.h b/radeon/radeon_bo_drm.h
new file mode 100644
index 0000000..7141371
--- /dev/null
+++ b/radeon/radeon_bo_drm.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_BO_H
+#define RADEON_BO_H
+
+#include <stdio.h>
+#include <stdint.h>
+//#include "radeon_track.h"
+
+/* bo object */
+#define RADEON_BO_FLAGS_MACRO_TILE 1
+#define RADEON_BO_FLAGS_MICRO_TILE 2
+
+struct radeon_bo_manager;
+
+struct radeon_bo {
+ uint32_t alignment;
+ uint32_t handle;
+ uint32_t size;
+ uint32_t domains;
+ uint32_t flags;
+ unsigned cref;
+#ifdef RADEON_BO_TRACK
+ struct radeon_track *track;
+#endif
+ void *ptr;
+ struct radeon_bo_manager *bom;
+ uint32_t space_accounted;
+};
+
+/* bo functions */
+struct radeon_bo_funcs {
+ struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom,
+ uint32_t handle,
+ uint32_t size,
+ uint32_t alignment,
+ uint32_t domains,
+ uint32_t flags);
+ void (*bo_ref)(struct radeon_bo *bo);
+ struct radeon_bo *(*bo_unref)(struct radeon_bo *bo);
+ int (*bo_map)(struct radeon_bo *bo, int write);
+ int (*bo_unmap)(struct radeon_bo *bo);
+ int (*bo_wait)(struct radeon_bo *bo);
+ int (*bo_is_static)(struct radeon_bo *bo);
+ int (*bo_set_tiling)(struct radeon_bo *bo, uint32_t tiling_flags,
+ uint32_t pitch);
+ int (*bo_get_tiling)(struct radeon_bo *bo, uint32_t *tiling_flags,
+ uint32_t *pitch);
+ int (*bo_is_busy)(struct radeon_bo *bo, uint32_t *domain);
+};
+
+struct radeon_bo_manager {
+ struct radeon_bo_funcs *funcs;
+ int fd;
+
+#ifdef RADEON_BO_TRACK
+ struct radeon_tracker tracker;
+#endif
+};
+
+static inline void _radeon_bo_debug(struct radeon_bo *bo,
+ const char *op,
+ const char *file,
+ const char *func,
+ int line)
+{
+ fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X [%s %s %d]\n",
+ op, bo, bo->handle, bo->size, bo->cref, file, func, line);
+}
+
+static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager *bom,
+ uint32_t handle,
+ uint32_t size,
+ uint32_t alignment,
+ uint32_t domains,
+ uint32_t flags,
+ const char *file,
+ const char *func,
+ int line)
+{
+ struct radeon_bo *bo;
+
+ bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags);
+
+#ifdef RADEON_BO_TRACK
+ if (bo) {
+ bo->track = radeon_tracker_add_track(&bom->tracker, bo->handle);
+ radeon_track_add_event(bo->track, file, func, "open", line);
+ }
+#endif
+ return bo;
+}
+
+static inline void _radeon_bo_ref(struct radeon_bo *bo,
+ const char *file,
+ const char *func,
+ int line)
+{
+ bo->cref++;
+#ifdef RADEON_BO_TRACK
+ radeon_track_add_event(bo->track, file, func, "ref", line);
+#endif
+ bo->bom->funcs->bo_ref(bo);
+}
+
+static inline struct radeon_bo *_radeon_bo_unref(struct radeon_bo *bo,
+ const char *file,
+ const char *func,
+ int line)
+{
+ bo->cref--;
+#ifdef RADEON_BO_TRACK
+ radeon_track_add_event(bo->track, file, func, "unref", line);
+ if (bo->cref <= 0) {
+ radeon_tracker_remove_track(&bo->bom->tracker, bo->track);
+ bo->track = NULL;
+ }
+#endif
+ return bo->bom->funcs->bo_unref(bo);
+}
+
+static inline int _radeon_bo_map(struct radeon_bo *bo,
+ int write,
+ const char *file,
+ const char *func,
+ int line)
+{
+ return bo->bom->funcs->bo_map(bo, write);
+}
+
+static inline int _radeon_bo_unmap(struct radeon_bo *bo,
+ const char *file,
+ const char *func,
+ int line)
+{
+ return bo->bom->funcs->bo_unmap(bo);
+}
+
+static inline int _radeon_bo_wait(struct radeon_bo *bo,
+ const char *file,
+ const char *func,
+ int line)
+{
+ return bo->bom->funcs->bo_wait(bo);
+}
+
+static inline int _radeon_bo_is_busy(struct radeon_bo *bo,
+ uint32_t *domain,
+ const char *file,
+ const char *func,
+ int line)
+{
+ return bo->bom->funcs->bo_is_busy(bo, domain);
+}
+
+static inline int radeon_bo_set_tiling(struct radeon_bo *bo,
+ uint32_t tiling_flags, uint32_t pitch)
+{
+ return bo->bom->funcs->bo_set_tiling(bo, tiling_flags, pitch);
+}
+
+static inline int radeon_bo_get_tiling(struct radeon_bo *bo,
+ uint32_t *tiling_flags, uint32_t *pitch)
+{
+ return bo->bom->funcs->bo_get_tiling(bo, tiling_flags, pitch);
+}
+
+static inline int radeon_bo_is_static(struct radeon_bo *bo)
+{
+ if (bo->bom->funcs->bo_is_static)
+ return bo->bom->funcs->bo_is_static(bo);
+ return 0;
+}
+
+#define radeon_bo_open(bom, h, s, a, d, f)\
+ _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_ref(bo)\
+ _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_unref(bo)\
+ _radeon_bo_unref(bo, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_map(bo, w)\
+ _radeon_bo_map(bo, w, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_unmap(bo)\
+ _radeon_bo_unmap(bo, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_debug(bo, opcode)\
+ _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_wait(bo) \
+ _radeon_bo_wait(bo, __FILE__, __func__, __LINE__)
+#define radeon_bo_is_busy(bo, domain) \
+ _radeon_bo_is_busy(bo, domain, __FILE__, __func__, __LINE__)
+
+#endif
diff --git a/radeon/radeon_bo_legacy.c b/radeon/radeon_bo_legacy.c
new file mode 100644
index 0000000..3e7547d
--- /dev/null
+++ b/radeon/radeon_bo_legacy.c
@@ -0,0 +1,926 @@
+/*
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Dave Airlie
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Aapo Tahkola <aet@rasterburn.org>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ * Dave Airlie
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include "xf86drm.h"
+#include "texmem.h"
+#include "main/simple_list.h"
+
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_common.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_macros.h"
+
+/* no seriously texmem.c is this screwed up */
+struct bo_legacy_texture_object {
+ driTextureObject base;
+ struct bo_legacy *parent;
+};
+
+struct bo_legacy {
+ struct radeon_bo base;
+ int map_count;
+ uint32_t pending;
+ int is_pending;
+ int static_bo;
+ uint32_t offset;
+ struct bo_legacy_texture_object *tobj;
+ int validated;
+ int dirty;
+ void *ptr;
+ struct bo_legacy *next, *prev;
+ struct bo_legacy *pnext, *pprev;
+};
+
+struct bo_manager_legacy {
+ struct radeon_bo_manager base;
+ unsigned nhandle;
+ unsigned nfree_handles;
+ unsigned cfree_handles;
+ uint32_t current_age;
+ struct bo_legacy bos;
+ struct bo_legacy pending_bos;
+ uint32_t fb_location;
+ uint32_t texture_offset;
+ unsigned dma_alloc_size;
+ uint32_t dma_buf_count;
+ unsigned cpendings;
+ driTextureObject texture_swapped;
+ driTexHeap *texture_heap;
+ struct radeon_screen *screen;
+ unsigned *free_handles;
+};
+
+static void bo_legacy_tobj_destroy(void *data, driTextureObject *t)
+{
+ struct bo_legacy_texture_object *tobj = (struct bo_legacy_texture_object *)t;
+
+ if (tobj->parent) {
+ tobj->parent->tobj = NULL;
+ tobj->parent->validated = 0;
+ }
+}
+
+static void inline clean_handles(struct bo_manager_legacy *bom)
+{
+ while (bom->cfree_handles > 0 &&
+ !bom->free_handles[bom->cfree_handles - 1])
+ bom->cfree_handles--;
+
+}
+static int legacy_new_handle(struct bo_manager_legacy *bom, uint32_t *handle)
+{
+ uint32_t tmp;
+
+ *handle = 0;
+ if (bom->nhandle == 0xFFFFFFFF) {
+ return -EINVAL;
+ }
+ if (bom->cfree_handles > 0) {
+ tmp = bom->free_handles[--bom->cfree_handles];
+ clean_handles(bom);
+ } else {
+ bom->cfree_handles = 0;
+ tmp = bom->nhandle++;
+ }
+ assert(tmp);
+ *handle = tmp;
+ return 0;
+}
+
+static int legacy_free_handle(struct bo_manager_legacy *bom, uint32_t handle)
+{
+ uint32_t *handles;
+
+ if (!handle) {
+ return 0;
+ }
+ if (handle == (bom->nhandle - 1)) {
+ int i;
+
+ bom->nhandle--;
+ for (i = bom->cfree_handles - 1; i >= 0; i--) {
+ if (bom->free_handles[i] == (bom->nhandle - 1)) {
+ bom->nhandle--;
+ bom->free_handles[i] = 0;
+ }
+ }
+ clean_handles(bom);
+ return 0;
+ }
+ if (bom->cfree_handles < bom->nfree_handles) {
+ bom->free_handles[bom->cfree_handles++] = handle;
+ return 0;
+ }
+ bom->nfree_handles += 0x100;
+ handles = (uint32_t*)realloc(bom->free_handles, bom->nfree_handles * 4);
+ if (handles == NULL) {
+ bom->nfree_handles -= 0x100;
+ return -ENOMEM;
+ }
+ bom->free_handles = handles;
+ bom->free_handles[bom->cfree_handles++] = handle;
+ return 0;
+}
+
+static void legacy_get_current_age(struct bo_manager_legacy *boml)
+{
+ drm_radeon_getparam_t gp;
+ unsigned char *RADEONMMIO = NULL;
+ int r;
+
+ if ( IS_R300_CLASS(boml->screen)
+ || IS_R600_CLASS(boml->screen) )
+ {
+ gp.param = RADEON_PARAM_LAST_CLEAR;
+ gp.value = (int *)&boml->current_age;
+ r = drmCommandWriteRead(boml->base.fd, DRM_RADEON_GETPARAM,
+ &gp, sizeof(gp));
+ if (r) {
+ fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r);
+ exit(1);
+ }
+ }
+ else {
+ RADEONMMIO = boml->screen->mmio.map;
+ boml->current_age = boml->screen->scratch[3];
+ boml->current_age = INREG(RADEON_GUI_SCRATCH_REG3);
+ }
+}
+
+static int legacy_is_pending(struct radeon_bo *bo)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+ if (bo_legacy->is_pending <= 0) {
+ bo_legacy->is_pending = 0;
+ return 0;
+ }
+ if (boml->current_age >= bo_legacy->pending) {
+ if (boml->pending_bos.pprev == bo_legacy) {
+ boml->pending_bos.pprev = bo_legacy->pprev;
+ }
+ bo_legacy->pprev->pnext = bo_legacy->pnext;
+ if (bo_legacy->pnext) {
+ bo_legacy->pnext->pprev = bo_legacy->pprev;
+ }
+ assert(bo_legacy->is_pending <= bo->cref);
+ while (bo_legacy->is_pending--) {
+ bo = radeon_bo_unref(bo);
+ if (!bo)
+ break;
+ }
+ if (bo)
+ bo_legacy->is_pending = 0;
+ boml->cpendings--;
+ return 0;
+ }
+ return 1;
+}
+
+static int legacy_wait_pending(struct radeon_bo *bo)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+ if (!bo_legacy->is_pending) {
+ return 0;
+ }
+ /* FIXME: lockup and userspace busy looping that's all the folks */
+ legacy_get_current_age(boml);
+ while (legacy_is_pending(bo)) {
+ usleep(10);
+ legacy_get_current_age(boml);
+ }
+ return 0;
+}
+
+void legacy_track_pending(struct radeon_bo_manager *bom, int debug)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy*) bom;
+ struct bo_legacy *bo_legacy;
+ struct bo_legacy *next;
+
+ legacy_get_current_age(boml);
+ bo_legacy = boml->pending_bos.pnext;
+ while (bo_legacy) {
+ if (debug)
+ fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size,
+ boml->current_age, bo_legacy->pending);
+ next = bo_legacy->pnext;
+ if (legacy_is_pending(&(bo_legacy->base))) {
+ }
+ bo_legacy = next;
+ }
+}
+
+static int legacy_wait_any_pending(struct bo_manager_legacy *boml)
+{
+ struct bo_legacy *bo_legacy;
+
+ legacy_get_current_age(boml);
+ bo_legacy = boml->pending_bos.pnext;
+ if (!bo_legacy)
+ return -1;
+ legacy_wait_pending(&bo_legacy->base);
+ return 0;
+}
+
+static void legacy_kick_all_buffers(struct bo_manager_legacy *boml)
+{
+ struct bo_legacy *legacy;
+
+ legacy = boml->bos.next;
+ while (legacy != &boml->bos) {
+ if (legacy->tobj) {
+ if (legacy->validated) {
+ driDestroyTextureObject(&legacy->tobj->base);
+ legacy->tobj = 0;
+ legacy->validated = 0;
+ }
+ }
+ legacy = legacy->next;
+ }
+}
+
+static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml,
+ uint32_t size,
+ uint32_t alignment,
+ uint32_t domains,
+ uint32_t flags)
+{
+ struct bo_legacy *bo_legacy;
+ static int pgsize;
+
+ if (pgsize == 0)
+ pgsize = getpagesize() - 1;
+
+ size = (size + pgsize) & ~pgsize;
+
+ bo_legacy = (struct bo_legacy*)calloc(1, sizeof(struct bo_legacy));
+ if (bo_legacy == NULL) {
+ return NULL;
+ }
+ bo_legacy->base.bom = (struct radeon_bo_manager*)boml;
+ bo_legacy->base.handle = 0;
+ bo_legacy->base.size = size;
+ bo_legacy->base.alignment = alignment;
+ bo_legacy->base.domains = domains;
+ bo_legacy->base.flags = flags;
+ bo_legacy->base.ptr = NULL;
+ bo_legacy->map_count = 0;
+ bo_legacy->next = NULL;
+ bo_legacy->prev = NULL;
+ bo_legacy->pnext = NULL;
+ bo_legacy->pprev = NULL;
+ bo_legacy->next = boml->bos.next;
+ bo_legacy->prev = &boml->bos;
+ boml->bos.next = bo_legacy;
+ if (bo_legacy->next) {
+ bo_legacy->next->prev = bo_legacy;
+ }
+
+ return bo_legacy;
+}
+
+static int bo_dma_alloc(struct radeon_bo *bo)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+ drm_radeon_mem_alloc_t alloc;
+ unsigned size;
+ int base_offset;
+ int r;
+
+ /* align size on 4Kb */
+ size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1);
+ alloc.region = RADEON_MEM_REGION_GART;
+ alloc.alignment = bo_legacy->base.alignment;
+ alloc.size = size;
+ alloc.region_offset = &base_offset;
+ r = drmCommandWriteRead(bo->bom->fd,
+ DRM_RADEON_ALLOC,
+ &alloc,
+ sizeof(alloc));
+ if (r) {
+ /* ptr is set to NULL if dma allocation failed */
+ bo_legacy->ptr = NULL;
+ return r;
+ }
+ bo_legacy->ptr = boml->screen->gartTextures.map + base_offset;
+ bo_legacy->offset = boml->screen->gart_texture_offset + base_offset;
+ bo->size = size;
+ boml->dma_alloc_size += size;
+ boml->dma_buf_count++;
+ return 0;
+}
+
+static int bo_dma_free(struct radeon_bo *bo)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+ drm_radeon_mem_free_t memfree;
+ int r;
+
+ if (bo_legacy->ptr == NULL) {
+ /* ptr is set to NULL if dma allocation failed */
+ return 0;
+ }
+ legacy_get_current_age(boml);
+ memfree.region = RADEON_MEM_REGION_GART;
+ memfree.region_offset = bo_legacy->offset;
+ memfree.region_offset -= boml->screen->gart_texture_offset;
+ r = drmCommandWrite(boml->base.fd,
+ DRM_RADEON_FREE,
+ &memfree,
+ sizeof(memfree));
+ if (r) {
+ fprintf(stderr, "Failed to free bo[%p] at %08x\n",
+ &bo_legacy->base, memfree.region_offset);
+ fprintf(stderr, "ret = %s\n", strerror(-r));
+ return r;
+ }
+ boml->dma_alloc_size -= bo_legacy->base.size;
+ boml->dma_buf_count--;
+ return 0;
+}
+
+static void bo_free(struct bo_legacy *bo_legacy)
+{
+ struct bo_manager_legacy *boml;
+
+ if (bo_legacy == NULL) {
+ return;
+ }
+ boml = (struct bo_manager_legacy *)bo_legacy->base.bom;
+ bo_legacy->prev->next = bo_legacy->next;
+ if (bo_legacy->next) {
+ bo_legacy->next->prev = bo_legacy->prev;
+ }
+ if (!bo_legacy->static_bo) {
+ legacy_free_handle(boml, bo_legacy->base.handle);
+ if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) {
+ /* dma buffers */
+ bo_dma_free(&bo_legacy->base);
+ } else {
+ driDestroyTextureObject(&bo_legacy->tobj->base);
+ bo_legacy->tobj = NULL;
+ /* free backing store */
+ free(bo_legacy->ptr);
+ }
+ }
+ memset(bo_legacy, 0 , sizeof(struct bo_legacy));
+ free(bo_legacy);
+}
+
+static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
+ uint32_t handle,
+ uint32_t size,
+ uint32_t alignment,
+ uint32_t domains,
+ uint32_t flags)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
+ struct bo_legacy *bo_legacy;
+ int r;
+
+ if (handle) {
+ bo_legacy = boml->bos.next;
+ while (bo_legacy) {
+ if (bo_legacy->base.handle == handle) {
+ radeon_bo_ref(&(bo_legacy->base));
+ return (struct radeon_bo*)bo_legacy;
+ }
+ bo_legacy = bo_legacy->next;
+ }
+ return NULL;
+ }
+ bo_legacy = bo_allocate(boml, size, alignment, domains, flags);
+ bo_legacy->static_bo = 0;
+ r = legacy_new_handle(boml, &bo_legacy->base.handle);
+ if (r) {
+ bo_free(bo_legacy);
+ return NULL;
+ }
+ if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT)
+ {
+retry:
+ legacy_track_pending(&boml->base, 0);
+ /* dma buffers */
+
+ r = bo_dma_alloc(&(bo_legacy->base));
+ if (r)
+ {
+ if (legacy_wait_any_pending(boml) == -1)
+ {
+ bo_free(bo_legacy);
+ return NULL;
+ }
+ goto retry;
+ return NULL;
+ }
+ }
+ else
+ {
+ bo_legacy->ptr = malloc(bo_legacy->base.size);
+ if (bo_legacy->ptr == NULL) {
+ bo_free(bo_legacy);
+ return NULL;
+ }
+ }
+ radeon_bo_ref(&(bo_legacy->base));
+
+ return (struct radeon_bo*)bo_legacy;
+}
+
+static void bo_ref(struct radeon_bo *bo)
+{
+}
+
+static struct radeon_bo *bo_unref(struct radeon_bo *bo)
+{
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+ if (bo->cref <= 0) {
+ bo_legacy->prev->next = bo_legacy->next;
+ if (bo_legacy->next) {
+ bo_legacy->next->prev = bo_legacy->prev;
+ }
+ if (!bo_legacy->is_pending) {
+ bo_free(bo_legacy);
+ }
+ return NULL;
+ }
+ return bo;
+}
+
+static int bo_map(struct radeon_bo *bo, int write)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+ legacy_wait_pending(bo);
+ bo_legacy->validated = 0;
+ bo_legacy->dirty = 1;
+ bo_legacy->map_count++;
+ bo->ptr = bo_legacy->ptr;
+ /* Read the first pixel in the frame buffer. This should
+ * be a noop, right? In fact without this conform fails as reading
+ * from the framebuffer sometimes produces old results -- the
+ * on-card read cache gets mixed up and doesn't notice that the
+ * framebuffer has been updated.
+ *
+ * Note that we should probably be reading some otherwise unused
+ * region of VRAM, otherwise we might get incorrect results when
+ * reading pixels from the top left of the screen.
+ *
+ * I found this problem on an R420 with glean's texCube test.
+ * Note that the R200 span code also *writes* the first pixel in the
+ * framebuffer, but I've found this to be unnecessary.
+ * -- Nicolai Hähnle, June 2008
+ */
+ if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+ int p;
+ volatile int *buf = (int*)boml->screen->driScreen->pFB;
+ p = *buf;
+ }
+
+ return 0;
+}
+
+static int bo_unmap(struct radeon_bo *bo)
+{
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+ if (--bo_legacy->map_count > 0)
+ {
+ return 0;
+ }
+
+ bo->ptr = NULL;
+
+ return 0;
+}
+
+static int bo_is_busy(struct radeon_bo *bo, uint32_t *domain)
+{
+ *domain = 0;
+ if (bo->domains & RADEON_GEM_DOMAIN_GTT)
+ *domain = RADEON_GEM_DOMAIN_GTT;
+ else
+ *domain = RADEON_GEM_DOMAIN_CPU;
+ if (legacy_is_pending(bo))
+ return -EBUSY;
+ else
+ return 0;
+}
+
+static int bo_is_static(struct radeon_bo *bo)
+{
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+ return bo_legacy->static_bo;
+}
+
+static struct radeon_bo_funcs bo_legacy_funcs = {
+ bo_open,
+ bo_ref,
+ bo_unref,
+ bo_map,
+ bo_unmap,
+ NULL,
+ bo_is_static,
+ NULL,
+ NULL,
+ bo_is_busy
+};
+
+static int bo_vram_validate(struct radeon_bo *bo,
+ uint32_t *soffset,
+ uint32_t *eoffset)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+ int r;
+ int retry_count = 0, pending_retry = 0;
+
+ if (!bo_legacy->tobj) {
+ bo_legacy->tobj = CALLOC(sizeof(struct bo_legacy_texture_object));
+ bo_legacy->tobj->parent = bo_legacy;
+ make_empty_list(&bo_legacy->tobj->base);
+ bo_legacy->tobj->base.totalSize = bo->size;
+ retry:
+ r = driAllocateTexture(&boml->texture_heap, 1,
+ &bo_legacy->tobj->base);
+ if (r) {
+ pending_retry = 0;
+ while(boml->cpendings && pending_retry++ < 10000) {
+ legacy_track_pending(&boml->base, 0);
+ retry_count++;
+ if (retry_count > 2) {
+ free(bo_legacy->tobj);
+ bo_legacy->tobj = NULL;
+ fprintf(stderr, "Ouch! vram_validate failed %d\n", r);
+ return -1;
+ }
+ goto retry;
+ }
+ }
+ bo_legacy->offset = boml->texture_offset +
+ bo_legacy->tobj->base.memBlock->ofs;
+ bo_legacy->dirty = 1;
+ }
+
+ assert(bo_legacy->tobj->base.memBlock);
+
+ if (bo_legacy->tobj)
+ driUpdateTextureLRU(&bo_legacy->tobj->base);
+
+ if (bo_legacy->dirty || bo_legacy->tobj->base.dirty_images[0]) {
+ if (IS_R600_CLASS(boml->screen)) {
+ drm_radeon_texture_t tex;
+ drm_radeon_tex_image_t tmp;
+ int ret;
+
+ tex.offset = bo_legacy->offset;
+ tex.image = &tmp;
+ assert(!(tex.offset & 1023));
+
+ tmp.x = 0;
+ tmp.y = 0;
+ tmp.width = bo->size;
+ tmp.height = 1;
+ tmp.data = bo_legacy->ptr;
+ tex.format = RADEON_TXFORMAT_ARGB8888;
+ tex.width = tmp.width;
+ tex.height = tmp.height;
+ tex.pitch = bo->size;
+ do {
+ ret = drmCommandWriteRead(bo->bom->fd,
+ DRM_RADEON_TEXTURE,
+ &tex,
+ sizeof(drm_radeon_texture_t));
+ if (ret) {
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n");
+ usleep(1);
+ }
+ } while (ret == -EAGAIN);
+ } else {
+ /* Copy to VRAM using a blit.
+ * All memory is 4K aligned. We're using 1024 pixels wide blits.
+ */
+ drm_radeon_texture_t tex;
+ drm_radeon_tex_image_t tmp;
+ int ret;
+
+ tex.offset = bo_legacy->offset;
+ tex.image = &tmp;
+ assert(!(tex.offset & 1023));
+
+ tmp.x = 0;
+ tmp.y = 0;
+ if (bo->size < 4096) {
+ tmp.width = (bo->size + 3) / 4;
+ tmp.height = 1;
+ } else {
+ tmp.width = 1024;
+ tmp.height = (bo->size + 4095) / 4096;
+ }
+ tmp.data = bo_legacy->ptr;
+ tex.format = RADEON_TXFORMAT_ARGB8888;
+ tex.width = tmp.width;
+ tex.height = tmp.height;
+ tex.pitch = MAX2(tmp.width / 16, 1);
+ do {
+ ret = drmCommandWriteRead(bo->bom->fd,
+ DRM_RADEON_TEXTURE,
+ &tex,
+ sizeof(drm_radeon_texture_t));
+ if (ret) {
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n");
+ usleep(1);
+ }
+ } while (ret == -EAGAIN);
+ }
+ bo_legacy->dirty = 0;
+ bo_legacy->tobj->base.dirty_images[0] = 0;
+ }
+ return 0;
+}
+
+/*
+ * radeon_bo_legacy_validate -
+ * returns:
+ * 0 - all good
+ * -EINVAL - mapped buffer can't be validated
+ * -EAGAIN - restart validation we've kicked all the buffers out
+ */
+int radeon_bo_legacy_validate(struct radeon_bo *bo,
+ uint32_t *soffset,
+ uint32_t *eoffset)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+ int r;
+ int retries = 0;
+
+ if (bo_legacy->map_count) {
+ fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n",
+ bo, bo->size, bo_legacy->map_count);
+ return -EINVAL;
+ }
+ if (bo_legacy->static_bo || bo_legacy->validated) {
+ *soffset = bo_legacy->offset;
+ *eoffset = bo_legacy->offset + bo->size;
+
+ return 0;
+ }
+ if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+
+ r = bo_vram_validate(bo, soffset, eoffset);
+ if (r) {
+ legacy_track_pending(&boml->base, 0);
+ legacy_kick_all_buffers(boml);
+ retries++;
+ if (retries == 2) {
+ fprintf(stderr,"legacy bo: failed to get relocations into aperture\n");
+ assert(0);
+ exit(-1);
+ }
+ return -EAGAIN;
+ }
+ }
+ *soffset = bo_legacy->offset;
+ *eoffset = bo_legacy->offset + bo->size;
+ bo_legacy->validated = 1;
+
+ return 0;
+}
+
+void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+ bo_legacy->pending = pending;
+ bo_legacy->is_pending++;
+ /* add to pending list */
+ radeon_bo_ref(bo);
+ if (bo_legacy->is_pending > 1) {
+ return;
+ }
+ bo_legacy->pprev = boml->pending_bos.pprev;
+ bo_legacy->pnext = NULL;
+ bo_legacy->pprev->pnext = bo_legacy;
+ boml->pending_bos.pprev = bo_legacy;
+ boml->cpendings++;
+}
+
+void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
+ struct bo_legacy *bo_legacy;
+
+ if (bom == NULL) {
+ return;
+ }
+ bo_legacy = boml->bos.next;
+ while (bo_legacy) {
+ struct bo_legacy *next;
+
+ next = bo_legacy->next;
+ bo_free(bo_legacy);
+ bo_legacy = next;
+ }
+ driDestroyTextureHeap(boml->texture_heap);
+ free(boml->free_handles);
+ free(boml);
+}
+
+static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy *bom,
+ int size,
+ uint32_t offset)
+{
+ struct bo_legacy *bo;
+
+ bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ if (bo == NULL)
+ return NULL;
+ bo->static_bo = 1;
+ bo->offset = offset + bom->fb_location;
+ bo->base.handle = bo->offset;
+ bo->ptr = bom->screen->driScreen->pFB + offset;
+ if (bo->base.handle > bom->nhandle) {
+ bom->nhandle = bo->base.handle + 1;
+ }
+ radeon_bo_ref(&(bo->base));
+ return bo;
+}
+
+struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn)
+{
+ struct bo_manager_legacy *bom;
+ struct bo_legacy *bo;
+ unsigned size;
+
+ bom = (struct bo_manager_legacy*)
+ calloc(1, sizeof(struct bo_manager_legacy));
+ if (bom == NULL) {
+ return NULL;
+ }
+
+ make_empty_list(&bom->texture_swapped);
+
+ bom->texture_heap = driCreateTextureHeap(0,
+ bom,
+ scrn->texSize[0],
+ 12,
+ RADEON_NR_TEX_REGIONS,
+ (drmTextureRegionPtr)scrn->sarea->tex_list[0],
+ &scrn->sarea->tex_age[0],
+ &bom->texture_swapped,
+ sizeof(struct bo_legacy_texture_object),
+ &bo_legacy_tobj_destroy);
+ bom->texture_offset = scrn->texOffset[0];
+
+ bom->base.funcs = &bo_legacy_funcs;
+ bom->base.fd = scrn->driScreen->fd;
+ bom->bos.next = NULL;
+ bom->bos.prev = NULL;
+ bom->pending_bos.pprev = &bom->pending_bos;
+ bom->pending_bos.pnext = NULL;
+ bom->screen = scrn;
+ bom->fb_location = scrn->fbLocation;
+ bom->nhandle = 1;
+ bom->cfree_handles = 0;
+ bom->nfree_handles = 0x400;
+ bom->free_handles = (uint32_t*)malloc(bom->nfree_handles * 4);
+ if (bom->free_handles == NULL) {
+ radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+ return NULL;
+ }
+
+ /* biggest framebuffer size */
+ size = 4096*4096*4;
+
+ /* allocate front */
+ bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->frontOffset);
+
+ if (!bo) {
+ radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+ return NULL;
+ }
+ if (scrn->sarea->tiling_enabled) {
+ bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
+ }
+
+ /* allocate back */
+ bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->backOffset);
+
+ if (!bo) {
+ radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+ return NULL;
+ }
+ if (scrn->sarea->tiling_enabled) {
+ bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
+ }
+
+ /* allocate depth */
+ bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->depthOffset);
+
+ if (!bo) {
+ radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+ return NULL;
+ }
+ bo->base.flags = 0;
+ if (scrn->sarea->tiling_enabled) {
+ bo->base.flags |= RADEON_BO_FLAGS_MACRO_TILE;
+ bo->base.flags |= RADEON_BO_FLAGS_MICRO_TILE;
+ }
+ return (struct radeon_bo_manager*)bom;
+}
+
+void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
+ DRI_AGE_TEXTURES(boml->texture_heap);
+}
+
+unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo)
+{
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+ if (bo_legacy->static_bo || (bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+ return 0;
+ }
+ return bo->size;
+}
+
+/*
+ * Fake up a bo for things like texture image_override.
+ * bo->offset already includes fb_location
+ */
+struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom,
+ int size,
+ uint32_t offset)
+{
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
+ struct bo_legacy *bo;
+
+ bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ if (bo == NULL)
+ return NULL;
+ bo->static_bo = 1;
+ bo->offset = offset;
+ bo->base.handle = bo->offset;
+ bo->ptr = boml->screen->driScreen->pFB + (offset - boml->fb_location);
+ if (bo->base.handle > boml->nhandle) {
+ boml->nhandle = bo->base.handle + 1;
+ }
+ radeon_bo_ref(&(bo->base));
+ return &(bo->base);
+}
+
diff --git a/radeon/radeon_bo_legacy.h b/radeon/radeon_bo_legacy.h
new file mode 100644
index 0000000..2cf15df
--- /dev/null
+++ b/radeon/radeon_bo_legacy.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Aapo Tahkola <aet@rasterburn.org>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_BO_LEGACY_H
+#define RADEON_BO_LEGACY_H
+
+#include "radeon_screen.h"
+
+void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending);
+int radeon_bo_legacy_validate(struct radeon_bo *bo,
+ uint32_t *soffset,
+ uint32_t *eoffset);
+struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn);
+void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom);
+void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom);
+unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo);
+struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom,
+ int size,
+ uint32_t offset);
+void legacy_track_pending(struct radeon_bo_manager *bom, int debug);
+
+#endif
diff --git a/radeon/radeon_bocs_wrapper.h b/radeon/radeon_bocs_wrapper.h
new file mode 100644
index 0000000..4520a7d
--- /dev/null
+++ b/radeon/radeon_bocs_wrapper.h
@@ -0,0 +1,99 @@
+#ifndef RADEON_CS_WRAPPER_H
+#define RADEON_CS_WRAPPER_H
+
+#ifdef HAVE_LIBDRM_RADEON
+
+#include "radeon_bo.h"
+#include "radeon_bo_gem.h"
+#include "radeon_cs.h"
+#include "radeon_cs_gem.h"
+
+#else
+#include <stdint.h>
+
+#define RADEON_GEM_DOMAIN_CPU 0x1 // Cached CPU domain
+#define RADEON_GEM_DOMAIN_GTT 0x2 // GTT or cache flushed
+#define RADEON_GEM_DOMAIN_VRAM 0x4 // VRAM domain
+
+#define RADEON_TILING_MACRO 0x1
+#define RADEON_TILING_MICRO 0x2
+#define RADEON_TILING_SWAP 0x4
+#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface
+ * when mapped - i.e. front buffer */
+
+/* to be used to build locally in mesa with no libdrm bits */
+#include "../radeon/radeon_bo_drm.h"
+#include "../radeon/radeon_cs_drm.h"
+
+#ifndef DRM_RADEON_GEM_INFO
+#define DRM_RADEON_GEM_INFO 0x1c
+
+struct drm_radeon_gem_info {
+ uint64_t gart_size;
+ uint64_t vram_size;
+ uint64_t vram_visible;
+};
+
+struct drm_radeon_info {
+ uint32_t request;
+ uint32_t pad;
+ uint32_t value;
+};
+#endif
+
+#ifndef RADEON_PARAM_DEVICE_ID
+#define RADEON_PARAM_DEVICE_ID 16
+#endif
+
+#ifndef RADEON_PARAM_NUM_Z_PIPES
+#define RADEON_PARAM_NUM_Z_PIPES 17
+#endif
+
+#ifndef RADEON_INFO_DEVICE_ID
+#define RADEON_INFO_DEVICE_ID 0
+#endif
+#ifndef RADEON_INFO_NUM_GB_PIPES
+#define RADEON_INFO_NUM_GB_PIPES 0
+#endif
+
+#ifndef RADEON_INFO_NUM_Z_PIPES
+#define RADEON_INFO_NUM_Z_PIPES 0
+#endif
+
+#ifndef DRM_RADEON_INFO
+#define DRM_RADEON_INFO 0x1
+#endif
+
+
+static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy)
+{
+ return 0;
+}
+
+static inline void *radeon_bo_manager_gem_ctor(int fd)
+{
+ return NULL;
+}
+
+static inline void radeon_bo_manager_gem_dtor(void *dummy)
+{
+}
+
+static inline void *radeon_cs_manager_gem_ctor(int fd)
+{
+ return NULL;
+}
+
+static inline void radeon_cs_manager_gem_dtor(void *dummy)
+{
+}
+
+static inline void radeon_tracker_print(void *ptr, int io)
+{
+}
+#endif
+
+#include "radeon_bo_legacy.h"
+#include "radeon_cs_legacy.h"
+
+#endif
diff --git a/radeon/radeon_buffer_objects.c b/radeon/radeon_buffer_objects.c
new file mode 100644
index 0000000..8fac5c6
--- /dev/null
+++ b/radeon/radeon_buffer_objects.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_buffer_objects.h"
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/bufferobj.h"
+
+#include "radeon_common.h"
+
+struct radeon_buffer_object *
+get_radeon_buffer_object(struct gl_buffer_object *obj)
+{
+ return (struct radeon_buffer_object *) obj;
+}
+
+static struct gl_buffer_object *
+radeonNewBufferObject(GLcontext * ctx,
+ GLuint name,
+ GLenum target)
+{
+ struct radeon_buffer_object *obj = CALLOC_STRUCT(radeon_buffer_object);
+
+ _mesa_initialize_buffer_object(&obj->Base, name, target);
+
+ obj->bo = NULL;
+
+ return &obj->Base;
+}
+
+/**
+ * Called via glDeleteBuffersARB().
+ */
+static void
+radeonDeleteBufferObject(GLcontext * ctx,
+ struct gl_buffer_object *obj)
+{
+ struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+ if (obj->Pointer) {
+ radeon_bo_unmap(radeon_obj->bo);
+ }
+
+ if (radeon_obj->bo) {
+ radeon_bo_unref(radeon_obj->bo);
+ }
+
+ _mesa_free(radeon_obj);
+}
+
+
+/**
+ * Allocate space for and store data in a buffer object. Any data that was
+ * previously stored in the buffer object is lost. If data is NULL,
+ * memory will be allocated, but no copy will occur.
+ * Called via ctx->Driver.BufferData().
+ * \return GL_TRUE for success, GL_FALSE if out of memory
+ */
+static GLboolean
+radeonBufferData(GLcontext * ctx,
+ GLenum target,
+ GLsizeiptrARB size,
+ const GLvoid * data,
+ GLenum usage,
+ struct gl_buffer_object *obj)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+ radeon_obj->Base.Size = size;
+ radeon_obj->Base.Usage = usage;
+
+ if (radeon_obj->bo != NULL) {
+ radeon_bo_unref(radeon_obj->bo);
+ radeon_obj->bo = NULL;
+ }
+
+ if (size != 0) {
+ radeon_obj->bo = radeon_bo_open(radeon->radeonScreen->bom,
+ 0,
+ size,
+ 32,
+ RADEON_GEM_DOMAIN_GTT,
+ 0);
+
+ if (!radeon_obj->bo)
+ return GL_FALSE;
+
+ if (data != NULL) {
+ radeon_bo_map(radeon_obj->bo, GL_TRUE);
+
+ _mesa_memcpy(radeon_obj->bo->ptr, data, size);
+
+ radeon_bo_unmap(radeon_obj->bo);
+ }
+ }
+ return GL_TRUE;
+}
+
+/**
+ * Replace data in a subrange of buffer object. If the data range
+ * specified by size + offset extends beyond the end of the buffer or
+ * if data is NULL, no copy is performed.
+ * Called via glBufferSubDataARB().
+ */
+static void
+radeonBufferSubData(GLcontext * ctx,
+ GLenum target,
+ GLintptrARB offset,
+ GLsizeiptrARB size,
+ const GLvoid * data,
+ struct gl_buffer_object *obj)
+{
+ struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+ radeon_bo_map(radeon_obj->bo, GL_TRUE);
+
+ _mesa_memcpy(radeon_obj->bo->ptr + offset, data, size);
+
+ radeon_bo_unmap(radeon_obj->bo);
+}
+
+/**
+ * Called via glGetBufferSubDataARB()
+ */
+static void
+radeonGetBufferSubData(GLcontext * ctx,
+ GLenum target,
+ GLintptrARB offset,
+ GLsizeiptrARB size,
+ GLvoid * data,
+ struct gl_buffer_object *obj)
+{
+ struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+ radeon_bo_map(radeon_obj->bo, GL_FALSE);
+
+ _mesa_memcpy(data, radeon_obj->bo->ptr + offset, size);
+
+ radeon_bo_unmap(radeon_obj->bo);
+}
+
+/**
+ * Called via glMapBufferARB()
+ */
+static void *
+radeonMapBuffer(GLcontext * ctx,
+ GLenum target,
+ GLenum access,
+ struct gl_buffer_object *obj)
+{
+ struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+ if (access == GL_WRITE_ONLY_ARB) {
+ ctx->Driver.Flush(ctx);
+ }
+
+ if (radeon_obj->bo == NULL) {
+ obj->Pointer = NULL;
+ return NULL;
+ }
+
+ radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB);
+
+ obj->Pointer = radeon_obj->bo->ptr;
+ obj->Length = obj->Size;
+ obj->Offset = 0;
+
+ return obj->Pointer;
+}
+
+
+/**
+ * Called via glUnmapBufferARB()
+ */
+static GLboolean
+radeonUnmapBuffer(GLcontext * ctx,
+ GLenum target,
+ struct gl_buffer_object *obj)
+{
+ struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+ if (radeon_obj->bo != NULL) {
+ radeon_bo_unmap(radeon_obj->bo);
+ }
+
+ obj->Pointer = NULL;
+ obj->Offset = 0;
+ obj->Length = 0;
+
+ return GL_TRUE;
+}
+
+void
+radeonInitBufferObjectFuncs(struct dd_function_table *functions)
+{
+ functions->NewBufferObject = radeonNewBufferObject;
+ functions->DeleteBuffer = radeonDeleteBufferObject;
+ functions->BufferData = radeonBufferData;
+ functions->BufferSubData = radeonBufferSubData;
+ functions->GetBufferSubData = radeonGetBufferSubData;
+ functions->MapBuffer = radeonMapBuffer;
+ functions->UnmapBuffer = radeonUnmapBuffer;
+}
diff --git a/radeon/radeon_buffer_objects.h b/radeon/radeon_buffer_objects.h
new file mode 100644
index 0000000..d681960
--- /dev/null
+++ b/radeon/radeon_buffer_objects.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_BUFFER_OBJECTS_H
+#define RADEON_BUFFER_OBJECTS_H
+
+#include "main/mtypes.h"
+
+struct radeon_bo;
+
+/**
+ * Radeon vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
+ */
+struct radeon_buffer_object
+{
+ struct gl_buffer_object Base;
+ struct radeon_bo *bo;
+};
+
+struct radeon_buffer_object *
+get_radeon_buffer_object(struct gl_buffer_object *obj);
+
+/**
+ * Hook the bufferobject implementation into mesa:
+ */
+void radeonInitBufferObjectFuncs(struct dd_function_table *functions);
+
+#endif
diff --git a/radeon/radeon_chipset.h b/radeon/radeon_chipset.h
index f6bd1eb..46a9cd5 100644
--- a/radeon/radeon_chipset.h
+++ b/radeon/radeon_chipset.h
@@ -255,6 +255,145 @@
#define PCI_CHIP_RS740_796E 0x796E
#define PCI_CHIP_RS740_796F 0x796F
+#define PCI_CHIP_R600_9400 0x9400
+#define PCI_CHIP_R600_9401 0x9401
+#define PCI_CHIP_R600_9402 0x9402
+#define PCI_CHIP_R600_9403 0x9403
+#define PCI_CHIP_R600_9405 0x9405
+#define PCI_CHIP_R600_940A 0x940A
+#define PCI_CHIP_R600_940B 0x940B
+#define PCI_CHIP_R600_940F 0x940F
+
+#define PCI_CHIP_RV610_94C0 0x94C0
+#define PCI_CHIP_RV610_94C1 0x94C1
+#define PCI_CHIP_RV610_94C3 0x94C3
+#define PCI_CHIP_RV610_94C4 0x94C4
+#define PCI_CHIP_RV610_94C5 0x94C5
+#define PCI_CHIP_RV610_94C6 0x94C6
+#define PCI_CHIP_RV610_94C7 0x94C7
+#define PCI_CHIP_RV610_94C8 0x94C8
+#define PCI_CHIP_RV610_94C9 0x94C9
+#define PCI_CHIP_RV610_94CB 0x94CB
+#define PCI_CHIP_RV610_94CC 0x94CC
+#define PCI_CHIP_RV610_94CD 0x94CD
+
+#define PCI_CHIP_RV630_9580 0x9580
+#define PCI_CHIP_RV630_9581 0x9581
+#define PCI_CHIP_RV630_9583 0x9583
+#define PCI_CHIP_RV630_9586 0x9586
+#define PCI_CHIP_RV630_9587 0x9587
+#define PCI_CHIP_RV630_9588 0x9588
+#define PCI_CHIP_RV630_9589 0x9589
+#define PCI_CHIP_RV630_958A 0x958A
+#define PCI_CHIP_RV630_958B 0x958B
+#define PCI_CHIP_RV630_958C 0x958C
+#define PCI_CHIP_RV630_958D 0x958D
+#define PCI_CHIP_RV630_958E 0x958E
+#define PCI_CHIP_RV630_958F 0x958F
+
+#define PCI_CHIP_RV670_9500 0x9500
+#define PCI_CHIP_RV670_9501 0x9501
+#define PCI_CHIP_RV670_9504 0x9504
+#define PCI_CHIP_RV670_9505 0x9505
+#define PCI_CHIP_RV670_9506 0x9506
+#define PCI_CHIP_RV670_9507 0x9507
+#define PCI_CHIP_RV670_9508 0x9508
+#define PCI_CHIP_RV670_9509 0x9509
+#define PCI_CHIP_RV670_950F 0x950F
+#define PCI_CHIP_RV670_9511 0x9511
+#define PCI_CHIP_RV670_9515 0x9515
+#define PCI_CHIP_RV670_9517 0x9517
+#define PCI_CHIP_RV670_9519 0x9519
+
+#define PCI_CHIP_RV620_95C0 0x95C0
+#define PCI_CHIP_RV620_95C2 0x95C2
+#define PCI_CHIP_RV620_95C4 0x95C4
+#define PCI_CHIP_RV620_95C5 0x95C5
+#define PCI_CHIP_RV620_95C6 0x95C6
+#define PCI_CHIP_RV620_95C7 0x95C7
+#define PCI_CHIP_RV620_95C9 0x95C9
+#define PCI_CHIP_RV620_95CC 0x95CC
+#define PCI_CHIP_RV620_95CD 0x95CD
+#define PCI_CHIP_RV620_95CE 0x95CE
+#define PCI_CHIP_RV620_95CF 0x95CF
+
+#define PCI_CHIP_RV635_9590 0x9590
+#define PCI_CHIP_RV635_9591 0x9591
+#define PCI_CHIP_RV635_9593 0x9593
+#define PCI_CHIP_RV635_9595 0x9595
+#define PCI_CHIP_RV635_9596 0x9596
+#define PCI_CHIP_RV635_9597 0x9597
+#define PCI_CHIP_RV635_9598 0x9598
+#define PCI_CHIP_RV635_9599 0x9599
+#define PCI_CHIP_RV635_959B 0x959B
+
+#define PCI_CHIP_RS780_9610 0x9610
+#define PCI_CHIP_RS780_9611 0x9611
+#define PCI_CHIP_RS780_9612 0x9612
+#define PCI_CHIP_RS780_9613 0x9613
+#define PCI_CHIP_RS780_9614 0x9614
+#define PCI_CHIP_RS780_9615 0x9615
+#define PCI_CHIP_RS780_9616 0x9616
+
+#define PCI_CHIP_RS880_9710 0x9710
+#define PCI_CHIP_RS880_9711 0x9711
+#define PCI_CHIP_RS880_9712 0x9712
+#define PCI_CHIP_RS880_9713 0x9713
+#define PCI_CHIP_RS880_9714 0x9714
+
+#define PCI_CHIP_RV770_9440 0x9440
+#define PCI_CHIP_RV770_9441 0x9441
+#define PCI_CHIP_RV770_9442 0x9442
+#define PCI_CHIP_RV770_9443 0x9443
+#define PCI_CHIP_RV770_9444 0x9444
+#define PCI_CHIP_RV770_9446 0x9446
+#define PCI_CHIP_RV770_944A 0x944A
+#define PCI_CHIP_RV770_944B 0x944B
+#define PCI_CHIP_RV770_944C 0x944C
+#define PCI_CHIP_RV770_944E 0x944E
+#define PCI_CHIP_RV770_9450 0x9450
+#define PCI_CHIP_RV770_9452 0x9452
+#define PCI_CHIP_RV770_9456 0x9456
+#define PCI_CHIP_RV770_945A 0x945A
+#define PCI_CHIP_RV770_945B 0x945B
+#define PCI_CHIP_RV790_9460 0x9460
+#define PCI_CHIP_RV790_9462 0x9462
+#define PCI_CHIP_RV770_946A 0x946A
+#define PCI_CHIP_RV770_946B 0x946B
+#define PCI_CHIP_RV770_947A 0x947A
+#define PCI_CHIP_RV770_947B 0x947B
+
+#define PCI_CHIP_RV730_9480 0x9480
+#define PCI_CHIP_RV730_9487 0x9487
+#define PCI_CHIP_RV730_9488 0x9488
+#define PCI_CHIP_RV730_9489 0x9489
+#define PCI_CHIP_RV730_948F 0x948F
+#define PCI_CHIP_RV730_9490 0x9490
+#define PCI_CHIP_RV730_9491 0x9491
+#define PCI_CHIP_RV730_9495 0x9495
+#define PCI_CHIP_RV730_9498 0x9498
+#define PCI_CHIP_RV730_949C 0x949C
+#define PCI_CHIP_RV730_949E 0x949E
+#define PCI_CHIP_RV730_949F 0x949F
+
+#define PCI_CHIP_RV710_9540 0x9540
+#define PCI_CHIP_RV710_9541 0x9541
+#define PCI_CHIP_RV710_9542 0x9542
+#define PCI_CHIP_RV710_954E 0x954E
+#define PCI_CHIP_RV710_954F 0x954F
+#define PCI_CHIP_RV710_9552 0x9552
+#define PCI_CHIP_RV710_9553 0x9553
+#define PCI_CHIP_RV710_9555 0x9555
+#define PCI_CHIP_RV710_9557 0x9557
+
+#define PCI_CHIP_RV740_94A0 0x94A0
+#define PCI_CHIP_RV740_94A1 0x94A1
+#define PCI_CHIP_RV740_94A3 0x94A3
+#define PCI_CHIP_RV740_94B1 0x94B1
+#define PCI_CHIP_RV740_94B3 0x94B3
+#define PCI_CHIP_RV740_94B4 0x94B4
+#define PCI_CHIP_RV740_94B5 0x94B5
+#define PCI_CHIP_RV740_94B9 0x94B9
enum {
CHIP_FAMILY_R100,
@@ -282,6 +421,18 @@ enum {
CHIP_FAMILY_R580,
CHIP_FAMILY_RV560,
CHIP_FAMILY_RV570,
+ CHIP_FAMILY_R600,
+ CHIP_FAMILY_RV610,
+ CHIP_FAMILY_RV630,
+ CHIP_FAMILY_RV670,
+ CHIP_FAMILY_RV620,
+ CHIP_FAMILY_RV635,
+ CHIP_FAMILY_RS780,
+ CHIP_FAMILY_RS880,
+ CHIP_FAMILY_RV770,
+ CHIP_FAMILY_RV730,
+ CHIP_FAMILY_RV710,
+ CHIP_FAMILY_RV740,
CHIP_FAMILY_LAST
};
@@ -289,6 +440,7 @@ enum {
#define RADEON_CLASS_R100 (0 << 0)
#define RADEON_CLASS_R200 (1 << 0)
#define RADEON_CLASS_R300 (2 << 0)
+#define RADEON_CLASS_R600 (3 << 0)
#define RADEON_CLASS_MASK (3 << 0)
#define RADEON_CHIPSET_TCL (1 << 2) /* tcl support - any radeon */
diff --git a/radeon/radeon_cmdbuf.h b/radeon/radeon_cmdbuf.h
new file mode 100644
index 0000000..6fcd1ce
--- /dev/null
+++ b/radeon/radeon_cmdbuf.h
@@ -0,0 +1,121 @@
+#ifndef COMMON_CMDBUF_H
+#define COMMON_CMDBUF_H
+
+#include "radeon_bocs_wrapper.h"
+
+GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller);
+int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller);
+int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller);
+void rcommonInitCmdBuf(radeonContextPtr rmesa);
+void rcommonDestroyCmdBuf(radeonContextPtr rmesa);
+
+void rcommonBeginBatch(radeonContextPtr rmesa,
+ int n,
+ int dostate,
+ const char *file,
+ const char *function,
+ int line);
+
+/* +r6/r7 : code here moved */
+
+#define CP_PACKET2 (2 << 30)
+#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
+#define CP_PACKET0_ONE(reg, n) (RADEON_CP_PACKET0 | RADEON_CP_PACKET0_ONE_REG_WR | ((n)<<16) | ((reg)>>2))
+#define CP_PACKET3(pkt, n) (RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
+
+/**
+ * Every function writing to the command buffer needs to declare this
+ * to get the necessary local variables.
+ */
+#define BATCH_LOCALS(rmesa) \
+ const radeonContextPtr b_l_rmesa = rmesa
+
+/**
+ * Prepare writing n dwords to the command buffer,
+ * including producing any necessary state emits on buffer wraparound.
+ */
+#define BEGIN_BATCH(n) rcommonBeginBatch(b_l_rmesa, n, 1, __FILE__, __FUNCTION__, __LINE__)
+
+/**
+ * Same as BEGIN_BATCH, but do not cause automatic state emits.
+ */
+#define BEGIN_BATCH_NO_AUTOSTATE(n) rcommonBeginBatch(b_l_rmesa, n, 0, __FILE__, __FUNCTION__, __LINE__)
+
+/**
+ * Write one dword to the command buffer.
+ */
+#define OUT_BATCH(data) \
+ do { \
+ radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, data);\
+ } while(0)
+
+/**
+ * Write a relocated dword to the command buffer.
+ */
+#define OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) \
+ do { \
+ int __offset = (offset); \
+ if (0 && __offset) { \
+ fprintf(stderr, "(%s:%s:%d) offset : %d\n", \
+ __FILE__, __FUNCTION__, __LINE__, __offset); \
+ } \
+ radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, __offset); \
+ radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \
+ bo, rd, wd, flags); \
+ if (!b_l_rmesa->radeonScreen->kernel_mm) \
+ b_l_rmesa->cmdbuf.cs->section_cdw += 2; \
+ } while(0)
+
+
+/**
+ * Write n dwords from ptr to the command buffer.
+ */
+#define OUT_BATCH_TABLE(ptr,n) \
+ do { \
+ radeon_cs_write_table(b_l_rmesa->cmdbuf.cs, (ptr), (n));\
+ } while(0)
+
+/**
+ * Finish writing dwords to the command buffer.
+ * The number of (direct or indirect) OUT_BATCH calls between the previous
+ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time.
+ */
+#define END_BATCH() \
+ do { \
+ radeon_cs_end(b_l_rmesa->cmdbuf.cs, __FILE__, __FUNCTION__, __LINE__);\
+ } while(0)
+
+/**
+ * After the last END_BATCH() of rendering, this indicates that flushing
+ * the command buffer now is okay.
+ */
+#define COMMIT_BATCH() \
+ do { \
+ } while(0)
+
+
+/** Single register write to command buffer; requires 2 dwords. */
+#define OUT_BATCH_REGVAL(reg, val) \
+ OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), 1)); \
+ OUT_BATCH((val))
+
+/** Continuous register range write to command buffer; requires 1 dword,
+ * expects count dwords afterwards for register contents. */
+#define OUT_BATCH_REGSEQ(reg, count) \
+ OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), (count)))
+
+/** Write a 32 bit float to the ring; requires 1 dword. */
+#define OUT_BATCH_FLOAT32(f) \
+ OUT_BATCH(radeonPackFloat32((f)))
+
+/* +r6/r7 : code here moved */
+
+/* Fire the buffered vertices no matter what.
+ */
+static INLINE void radeon_firevertices(radeonContextPtr radeon)
+{
+ if (radeon->cmdbuf.cs->cdw || radeon->dma.flush )
+ radeon->glCtx->Driver.Flush(radeon->glCtx); /* +r6/r7 */
+}
+
+#endif
diff --git a/radeon/radeon_common.c b/radeon/radeon_common.c
new file mode 100644
index 0000000..9817ff8
--- /dev/null
+++ b/radeon/radeon_common.c
@@ -0,0 +1,1349 @@
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/*
+ - Scissor implementation
+ - buffer swap/copy ioctls
+ - finish/flush
+ - state emission
+ - cmdbuffer management
+*/
+
+#include <errno.h>
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "drivers/common/meta.h"
+
+#include "vblank.h"
+
+#include "radeon_common.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_lock.h"
+#include "radeon_drm.h"
+#include "radeon_queryobj.h"
+
+/**
+ * Enable verbose debug output for emit code.
+ * 0 no output
+ * 1 most output
+ * 2 also print state alues
+ */
+#define RADEON_CMDBUF 0
+
+/* =============================================================
+ * Scissoring
+ */
+
+static GLboolean intersect_rect(drm_clip_rect_t * out,
+ drm_clip_rect_t * a, drm_clip_rect_t * b)
+{
+ *out = *a;
+ if (b->x1 > out->x1)
+ out->x1 = b->x1;
+ if (b->y1 > out->y1)
+ out->y1 = b->y1;
+ if (b->x2 < out->x2)
+ out->x2 = b->x2;
+ if (b->y2 < out->y2)
+ out->y2 = b->y2;
+ if (out->x1 >= out->x2)
+ return GL_FALSE;
+ if (out->y1 >= out->y2)
+ return GL_FALSE;
+ return GL_TRUE;
+}
+
+void radeonRecalcScissorRects(radeonContextPtr radeon)
+{
+ drm_clip_rect_t *out;
+ int i;
+
+ /* Grow cliprect store?
+ */
+ if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
+ while (radeon->state.scissor.numAllocedClipRects <
+ radeon->numClipRects) {
+ radeon->state.scissor.numAllocedClipRects += 1; /* zero case */
+ radeon->state.scissor.numAllocedClipRects *= 2;
+ }
+
+ if (radeon->state.scissor.pClipRects)
+ FREE(radeon->state.scissor.pClipRects);
+
+ radeon->state.scissor.pClipRects =
+ MALLOC(radeon->state.scissor.numAllocedClipRects *
+ sizeof(drm_clip_rect_t));
+
+ if (radeon->state.scissor.pClipRects == NULL) {
+ radeon->state.scissor.numAllocedClipRects = 0;
+ return;
+ }
+ }
+
+ out = radeon->state.scissor.pClipRects;
+ radeon->state.scissor.numClipRects = 0;
+
+ for (i = 0; i < radeon->numClipRects; i++) {
+ if (intersect_rect(out,
+ &radeon->pClipRects[i],
+ &radeon->state.scissor.rect)) {
+ radeon->state.scissor.numClipRects++;
+ out++;
+ }
+ }
+
+ if (radeon->vtbl.update_scissor)
+ radeon->vtbl.update_scissor(radeon->glCtx);
+}
+
+void radeon_get_cliprects(radeonContextPtr radeon,
+ struct drm_clip_rect **cliprects,
+ unsigned int *num_cliprects,
+ int *x_off, int *y_off)
+{
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(radeon);
+ struct radeon_framebuffer *rfb = dPriv->driverPrivate;
+
+ if (radeon->constant_cliprect) {
+ radeon->fboRect.x1 = 0;
+ radeon->fboRect.y1 = 0;
+ radeon->fboRect.x2 = radeon->glCtx->DrawBuffer->Width;
+ radeon->fboRect.y2 = radeon->glCtx->DrawBuffer->Height;
+
+ *cliprects = &radeon->fboRect;
+ *num_cliprects = 1;
+ *x_off = 0;
+ *y_off = 0;
+ } else if (radeon->front_cliprects ||
+ rfb->pf_active || dPriv->numBackClipRects == 0) {
+ *cliprects = dPriv->pClipRects;
+ *num_cliprects = dPriv->numClipRects;
+ *x_off = dPriv->x;
+ *y_off = dPriv->y;
+ } else {
+ *num_cliprects = dPriv->numBackClipRects;
+ *cliprects = dPriv->pBackClipRects;
+ *x_off = dPriv->backX;
+ *y_off = dPriv->backY;
+ }
+}
+
+/**
+ * Update cliprects and scissors.
+ */
+void radeonSetCliprects(radeonContextPtr radeon)
+{
+ __DRIdrawablePrivate *const drawable = radeon_get_drawable(radeon);
+ __DRIdrawablePrivate *const readable = radeon_get_readable(radeon);
+ struct radeon_framebuffer *const draw_rfb = drawable->driverPrivate;
+ struct radeon_framebuffer *const read_rfb = readable->driverPrivate;
+ int x_off, y_off;
+
+ radeon_get_cliprects(radeon, &radeon->pClipRects,
+ &radeon->numClipRects, &x_off, &y_off);
+
+ if ((draw_rfb->base.Width != drawable->w) ||
+ (draw_rfb->base.Height != drawable->h)) {
+ _mesa_resize_framebuffer(radeon->glCtx, &draw_rfb->base,
+ drawable->w, drawable->h);
+ draw_rfb->base.Initialized = GL_TRUE;
+ }
+
+ if (drawable != readable) {
+ if ((read_rfb->base.Width != readable->w) ||
+ (read_rfb->base.Height != readable->h)) {
+ _mesa_resize_framebuffer(radeon->glCtx, &read_rfb->base,
+ readable->w, readable->h);
+ read_rfb->base.Initialized = GL_TRUE;
+ }
+ }
+
+ if (radeon->state.scissor.enabled)
+ radeonRecalcScissorRects(radeon);
+
+}
+
+
+
+void radeonUpdateScissor( GLcontext *ctx )
+{
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ GLint x = ctx->Scissor.X, y = ctx->Scissor.Y;
+ GLsizei w = ctx->Scissor.Width, h = ctx->Scissor.Height;
+ int x1, y1, x2, y2;
+ int min_x, min_y, max_x, max_y;
+
+ if (!ctx->DrawBuffer)
+ return;
+ min_x = min_y = 0;
+ max_x = ctx->DrawBuffer->Width - 1;
+ max_y = ctx->DrawBuffer->Height - 1;
+
+ if ( !ctx->DrawBuffer->Name ) {
+ x1 = x;
+ y1 = ctx->DrawBuffer->Height - (y + h);
+ x2 = x + w - 1;
+ y2 = y1 + h - 1;
+ } else {
+ x1 = x;
+ y1 = y;
+ x2 = x + w - 1;
+ y2 = y + h - 1;
+
+ }
+ if (!rmesa->radeonScreen->kernel_mm) {
+ /* Fix scissors for dri 1 */
+
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa);
+ x1 += dPriv->x;
+ x2 += dPriv->x;
+ min_x += dPriv->x;
+ max_x += dPriv->x;
+ y1 += dPriv->y;
+ y2 += dPriv->y;
+ min_y += dPriv->y;
+ max_y += dPriv->y;
+ }
+
+ rmesa->state.scissor.rect.x1 = CLAMP(x1, min_x, max_x);
+ rmesa->state.scissor.rect.y1 = CLAMP(y1, min_y, max_y);
+ rmesa->state.scissor.rect.x2 = CLAMP(x2, min_x, max_x);
+ rmesa->state.scissor.rect.y2 = CLAMP(y2, min_y, max_y);
+
+ radeonRecalcScissorRects( rmesa );
+}
+
+/* =============================================================
+ * Scissoring
+ */
+
+void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ if (ctx->Scissor.Enabled) {
+ /* We don't pipeline cliprect changes */
+ radeon_firevertices(radeon);
+ radeonUpdateScissor(ctx);
+ }
+}
+
+void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask )
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ GLuint i;
+ drm_radeon_stipple_t stipple;
+
+ /* Must flip pattern upside down.
+ */
+ for ( i = 0 ; i < 32 ; i++ ) {
+ stipple.mask[31 - i] = ((GLuint *) mask)[i];
+ }
+
+ /* TODO: push this into cmd mechanism
+ */
+ radeon_firevertices(radeon);
+ LOCK_HARDWARE( radeon );
+
+ drmCommandWrite( radeon->dri.fd, DRM_RADEON_STIPPLE,
+ &stipple, sizeof(stipple) );
+ UNLOCK_HARDWARE( radeon );
+}
+
+
+/* ================================================================
+ * SwapBuffers with client-side throttling
+ */
+
+static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
+{
+ drm_radeon_getparam_t gp;
+ int ret;
+ uint32_t frame = 0;
+
+ gp.param = RADEON_PARAM_LAST_FRAME;
+ gp.value = (int *)&frame;
+ ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+ &gp, sizeof(gp));
+ if (ret) {
+ fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+ ret);
+ exit(1);
+ }
+
+ return frame;
+}
+
+uint32_t radeonGetAge(radeonContextPtr radeon)
+{
+ drm_radeon_getparam_t gp;
+ int ret;
+ uint32_t age;
+
+ gp.param = RADEON_PARAM_LAST_CLEAR;
+ gp.value = (int *)&age;
+ ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+ &gp, sizeof(gp));
+ if (ret) {
+ fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+ ret);
+ exit(1);
+ }
+
+ return age;
+}
+
+static void radeonEmitIrqLocked(radeonContextPtr radeon)
+{
+ drm_radeon_irq_emit_t ie;
+ int ret;
+
+ ie.irq_seq = &radeon->iw.irq_seq;
+ ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
+ &ie, sizeof(ie));
+ if (ret) {
+ fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
+ ret);
+ exit(1);
+ }
+}
+
+static void radeonWaitIrq(radeonContextPtr radeon)
+{
+ int ret;
+
+ do {
+ ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
+ &radeon->iw, sizeof(radeon->iw));
+ } while (ret && (errno == EINTR || errno == EBUSY));
+
+ if (ret) {
+ fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
+ ret);
+ exit(1);
+ }
+}
+
+static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
+{
+ drm_radeon_sarea_t *sarea = radeon->sarea;
+
+ if (radeon->do_irqs) {
+ if (radeonGetLastFrame(radeon) < sarea->last_frame) {
+ if (!radeon->irqsEmitted) {
+ while (radeonGetLastFrame(radeon) <
+ sarea->last_frame) ;
+ } else {
+ UNLOCK_HARDWARE(radeon);
+ radeonWaitIrq(radeon);
+ LOCK_HARDWARE(radeon);
+ }
+ radeon->irqsEmitted = 10;
+ }
+
+ if (radeon->irqsEmitted) {
+ radeonEmitIrqLocked(radeon);
+ radeon->irqsEmitted--;
+ }
+ } else {
+ while (radeonGetLastFrame(radeon) < sarea->last_frame) {
+ UNLOCK_HARDWARE(radeon);
+ if (radeon->do_usleeps)
+ DO_USLEEP(1);
+ LOCK_HARDWARE(radeon);
+ }
+ }
+}
+
+/* wait for idle */
+void radeonWaitForIdleLocked(radeonContextPtr radeon)
+{
+ int ret;
+ int i = 0;
+
+ do {
+ ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
+ if (ret)
+ DO_USLEEP(1);
+ } while (ret && ++i < 100);
+
+ if (ret < 0) {
+ UNLOCK_HARDWARE(radeon);
+ fprintf(stderr, "Error: R300 timed out... exiting\n");
+ exit(-1);
+ }
+}
+
+static void radeonWaitForIdle(radeonContextPtr radeon)
+{
+ if (!radeon->radeonScreen->driScreen->dri2.enabled) {
+ LOCK_HARDWARE(radeon);
+ radeonWaitForIdleLocked(radeon);
+ UNLOCK_HARDWARE(radeon);
+ }
+}
+
+static void radeon_flip_renderbuffers(struct radeon_framebuffer *rfb)
+{
+ int current_page = rfb->pf_current_page;
+ int next_page = (current_page + 1) % rfb->pf_num_pages;
+ struct gl_renderbuffer *tmp_rb;
+
+ /* Exchange renderbuffers if necessary but make sure their
+ * reference counts are preserved.
+ */
+ if (rfb->color_rb[current_page] &&
+ rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer !=
+ &rfb->color_rb[current_page]->base) {
+ tmp_rb = NULL;
+ _mesa_reference_renderbuffer(&tmp_rb,
+ rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+ tmp_rb = &rfb->color_rb[current_page]->base;
+ _mesa_reference_renderbuffer(&rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer, tmp_rb);
+ _mesa_reference_renderbuffer(&tmp_rb, NULL);
+ }
+
+ if (rfb->color_rb[next_page] &&
+ rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer !=
+ &rfb->color_rb[next_page]->base) {
+ tmp_rb = NULL;
+ _mesa_reference_renderbuffer(&tmp_rb,
+ rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+ tmp_rb = &rfb->color_rb[next_page]->base;
+ _mesa_reference_renderbuffer(&rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer, tmp_rb);
+ _mesa_reference_renderbuffer(&tmp_rb, NULL);
+ }
+}
+
+/* Copy the back color buffer to the front color buffer.
+ */
+void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
+ const drm_clip_rect_t *rect)
+{
+ radeonContextPtr rmesa;
+ struct radeon_framebuffer *rfb;
+ GLint nbox, i, ret;
+
+ assert(dPriv);
+ assert(dPriv->driContextPriv);
+ assert(dPriv->driContextPriv->driverPrivate);
+
+ rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+
+ LOCK_HARDWARE(rmesa);
+
+ rfb = dPriv->driverPrivate;
+
+ if ( RADEON_DEBUG & RADEON_IOCTL ) {
+ fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
+ }
+
+ nbox = dPriv->numClipRects; /* must be in locked region */
+
+ for ( i = 0 ; i < nbox ; ) {
+ GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
+ drm_clip_rect_t *box = dPriv->pClipRects;
+ drm_clip_rect_t *b = rmesa->sarea->boxes;
+ GLint n = 0;
+
+ for ( ; i < nr ; i++ ) {
+
+ *b = box[i];
+
+ if (rect)
+ {
+ if (rect->x1 > b->x1)
+ b->x1 = rect->x1;
+ if (rect->y1 > b->y1)
+ b->y1 = rect->y1;
+ if (rect->x2 < b->x2)
+ b->x2 = rect->x2;
+ if (rect->y2 < b->y2)
+ b->y2 = rect->y2;
+
+ if (b->x1 >= b->x2 || b->y1 >= b->y2)
+ continue;
+ }
+
+ b++;
+ n++;
+ }
+ rmesa->sarea->nbox = n;
+
+ if (!n)
+ continue;
+
+ ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
+
+ if ( ret ) {
+ fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
+ UNLOCK_HARDWARE( rmesa );
+ exit( 1 );
+ }
+ }
+
+ UNLOCK_HARDWARE( rmesa );
+}
+
+static int radeonScheduleSwap(__DRIdrawablePrivate *dPriv, GLboolean *missed_target)
+{
+ radeonContextPtr rmesa;
+
+ rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+ radeon_firevertices(rmesa);
+
+ LOCK_HARDWARE( rmesa );
+
+ if (!dPriv->numClipRects) {
+ UNLOCK_HARDWARE(rmesa);
+ usleep(10000); /* throttle invisible client 10ms */
+ return 0;
+ }
+
+ radeonWaitForFrameCompletion(rmesa);
+
+ UNLOCK_HARDWARE(rmesa);
+ driWaitForVBlank(dPriv, missed_target);
+
+ return 0;
+}
+
+static GLboolean radeonPageFlip( __DRIdrawablePrivate *dPriv )
+{
+ radeonContextPtr radeon;
+ GLint ret;
+ __DRIscreenPrivate *psp;
+ struct radeon_renderbuffer *rrb;
+ struct radeon_framebuffer *rfb;
+
+ assert(dPriv);
+ assert(dPriv->driContextPriv);
+ assert(dPriv->driContextPriv->driverPrivate);
+
+ radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+ rfb = dPriv->driverPrivate;
+ rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+
+ psp = dPriv->driScreenPriv;
+
+ LOCK_HARDWARE(radeon);
+
+ if ( RADEON_DEBUG & RADEON_IOCTL ) {
+ fprintf(stderr, "%s: pfCurrentPage: %d %d\n", __FUNCTION__,
+ radeon->sarea->pfCurrentPage, radeon->sarea->pfState);
+ }
+ drm_clip_rect_t *box = dPriv->pClipRects;
+ drm_clip_rect_t *b = radeon->sarea->boxes;
+ b[0] = box[0];
+ radeon->sarea->nbox = 1;
+
+ ret = drmCommandNone( radeon->dri.fd, DRM_RADEON_FLIP );
+
+ UNLOCK_HARDWARE(radeon);
+
+ if ( ret ) {
+ fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
+ return GL_FALSE;
+ }
+
+ if (!rfb->pf_active)
+ return GL_FALSE;
+
+ rfb->pf_current_page = radeon->sarea->pfCurrentPage;
+ radeon_flip_renderbuffers(rfb);
+ radeon_draw_buffer(radeon->glCtx, &rfb->base);
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Swap front and back buffer.
+ */
+void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
+{
+ int64_t ust;
+ __DRIscreenPrivate *psp;
+
+ if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+ radeonContextPtr radeon;
+ GLcontext *ctx;
+
+ radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+ ctx = radeon->glCtx;
+
+ if (ctx->Visual.doubleBufferMode) {
+ GLboolean missed_target;
+ struct radeon_framebuffer *rfb = dPriv->driverPrivate;
+ _mesa_notifySwapBuffers(ctx);/* flush pending rendering comands */
+
+ radeonScheduleSwap(dPriv, &missed_target);
+
+ if (rfb->pf_active) {
+ radeonPageFlip(dPriv);
+ } else {
+ radeonCopyBuffer(dPriv, NULL);
+ }
+
+ psp = dPriv->driScreenPriv;
+
+ rfb->swap_count++;
+ (*psp->systemTime->getUST)( & ust );
+ if ( missed_target ) {
+ rfb->swap_missed_count++;
+ rfb->swap_missed_ust = ust - rfb->swap_ust;
+ }
+
+ rfb->swap_ust = ust;
+ radeon->hw.all_dirty = GL_TRUE;
+ }
+ } else {
+ /* XXX this shouldn't be an error but we can't handle it for now */
+ _mesa_problem(NULL, "%s: drawable has no context!",
+ __FUNCTION__);
+ }
+}
+
+void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+ int x, int y, int w, int h )
+{
+ if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+ radeonContextPtr radeon;
+ GLcontext *ctx;
+
+ radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+ ctx = radeon->glCtx;
+
+ if (ctx->Visual.doubleBufferMode) {
+ drm_clip_rect_t rect;
+ rect.x1 = x + dPriv->x;
+ rect.y1 = (dPriv->h - y - h) + dPriv->y;
+ rect.x2 = rect.x1 + w;
+ rect.y2 = rect.y1 + h;
+ _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */
+ radeonCopyBuffer(dPriv, &rect);
+ }
+ } else {
+ /* XXX this shouldn't be an error but we can't handle it for now */
+ _mesa_problem(NULL, "%s: drawable has no context!",
+ __FUNCTION__);
+ }
+}
+
+void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrbDepth = NULL, *rrbStencil = NULL,
+ *rrbColor = NULL;
+ uint32_t offset = 0;
+
+
+ if (!fb) {
+ /* this can happen during the initial context initialization */
+ return;
+ }
+
+ /* radeons only handle 1 color draw so far */
+ if (fb->_NumColorDrawBuffers != 1) {
+ radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE);
+ return;
+ }
+
+ /* Do this here, note core Mesa, since this function is called from
+ * many places within the driver.
+ */
+ if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+ /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
+ _mesa_update_framebuffer(ctx);
+ /* this updates the DrawBuffer's Width/Height if it's a FBO */
+ _mesa_update_draw_buffer_bounds(ctx);
+ }
+
+ if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
+ /* this may occur when we're called by glBindFrameBuffer() during
+ * the process of someone setting up renderbuffers, etc.
+ */
+ /*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/
+ return;
+ }
+
+ if (fb->Name)
+ ;/* do something depthy/stencily TODO */
+
+
+ /* none */
+ if (fb->Name == 0) {
+ if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+ rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+ radeon->front_cliprects = GL_TRUE;
+ radeon->front_buffer_dirty = GL_TRUE;
+ } else {
+ rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+ radeon->front_cliprects = GL_FALSE;
+ }
+ } else {
+ /* user FBO in theory */
+ struct radeon_renderbuffer *rrb;
+ rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[0]);
+ if (rrb) {
+ offset = rrb->draw_offset;
+ rrbColor = rrb;
+ }
+ radeon->constant_cliprect = GL_TRUE;
+ }
+
+ if (rrbColor == NULL)
+ radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE);
+ else
+ radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE);
+
+
+ if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) {
+ rrbDepth = radeon_renderbuffer(fb->_DepthBuffer->Wrapped);
+ if (rrbDepth && rrbDepth->bo) {
+ radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE);
+ } else {
+ radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_TRUE);
+ }
+ } else {
+ radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE);
+ rrbDepth = NULL;
+ }
+
+ if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) {
+ rrbStencil = radeon_renderbuffer(fb->_StencilBuffer->Wrapped);
+ if (rrbStencil && rrbStencil->bo) {
+ radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE);
+ /* need to re-compute stencil hw state */
+ if (!rrbDepth)
+ rrbDepth = rrbStencil;
+ } else {
+ radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_TRUE);
+ }
+ } else {
+ radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE);
+ if (ctx->Driver.Enable != NULL)
+ ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+ else
+ ctx->NewState |= _NEW_STENCIL;
+ }
+
+ /* Update culling direction which changes depending on the
+ * orientation of the buffer:
+ */
+ if (ctx->Driver.FrontFace)
+ ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
+ else
+ ctx->NewState |= _NEW_POLYGON;
+
+ /*
+ * Update depth test state
+ */
+ if (ctx->Driver.Enable) {
+ ctx->Driver.Enable(ctx, GL_DEPTH_TEST,
+ (ctx->Depth.Test && fb->Visual.depthBits > 0));
+ /* Need to update the derived ctx->Stencil._Enabled first */
+ ctx->Driver.Enable(ctx, GL_STENCIL_TEST,
+ (ctx->Stencil.Enabled && fb->Visual.stencilBits > 0));
+ } else {
+ ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL);
+ }
+
+ _mesa_reference_renderbuffer(&radeon->state.depth.rb, &rrbDepth->base);
+ _mesa_reference_renderbuffer(&radeon->state.color.rb, &rrbColor->base);
+ radeon->state.color.draw_offset = offset;
+
+#if 0
+ /* update viewport since it depends on window size */
+ if (ctx->Driver.Viewport) {
+ ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y,
+ ctx->Viewport.Width, ctx->Viewport.Height);
+ } else {
+
+ }
+#endif
+ ctx->NewState |= _NEW_VIEWPORT;
+
+ /* Set state we know depends on drawable parameters:
+ */
+ radeonUpdateScissor(ctx);
+ radeon->NewGLState |= _NEW_SCISSOR;
+
+ if (ctx->Driver.DepthRange)
+ ctx->Driver.DepthRange(ctx,
+ ctx->Viewport.Near,
+ ctx->Viewport.Far);
+
+ /* Update culling direction which changes depending on the
+ * orientation of the buffer:
+ */
+ if (ctx->Driver.FrontFace)
+ ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
+ else
+ ctx->NewState |= _NEW_POLYGON;
+}
+
+/**
+ * Called via glDrawBuffer.
+ */
+void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+ if (RADEON_DEBUG & RADEON_DRI)
+ fprintf(stderr, "%s %s\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr( mode ));
+
+ if (ctx->DrawBuffer->Name == 0) {
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+
+ const GLboolean was_front_buffer_rendering =
+ radeon->is_front_buffer_rendering;
+
+ radeon->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) ||
+ (mode == GL_FRONT);
+
+ /* If we weren't front-buffer rendering before but we are now, make sure
+ * that the front-buffer has actually been allocated.
+ */
+ if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) {
+ radeon_update_renderbuffers(radeon->dri.context,
+ radeon->dri.context->driDrawablePriv);
+ }
+ }
+
+ radeon_draw_buffer(ctx, ctx->DrawBuffer);
+}
+
+void radeonReadBuffer( GLcontext *ctx, GLenum mode )
+{
+ if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) {
+ struct radeon_context *const rmesa = RADEON_CONTEXT(ctx);
+ const GLboolean was_front_buffer_reading = rmesa->is_front_buffer_reading;
+ rmesa->is_front_buffer_reading = (mode == GL_FRONT_LEFT)
+ || (mode == GL_FRONT);
+
+ if (!was_front_buffer_reading && rmesa->is_front_buffer_reading) {
+ radeon_update_renderbuffers(rmesa->dri.context,
+ rmesa->dri.context->driReadablePriv);
+ }
+ }
+ /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
+ if (ctx->ReadBuffer == ctx->DrawBuffer) {
+ /* This will update FBO completeness status.
+ * A framebuffer will be incomplete if the GL_READ_BUFFER setting
+ * refers to a missing renderbuffer. Calling glReadBuffer can set
+ * that straight and can make the drawing buffer complete.
+ */
+ radeon_draw_buffer(ctx, ctx->DrawBuffer);
+ }
+}
+
+
+/* Turn on/off page flipping according to the flags in the sarea:
+ */
+void radeonUpdatePageFlipping(radeonContextPtr radeon)
+{
+ struct radeon_framebuffer *rfb = radeon_get_drawable(radeon)->driverPrivate;
+
+ rfb->pf_active = radeon->sarea->pfState;
+ rfb->pf_current_page = radeon->sarea->pfCurrentPage;
+ rfb->pf_num_pages = 2;
+ radeon_flip_renderbuffers(rfb);
+ radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
+}
+
+void radeon_window_moved(radeonContextPtr radeon)
+{
+ /* Cliprects has to be updated before doing anything else */
+ radeonSetCliprects(radeon);
+ if (!radeon->radeonScreen->driScreen->dri2.enabled) {
+ radeonUpdatePageFlipping(radeon);
+ }
+}
+
+void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ __DRIcontext *driContext = radeon->dri.context;
+ void (*old_viewport)(GLcontext *ctx, GLint x, GLint y,
+ GLsizei w, GLsizei h);
+
+ if (!driContext->driScreenPriv->dri2.enabled)
+ return;
+
+ if (!radeon->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
+ if (radeon->is_front_buffer_rendering) {
+ ctx->Driver.Flush(ctx);
+ }
+ radeon_update_renderbuffers(driContext, driContext->driDrawablePriv);
+ if (driContext->driDrawablePriv != driContext->driReadablePriv)
+ radeon_update_renderbuffers(driContext, driContext->driReadablePriv);
+ }
+
+ old_viewport = ctx->Driver.Viewport;
+ ctx->Driver.Viewport = NULL;
+ radeon_window_moved(radeon);
+ radeon_draw_buffer(ctx, radeon->glCtx->DrawBuffer);
+ ctx->Driver.Viewport = old_viewport;
+}
+
+static void radeon_print_state_atom_prekmm(radeonContextPtr radeon, struct radeon_state_atom *state)
+{
+ int i, j, reg;
+ int dwords = (*state->check) (radeon->glCtx, state);
+ drm_r300_cmd_header_t cmd;
+
+ fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size);
+
+ if (radeon_is_debug_enabled(RADEON_STATE, RADEON_TRACE)) {
+ if (dwords > state->cmd_size)
+ dwords = state->cmd_size;
+
+ for (i = 0; i < dwords;) {
+ cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]);
+ reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo;
+ fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
+ state->name, i, reg, cmd.packet0.count);
+ ++i;
+ for (j = 0; j < cmd.packet0.count && i < dwords; j++) {
+ fprintf(stderr, " %s[%d]: 0x%04x = %08x\n",
+ state->name, i, reg, state->cmd[i]);
+ reg += 4;
+ ++i;
+ }
+ }
+ }
+}
+
+static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state)
+{
+ int i, j, reg, count;
+ int dwords;
+ uint32_t packet0;
+ if (!radeon_is_debug_enabled(RADEON_STATE, RADEON_VERBOSE) )
+ return;
+
+ if (!radeon->radeonScreen->kernel_mm) {
+ radeon_print_state_atom_prekmm(radeon, state);
+ return;
+ }
+
+ dwords = (*state->check) (radeon->glCtx, state);
+
+ fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size);
+
+ if (radeon_is_debug_enabled(RADEON_STATE, RADEON_TRACE)) {
+ if (dwords > state->cmd_size)
+ dwords = state->cmd_size;
+ for (i = 0; i < dwords;) {
+ packet0 = state->cmd[i];
+ reg = (packet0 & 0x1FFF) << 2;
+ count = ((packet0 & 0x3FFF0000) >> 16) + 1;
+ fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
+ state->name, i, reg, count);
+ ++i;
+ for (j = 0; j < count && i < dwords; j++) {
+ fprintf(stderr, " %s[%d]: 0x%04x = %08x\n",
+ state->name, i, reg, state->cmd[i]);
+ reg += 4;
+ ++i;
+ }
+ }
+ }
+}
+
+/**
+ * Count total size for next state emit.
+ **/
+GLuint radeonCountStateEmitSize(radeonContextPtr radeon)
+{
+ struct radeon_state_atom *atom;
+ GLuint dwords = 0;
+ /* check if we are going to emit full state */
+
+ if (radeon->cmdbuf.cs->cdw && !radeon->hw.all_dirty) {
+ if (!radeon->hw.is_dirty)
+ goto out;
+ foreach(atom, &radeon->hw.atomlist) {
+ if (atom->dirty) {
+ const GLuint atom_size = atom->check(radeon->glCtx, atom);
+ dwords += atom_size;
+ if (RADEON_CMDBUF && atom_size) {
+ radeon_print_state_atom(radeon, atom);
+ }
+ }
+ }
+ } else {
+ foreach(atom, &radeon->hw.atomlist) {
+ const GLuint atom_size = atom->check(radeon->glCtx, atom);
+ dwords += atom_size;
+ if (RADEON_CMDBUF && atom_size) {
+ radeon_print_state_atom(radeon, atom);
+ }
+
+ }
+ }
+out:
+ radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %u\n", __func__, dwords);
+ return dwords;
+}
+
+static INLINE void radeon_emit_atom(radeonContextPtr radeon, struct radeon_state_atom *atom)
+{
+ BATCH_LOCALS(radeon);
+ int dwords;
+
+ dwords = (*atom->check) (radeon->glCtx, atom);
+ if (dwords) {
+
+ radeon_print_state_atom(radeon, atom);
+
+ if (atom->emit) {
+ (*atom->emit)(radeon->glCtx, atom);
+ } else {
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(atom->cmd, dwords);
+ END_BATCH();
+ }
+ } else {
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, " skip state %s\n", atom->name);
+ }
+ atom->dirty = GL_FALSE;
+
+}
+
+static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean emitAll)
+{
+ struct radeon_state_atom *atom;
+
+ if (radeon->vtbl.pre_emit_atoms)
+ radeon->vtbl.pre_emit_atoms(radeon);
+
+ /* Emit actual atoms */
+ if (radeon->hw.all_dirty || emitAll) {
+ foreach(atom, &radeon->hw.atomlist)
+ radeon_emit_atom( radeon, atom );
+ } else {
+ foreach(atom, &radeon->hw.atomlist) {
+ if ( atom->dirty )
+ radeon_emit_atom( radeon, atom );
+ }
+ }
+
+ COMMIT_BATCH();
+}
+
+static GLboolean radeon_revalidate_bos(GLcontext *ctx)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ int ret;
+
+ ret = radeon_cs_space_check(radeon->cmdbuf.cs);
+ if (ret == RADEON_CS_SPACE_FLUSH)
+ return GL_FALSE;
+ return GL_TRUE;
+}
+
+void radeonEmitState(radeonContextPtr radeon)
+{
+ radeon_print(RADEON_STATE, RADEON_NORMAL, "%s\n", __FUNCTION__);
+
+ if (radeon->vtbl.pre_emit_state)
+ radeon->vtbl.pre_emit_state(radeon);
+
+ /* this code used to return here but now it emits zbs */
+ if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty)
+ return;
+
+ if (!radeon->cmdbuf.cs->cdw) {
+ if (RADEON_DEBUG & RADEON_STATE)
+ fprintf(stderr, "Begin reemit state\n");
+
+ radeonEmitAtoms(radeon, GL_TRUE);
+ } else {
+
+ if (RADEON_DEBUG & RADEON_STATE)
+ fprintf(stderr, "Begin dirty state\n");
+
+ radeonEmitAtoms(radeon, GL_FALSE);
+ }
+
+ radeon->hw.is_dirty = GL_FALSE;
+ radeon->hw.all_dirty = GL_FALSE;
+}
+
+
+void radeonFlush(GLcontext *ctx)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw);
+
+ /* okay if we have no cmds in the buffer &&
+ we have no DMA flush &&
+ we have no DMA buffer allocated.
+ then no point flushing anything at all.
+ */
+ if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && is_empty_list(&radeon->dma.reserved))
+ return;
+
+ if (radeon->dma.flush)
+ radeon->dma.flush( ctx );
+
+ radeonEmitState(radeon);
+
+ if (radeon->cmdbuf.cs->cdw)
+ rcommonFlushCmdBuf(radeon, __FUNCTION__);
+
+ if ((ctx->DrawBuffer->Name == 0) && radeon->front_buffer_dirty) {
+ __DRIscreen *const screen = radeon->radeonScreen->driScreen;
+
+ if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2)
+ && (screen->dri2.loader->flushFrontBuffer != NULL)) {
+ __DRIdrawablePrivate * drawable = radeon_get_drawable(radeon);
+ (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate);
+
+ /* Only clear the dirty bit if front-buffer rendering is no longer
+ * enabled. This is done so that the dirty bit can only be set in
+ * glDrawBuffer. Otherwise the dirty bit would have to be set at
+ * each of N places that do rendering. This has worse performances,
+ * but it is much easier to get correct.
+ */
+ if (!radeon->is_front_buffer_rendering) {
+ radeon->front_buffer_dirty = GL_FALSE;
+ }
+ }
+ }
+
+ make_empty_list(&radeon->query.not_flushed_head);
+
+}
+
+/* Make sure all commands have been sent to the hardware and have
+ * completed processing.
+ */
+void radeonFinish(GLcontext * ctx)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ int i;
+
+ if (ctx->Driver.Flush)
+ ctx->Driver.Flush(ctx); /* +r6/r7 */
+
+ if (radeon->radeonScreen->kernel_mm) {
+ for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
+ struct radeon_renderbuffer *rrb;
+ rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[i]);
+ if (rrb && rrb->bo)
+ radeon_bo_wait(rrb->bo);
+ }
+ {
+ struct radeon_renderbuffer *rrb;
+ rrb = radeon_get_depthbuffer(radeon);
+ if (rrb && rrb->bo)
+ radeon_bo_wait(rrb->bo);
+ }
+ } else if (radeon->do_irqs) {
+ LOCK_HARDWARE(radeon);
+ radeonEmitIrqLocked(radeon);
+ UNLOCK_HARDWARE(radeon);
+ radeonWaitIrq(radeon);
+ } else {
+ radeonWaitForIdle(radeon);
+ }
+}
+
+/* cmdbuffer */
+/**
+ * Send the current command buffer via ioctl to the hardware.
+ */
+int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller)
+{
+ int ret = 0;
+
+ if (rmesa->cmdbuf.flushing) {
+ fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n");
+ exit(-1);
+ }
+ rmesa->cmdbuf.flushing = 1;
+
+ if (RADEON_DEBUG & RADEON_IOCTL) {
+ fprintf(stderr, "%s from %s - %i cliprects\n",
+ __FUNCTION__, caller, rmesa->numClipRects);
+ }
+
+ radeonEmitQueryEnd(rmesa->glCtx);
+
+ if (rmesa->cmdbuf.cs->cdw) {
+ ret = radeon_cs_emit(rmesa->cmdbuf.cs);
+ rmesa->hw.all_dirty = GL_TRUE;
+ }
+ radeon_cs_erase(rmesa->cmdbuf.cs);
+ rmesa->cmdbuf.flushing = 0;
+
+ if (radeon_revalidate_bos(rmesa->glCtx) == GL_FALSE) {
+ fprintf(stderr,"failed to revalidate buffers\n");
+ }
+
+ return ret;
+}
+
+int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller)
+{
+ int ret;
+
+ radeonReleaseDmaRegions(rmesa);
+
+ LOCK_HARDWARE(rmesa);
+ ret = rcommonFlushCmdBufLocked(rmesa, caller);
+ UNLOCK_HARDWARE(rmesa);
+
+ if (ret) {
+ fprintf(stderr, "drmRadeonCmdBuffer: %d. Kernel failed to "
+ "parse or rejected command stream. See dmesg "
+ "for more info.\n", ret);
+ _mesa_exit(ret);
+ }
+
+ return ret;
+}
+
+/**
+ * Make sure that enough space is available in the command buffer
+ * by flushing if necessary.
+ *
+ * \param dwords The number of dwords we need to be free on the command buffer
+ */
+GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller)
+{
+ if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size
+ || radeon_cs_need_flush(rmesa->cmdbuf.cs)) {
+ /* If we try to flush empty buffer there is too big rendering operation. */
+ assert(rmesa->cmdbuf.cs->cdw);
+ rcommonFlushCmdBuf(rmesa, caller);
+ return GL_TRUE;
+ }
+ return GL_FALSE;
+}
+
+void rcommonInitCmdBuf(radeonContextPtr rmesa)
+{
+ GLuint size;
+ /* Initialize command buffer */
+ size = 256 * driQueryOptioni(&rmesa->optionCache,
+ "command_buffer_size");
+ if (size < 2 * rmesa->hw.max_state_size) {
+ size = 2 * rmesa->hw.max_state_size + 65535;
+ }
+ if (size > 64 * 256)
+ size = 64 * 256;
+
+ radeon_print(RADEON_CS, RADEON_VERBOSE,
+ "sizeof(drm_r300_cmd_header_t)=%zd\n", sizeof(drm_r300_cmd_header_t));
+ radeon_print(RADEON_CS, RADEON_VERBOSE,
+ "sizeof(drm_radeon_cmd_buffer_t)=%zd\n", sizeof(drm_radeon_cmd_buffer_t));
+ radeon_print(RADEON_CS, RADEON_VERBOSE,
+ "Allocating %d bytes command buffer (max state is %d bytes)\n",
+ size * 4, rmesa->hw.max_state_size * 4);
+
+ if (rmesa->radeonScreen->kernel_mm) {
+ int fd = rmesa->radeonScreen->driScreen->fd;
+ rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
+ } else {
+ rmesa->cmdbuf.csm = radeon_cs_manager_legacy_ctor(rmesa);
+ }
+ if (rmesa->cmdbuf.csm == NULL) {
+ /* FIXME: fatal error */
+ return;
+ }
+ rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
+ assert(rmesa->cmdbuf.cs != NULL);
+ rmesa->cmdbuf.size = size;
+
+ radeon_cs_space_set_flush(rmesa->cmdbuf.cs,
+ (void (*)(void *))rmesa->glCtx->Driver.Flush, rmesa->glCtx);
+
+ if (!rmesa->radeonScreen->kernel_mm) {
+ radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
+ radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
+ } else {
+ struct drm_radeon_gem_info mminfo = { 0 };
+
+ if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
+ {
+ radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
+ radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
+ }
+ }
+
+}
+/**
+ * Destroy the command buffer
+ */
+void rcommonDestroyCmdBuf(radeonContextPtr rmesa)
+{
+ radeon_cs_destroy(rmesa->cmdbuf.cs);
+ if (rmesa->radeonScreen->driScreen->dri2.enabled || rmesa->radeonScreen->kernel_mm) {
+ radeon_cs_manager_gem_dtor(rmesa->cmdbuf.csm);
+ } else {
+ radeon_cs_manager_legacy_dtor(rmesa->cmdbuf.csm);
+ }
+}
+
+void rcommonBeginBatch(radeonContextPtr rmesa, int n,
+ int dostate,
+ const char *file,
+ const char *function,
+ int line)
+{
+ if (!rmesa->cmdbuf.cs->cdw && dostate) {
+ radeon_print(RADEON_STATE, RADEON_NORMAL,
+ "Reemit state after flush (from %s)\n", function);
+ radeonEmitState(rmesa);
+ }
+ radeon_cs_begin(rmesa->cmdbuf.cs, n, file, function, line);
+
+ radeon_print(RADEON_CS, RADEON_VERBOSE, "BEGIN_BATCH(%d) at %d, from %s:%i\n",
+ n, rmesa->cmdbuf.cs->cdw, function, line);
+
+}
+
+void radeonUserClear(GLcontext *ctx, GLuint mask)
+{
+ _mesa_meta_clear(ctx, mask);
+}
diff --git a/radeon/radeon_common.h b/radeon/radeon_common.h
new file mode 100644
index 0000000..f320191
--- /dev/null
+++ b/radeon/radeon_common.h
@@ -0,0 +1,87 @@
+#ifndef COMMON_MISC_H
+#define COMMON_MISC_H
+
+#include "radeon_common_context.h"
+#include "radeon_dma.h"
+#include "radeon_texture.h"
+
+void radeonUserClear(GLcontext *ctx, GLuint mask);
+void radeonRecalcScissorRects(radeonContextPtr radeon);
+void radeonSetCliprects(radeonContextPtr radeon);
+void radeonUpdateScissor( GLcontext *ctx );
+void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h);
+void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask );
+
+void radeonWaitForIdleLocked(radeonContextPtr radeon);
+extern uint32_t radeonGetAge(radeonContextPtr radeon);
+void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
+ const drm_clip_rect_t *rect);
+void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
+void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+ int x, int y, int w, int h );
+
+void radeonUpdatePageFlipping(radeonContextPtr rmesa);
+
+void radeonFlush(GLcontext *ctx);
+void radeonFinish(GLcontext * ctx);
+void radeonEmitState(radeonContextPtr radeon);
+GLuint radeonCountStateEmitSize(radeonContextPtr radeon);
+
+void radeon_clear_tris(GLcontext *ctx, GLbitfield mask);
+
+void radeon_window_moved(radeonContextPtr radeon);
+void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb);
+void radeonDrawBuffer( GLcontext *ctx, GLenum mode );
+void radeonReadBuffer( GLcontext *ctx, GLenum mode );
+void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height);
+void radeon_get_cliprects(radeonContextPtr radeon,
+ struct drm_clip_rect **cliprects,
+ unsigned int *num_cliprects,
+ int *x_off, int *y_off);
+void radeon_fbo_init(struct radeon_context *radeon);
+void
+radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
+ struct radeon_bo *bo);
+struct radeon_renderbuffer *
+radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv);
+static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb)
+{
+ struct radeon_renderbuffer *rrb = (struct radeon_renderbuffer *)rb;
+ if (rrb && rrb->base.ClassID == RADEON_RB_CLASS)
+ return rrb;
+ else
+ return NULL;
+}
+
+static inline struct radeon_renderbuffer *radeon_get_renderbuffer(struct gl_framebuffer *fb, int att_index)
+{
+ if (att_index >= 0)
+ return radeon_renderbuffer(fb->Attachment[att_index].Renderbuffer);
+ else
+ return NULL;
+}
+
+static inline struct radeon_renderbuffer *radeon_get_depthbuffer(radeonContextPtr rmesa)
+{
+ struct radeon_renderbuffer *rrb;
+ rrb = radeon_renderbuffer(rmesa->state.depth.rb);
+ if (!rrb)
+ return NULL;
+
+ return rrb;
+}
+
+static inline struct radeon_renderbuffer *radeon_get_colorbuffer(radeonContextPtr rmesa)
+{
+ struct radeon_renderbuffer *rrb;
+
+ rrb = radeon_renderbuffer(rmesa->state.color.rb);
+ if (!rrb)
+ return NULL;
+ return rrb;
+}
+
+#include "radeon_cmdbuf.h"
+
+
+#endif
diff --git a/radeon/radeon_common_context.c b/radeon/radeon_common_context.c
new file mode 100644
index 0000000..330721a
--- /dev/null
+++ b/radeon/radeon_common_context.c
@@ -0,0 +1,802 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+ VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#include "radeon_common.h"
+#include "xmlpool.h" /* for symbolic values of enum-type options */
+#include "utils.h"
+#include "vblank.h"
+#include "drirenderbuffer.h"
+#include "drivers/common/meta.h"
+#include "main/context.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/state.h"
+#include "main/simple_list.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */
+#include "r600_context.h"
+#endif
+
+#define DRIVER_DATE "20090101"
+
+#ifndef RADEON_DEBUG
+int RADEON_DEBUG = (0);
+#endif
+
+
+static const char* get_chip_family_name(int chip_family)
+{
+ switch(chip_family) {
+ case CHIP_FAMILY_R100: return "R100";
+ case CHIP_FAMILY_RV100: return "RV100";
+ case CHIP_FAMILY_RS100: return "RS100";
+ case CHIP_FAMILY_RV200: return "RV200";
+ case CHIP_FAMILY_RS200: return "RS200";
+ case CHIP_FAMILY_R200: return "R200";
+ case CHIP_FAMILY_RV250: return "RV250";
+ case CHIP_FAMILY_RS300: return "RS300";
+ case CHIP_FAMILY_RV280: return "RV280";
+ case CHIP_FAMILY_R300: return "R300";
+ case CHIP_FAMILY_R350: return "R350";
+ case CHIP_FAMILY_RV350: return "RV350";
+ case CHIP_FAMILY_RV380: return "RV380";
+ case CHIP_FAMILY_R420: return "R420";
+ case CHIP_FAMILY_RV410: return "RV410";
+ case CHIP_FAMILY_RS400: return "RS400";
+ case CHIP_FAMILY_RS600: return "RS600";
+ case CHIP_FAMILY_RS690: return "RS690";
+ case CHIP_FAMILY_RS740: return "RS740";
+ case CHIP_FAMILY_RV515: return "RV515";
+ case CHIP_FAMILY_R520: return "R520";
+ case CHIP_FAMILY_RV530: return "RV530";
+ case CHIP_FAMILY_R580: return "R580";
+ case CHIP_FAMILY_RV560: return "RV560";
+ case CHIP_FAMILY_RV570: return "RV570";
+ case CHIP_FAMILY_R600: return "R600";
+ case CHIP_FAMILY_RV610: return "RV610";
+ case CHIP_FAMILY_RV630: return "RV630";
+ case CHIP_FAMILY_RV670: return "RV670";
+ case CHIP_FAMILY_RV620: return "RV620";
+ case CHIP_FAMILY_RV635: return "RV635";
+ case CHIP_FAMILY_RS780: return "RS780";
+ case CHIP_FAMILY_RS880: return "RS880";
+ case CHIP_FAMILY_RV770: return "RV770";
+ case CHIP_FAMILY_RV730: return "RV730";
+ case CHIP_FAMILY_RV710: return "RV710";
+ case CHIP_FAMILY_RV740: return "RV740";
+ default: return "unknown";
+ }
+}
+
+
+/* Return various strings for glGetString().
+ */
+static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ static char buffer[128];
+
+ switch (name) {
+ case GL_VENDOR:
+ if (IS_R600_CLASS(radeon->radeonScreen))
+ return (GLubyte *) "Advanced Micro Devices, Inc.";
+ else if (IS_R300_CLASS(radeon->radeonScreen))
+ return (GLubyte *) "DRI R300 Project";
+ else
+ return (GLubyte *) "Tungsten Graphics, Inc.";
+
+ case GL_RENDERER:
+ {
+ unsigned offset;
+ GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
+ radeon->radeonScreen->AGPMode;
+ const char* chipclass;
+ char hardwarename[32];
+
+ if (IS_R600_CLASS(radeon->radeonScreen))
+ chipclass = "R600";
+ else if (IS_R300_CLASS(radeon->radeonScreen))
+ chipclass = "R300";
+ else if (IS_R200_CLASS(radeon->radeonScreen))
+ chipclass = "R200";
+ else
+ chipclass = "R100";
+
+ sprintf(hardwarename, "%s (%s %04X)",
+ chipclass,
+ get_chip_family_name(radeon->radeonScreen->chip_family),
+ radeon->radeonScreen->device_id);
+
+ offset = driGetRendererString(buffer, hardwarename, DRIVER_DATE,
+ agp_mode);
+
+ if (IS_R600_CLASS(radeon->radeonScreen)) {
+ sprintf(&buffer[offset], " TCL");
+ } else if (IS_R300_CLASS(radeon->radeonScreen)) {
+ sprintf(&buffer[offset], " %sTCL",
+ (radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
+ ? "" : "NO-");
+ } else {
+ sprintf(&buffer[offset], " %sTCL",
+ !(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
+ ? "" : "NO-");
+ }
+
+ if (radeon->radeonScreen->driScreen->dri2.enabled)
+ strcat(buffer, " DRI2");
+
+ return (GLubyte *) buffer;
+ }
+
+ default:
+ return NULL;
+ }
+}
+
+/* Initialize the driver's misc functions.
+ */
+static void radeonInitDriverFuncs(struct dd_function_table *functions)
+{
+ functions->GetString = radeonGetString;
+}
+
+/**
+ * Create and initialize all common fields of the context,
+ * including the Mesa context itself.
+ */
+GLboolean radeonInitContext(radeonContextPtr radeon,
+ struct dd_function_table* functions,
+ const __GLcontextModes * glVisual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate)
+{
+ __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+ radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+ GLcontext* ctx;
+ GLcontext* shareCtx;
+ int fthrottle_mode;
+
+ /* Fill in additional standard functions. */
+ radeonInitDriverFuncs(functions);
+
+ radeon->radeonScreen = screen;
+ /* Allocate and initialize the Mesa context */
+ if (sharedContextPrivate)
+ shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
+ else
+ shareCtx = NULL;
+ radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
+ functions, (void *)radeon);
+ if (!radeon->glCtx)
+ return GL_FALSE;
+
+ ctx = radeon->glCtx;
+ driContextPriv->driverPrivate = radeon;
+
+ meta_init_metaops(ctx, &radeon->meta);
+
+ _mesa_meta_init(ctx);
+
+ /* DRI fields */
+ radeon->dri.context = driContextPriv;
+ radeon->dri.screen = sPriv;
+ radeon->dri.hwContext = driContextPriv->hHWContext;
+ radeon->dri.hwLock = &sPriv->pSAREA->lock;
+ radeon->dri.hwLockCount = 0;
+ radeon->dri.fd = sPriv->fd;
+ radeon->dri.drmMinor = sPriv->drm_version.minor;
+
+ radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
+ screen->sarea_priv_offset);
+
+ /* Setup IRQs */
+ fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
+ radeon->iw.irq_seq = -1;
+ radeon->irqsEmitted = 0;
+ radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
+ radeon->radeonScreen->irq);
+
+ radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+
+ if (!radeon->do_irqs)
+ fprintf(stderr,
+ "IRQ's not enabled, falling back to %s: %d %d\n",
+ radeon->do_usleeps ? "usleeps" : "busy waits",
+ fthrottle_mode, radeon->radeonScreen->irq);
+
+ radeon->texture_depth = driQueryOptioni (&radeon->optionCache,
+ "texture_depth");
+ if (radeon->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+ radeon->texture_depth = ( glVisual->rgbBits > 16 ) ?
+ DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+
+ if (IS_R600_CLASS(radeon->radeonScreen)) {
+ radeon->texture_row_align = 256;
+ radeon->texture_rect_row_align = 256;
+ radeon->texture_compressed_row_align = 256;
+ } else if (IS_R200_CLASS(radeon->radeonScreen) ||
+ IS_R100_CLASS(radeon->radeonScreen)) {
+ radeon->texture_row_align = 32;
+ radeon->texture_rect_row_align = 64;
+ radeon->texture_compressed_row_align = 32;
+ } else { /* R300 - not sure this is all correct */
+ int chip_family = radeon->radeonScreen->chip_family;
+ if (chip_family == CHIP_FAMILY_RS600 ||
+ chip_family == CHIP_FAMILY_RS690 ||
+ chip_family == CHIP_FAMILY_RS740)
+ radeon->texture_row_align = 64;
+ else
+ radeon->texture_row_align = 32;
+ radeon->texture_rect_row_align = 64;
+ radeon->texture_compressed_row_align = 64;
+ }
+
+ make_empty_list(&radeon->query.not_flushed_head);
+ radeon_init_dma(radeon);
+
+ return GL_TRUE;
+}
+
+
+
+/**
+ * Destroy the command buffer and state atoms.
+ */
+static void radeon_destroy_atom_list(radeonContextPtr radeon)
+{
+ struct radeon_state_atom *atom;
+
+ foreach(atom, &radeon->hw.atomlist) {
+ FREE(atom->cmd);
+ if (atom->lastcmd)
+ FREE(atom->lastcmd);
+ }
+
+}
+
+/**
+ * Cleanup common context fields.
+ * Called by r200DestroyContext/r300DestroyContext
+ */
+void radeonDestroyContext(__DRIcontextPrivate *driContextPriv )
+{
+#ifdef RADEON_BO_TRACK
+ FILE *track;
+#endif
+ GET_CURRENT_CONTEXT(ctx);
+ radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+ radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
+
+ assert(radeon);
+
+ _mesa_meta_free(radeon->glCtx);
+
+ if (radeon == current) {
+ radeon_firevertices(radeon);
+ _mesa_make_current(NULL, NULL, NULL);
+ }
+
+ if (!is_empty_list(&radeon->dma.reserved)) {
+ rcommonFlushCmdBuf( radeon, __FUNCTION__ );
+ }
+
+ radeonFreeDmaRegions(radeon);
+ radeonReleaseArrays(radeon->glCtx, ~0);
+ meta_destroy_metaops(&radeon->meta);
+ if (radeon->vtbl.free_context)
+ radeon->vtbl.free_context(radeon->glCtx);
+ _swsetup_DestroyContext( radeon->glCtx );
+ _tnl_DestroyContext( radeon->glCtx );
+ _vbo_DestroyContext( radeon->glCtx );
+ _swrast_DestroyContext( radeon->glCtx );
+
+ /* free atom list */
+ /* free the Mesa context */
+ _mesa_destroy_context(radeon->glCtx);
+
+ /* _mesa_destroy_context() might result in calls to functions that
+ * depend on the DriverCtx, so don't set it to NULL before.
+ *
+ * radeon->glCtx->DriverCtx = NULL;
+ */
+ /* free the option cache */
+ driDestroyOptionCache(&radeon->optionCache);
+
+ rcommonDestroyCmdBuf(radeon);
+
+ radeon_destroy_atom_list(radeon);
+
+ if (radeon->state.scissor.pClipRects) {
+ FREE(radeon->state.scissor.pClipRects);
+ radeon->state.scissor.pClipRects = 0;
+ }
+#ifdef RADEON_BO_TRACK
+ track = fopen("/tmp/tracklog", "w");
+ if (track) {
+ radeon_tracker_print(&radeon->radeonScreen->bom->tracker, track);
+ fclose(track);
+ }
+#endif
+ FREE(radeon);
+}
+
+/* Force the context `c' to be unbound from its buffer.
+ */
+GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv)
+{
+ radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+
+ if (RADEON_DEBUG & RADEON_DRI)
+ fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
+ radeon->glCtx);
+
+ return GL_TRUE;
+}
+
+
+static void
+radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon,
+ struct radeon_framebuffer *draw)
+{
+ /* if radeon->fake */
+ struct radeon_renderbuffer *rb;
+
+ if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
+ if (!rb->bo) {
+ rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+ radeon->radeonScreen->frontOffset,
+ 0,
+ 0,
+ RADEON_GEM_DOMAIN_VRAM,
+ 0);
+ }
+ rb->cpp = radeon->radeonScreen->cpp;
+ rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp;
+ }
+ if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
+ if (!rb->bo) {
+ rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+ radeon->radeonScreen->backOffset,
+ 0,
+ 0,
+ RADEON_GEM_DOMAIN_VRAM,
+ 0);
+ }
+ rb->cpp = radeon->radeonScreen->cpp;
+ rb->pitch = radeon->radeonScreen->backPitch * rb->cpp;
+ }
+ if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) {
+ if (!rb->bo) {
+ rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+ radeon->radeonScreen->depthOffset,
+ 0,
+ 0,
+ RADEON_GEM_DOMAIN_VRAM,
+ 0);
+ }
+ rb->cpp = radeon->radeonScreen->cpp;
+ rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
+ }
+ if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) {
+ if (!rb->bo) {
+ rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+ radeon->radeonScreen->depthOffset,
+ 0,
+ 0,
+ RADEON_GEM_DOMAIN_VRAM,
+ 0);
+ }
+ rb->cpp = radeon->radeonScreen->cpp;
+ rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
+ }
+}
+
+static void
+radeon_make_renderbuffer_current(radeonContextPtr radeon,
+ struct radeon_framebuffer *draw)
+{
+ int size = 4096*4096*4;
+ /* if radeon->fake */
+ struct radeon_renderbuffer *rb;
+
+ if (radeon->radeonScreen->kernel_mm) {
+ radeon_make_kernel_renderbuffer_current(radeon, draw);
+ return;
+ }
+
+
+ if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
+ if (!rb->bo) {
+ rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+ radeon->radeonScreen->frontOffset +
+ radeon->radeonScreen->fbLocation,
+ size,
+ 4096,
+ RADEON_GEM_DOMAIN_VRAM,
+ 0);
+ }
+ rb->cpp = radeon->radeonScreen->cpp;
+ rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp;
+ }
+ if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
+ if (!rb->bo) {
+ rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+ radeon->radeonScreen->backOffset +
+ radeon->radeonScreen->fbLocation,
+ size,
+ 4096,
+ RADEON_GEM_DOMAIN_VRAM,
+ 0);
+ }
+ rb->cpp = radeon->radeonScreen->cpp;
+ rb->pitch = radeon->radeonScreen->backPitch * rb->cpp;
+ }
+ if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) {
+ if (!rb->bo) {
+ rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+ radeon->radeonScreen->depthOffset +
+ radeon->radeonScreen->fbLocation,
+ size,
+ 4096,
+ RADEON_GEM_DOMAIN_VRAM,
+ 0);
+ }
+ rb->cpp = radeon->radeonScreen->cpp;
+ rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
+ }
+ if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) {
+ if (!rb->bo) {
+ rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+ radeon->radeonScreen->depthOffset +
+ radeon->radeonScreen->fbLocation,
+ size,
+ 4096,
+ RADEON_GEM_DOMAIN_VRAM,
+ 0);
+ }
+ rb->cpp = radeon->radeonScreen->cpp;
+ rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
+ }
+}
+
+static unsigned
+radeon_bits_per_pixel(const struct radeon_renderbuffer *rb)
+{
+ switch (rb->base._ActualFormat) {
+ case GL_RGB5:
+ case GL_DEPTH_COMPONENT16:
+ return 16;
+ case GL_RGB8:
+ case GL_RGBA8:
+ case GL_DEPTH_COMPONENT24:
+ case GL_DEPTH24_STENCIL8_EXT:
+ case GL_STENCIL_INDEX8_EXT:
+ return 32;
+ default:
+ return 0;
+ }
+}
+
+void
+radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
+{
+ unsigned int attachments[10];
+ __DRIbuffer *buffers = NULL;
+ __DRIscreen *screen;
+ struct radeon_renderbuffer *rb;
+ int i, count;
+ struct radeon_framebuffer *draw;
+ radeonContextPtr radeon;
+ char *regname;
+ struct radeon_bo *depth_bo = NULL, *bo;
+
+ if (RADEON_DEBUG & RADEON_DRI)
+ fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
+
+ draw = drawable->driverPrivate;
+ screen = context->driScreenPriv;
+ radeon = (radeonContextPtr) context->driverPrivate;
+
+ if (screen->dri2.loader
+ && (screen->dri2.loader->base.version > 2)
+ && (screen->dri2.loader->getBuffersWithFormat != NULL)) {
+ struct radeon_renderbuffer *depth_rb;
+ struct radeon_renderbuffer *stencil_rb;
+
+ i = 0;
+ if ((radeon->is_front_buffer_rendering ||
+ radeon->is_front_buffer_reading ||
+ !draw->color_rb[1])
+ && draw->color_rb[0]) {
+ attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+ attachments[i++] = radeon_bits_per_pixel(draw->color_rb[0]);
+ }
+
+ if (draw->color_rb[1]) {
+ attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+ attachments[i++] = radeon_bits_per_pixel(draw->color_rb[1]);
+ }
+
+ depth_rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
+ stencil_rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
+
+ if ((depth_rb != NULL) && (stencil_rb != NULL)) {
+ attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL;
+ attachments[i++] = radeon_bits_per_pixel(depth_rb);
+ } else if (depth_rb != NULL) {
+ attachments[i++] = __DRI_BUFFER_DEPTH;
+ attachments[i++] = radeon_bits_per_pixel(depth_rb);
+ } else if (stencil_rb != NULL) {
+ attachments[i++] = __DRI_BUFFER_STENCIL;
+ attachments[i++] = radeon_bits_per_pixel(stencil_rb);
+ }
+
+ buffers = (*screen->dri2.loader->getBuffersWithFormat)(drawable,
+ &drawable->w,
+ &drawable->h,
+ attachments, i / 2,
+ &count,
+ drawable->loaderPrivate);
+ } else if (screen->dri2.loader) {
+ i = 0;
+ if (draw->color_rb[0])
+ attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+ if (draw->color_rb[1])
+ attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+ if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH))
+ attachments[i++] = __DRI_BUFFER_DEPTH;
+ if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL))
+ attachments[i++] = __DRI_BUFFER_STENCIL;
+
+ buffers = (*screen->dri2.loader->getBuffers)(drawable,
+ &drawable->w,
+ &drawable->h,
+ attachments, i,
+ &count,
+ drawable->loaderPrivate);
+ }
+
+ if (buffers == NULL)
+ return;
+
+ /* set one cliprect to cover the whole drawable */
+ drawable->x = 0;
+ drawable->y = 0;
+ drawable->backX = 0;
+ drawable->backY = 0;
+ drawable->numClipRects = 1;
+ drawable->pClipRects[0].x1 = 0;
+ drawable->pClipRects[0].y1 = 0;
+ drawable->pClipRects[0].x2 = drawable->w;
+ drawable->pClipRects[0].y2 = drawable->h;
+ drawable->numBackClipRects = 1;
+ drawable->pBackClipRects[0].x1 = 0;
+ drawable->pBackClipRects[0].y1 = 0;
+ drawable->pBackClipRects[0].x2 = drawable->w;
+ drawable->pBackClipRects[0].y2 = drawable->h;
+ for (i = 0; i < count; i++) {
+ switch (buffers[i].attachment) {
+ case __DRI_BUFFER_FRONT_LEFT:
+ rb = draw->color_rb[0];
+ regname = "dri2 front buffer";
+ break;
+ case __DRI_BUFFER_FAKE_FRONT_LEFT:
+ rb = draw->color_rb[0];
+ regname = "dri2 fake front buffer";
+ break;
+ case __DRI_BUFFER_BACK_LEFT:
+ rb = draw->color_rb[1];
+ regname = "dri2 back buffer";
+ break;
+ case __DRI_BUFFER_DEPTH:
+ rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
+ regname = "dri2 depth buffer";
+ break;
+ case __DRI_BUFFER_DEPTH_STENCIL:
+ rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
+ regname = "dri2 depth / stencil buffer";
+ break;
+ case __DRI_BUFFER_STENCIL:
+ rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
+ regname = "dri2 stencil buffer";
+ break;
+ case __DRI_BUFFER_ACCUM:
+ default:
+ fprintf(stderr,
+ "unhandled buffer attach event, attacment type %d\n",
+ buffers[i].attachment);
+ return;
+ }
+
+ if (rb == NULL)
+ continue;
+
+ if (rb->bo) {
+ uint32_t name = radeon_gem_name_bo(rb->bo);
+ if (name == buffers[i].name)
+ continue;
+ }
+
+ if (RADEON_DEBUG & RADEON_DRI)
+ fprintf(stderr,
+ "attaching buffer %s, %d, at %d, cpp %d, pitch %d\n",
+ regname, buffers[i].name, buffers[i].attachment,
+ buffers[i].cpp, buffers[i].pitch);
+
+ rb->cpp = buffers[i].cpp;
+ rb->pitch = buffers[i].pitch;
+ rb->base.Width = drawable->w;
+ rb->base.Height = drawable->h;
+ rb->has_surface = 0;
+
+ if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) {
+ if (RADEON_DEBUG & RADEON_DRI)
+ fprintf(stderr, "(reusing depth buffer as stencil)\n");
+ bo = depth_bo;
+ radeon_bo_ref(bo);
+ } else {
+ uint32_t tiling_flags = 0, pitch = 0;
+ int ret;
+
+ bo = radeon_bo_open(radeon->radeonScreen->bom,
+ buffers[i].name,
+ 0,
+ 0,
+ RADEON_GEM_DOMAIN_VRAM,
+ buffers[i].flags);
+
+ if (bo == NULL) {
+
+ fprintf(stderr, "failed to attach %s %d\n",
+ regname, buffers[i].name);
+
+ }
+
+ ret = radeon_bo_get_tiling(bo, &tiling_flags, &pitch);
+ if (tiling_flags & RADEON_TILING_MACRO)
+ bo->flags |= RADEON_BO_FLAGS_MACRO_TILE;
+ if (tiling_flags & RADEON_TILING_MICRO)
+ bo->flags |= RADEON_BO_FLAGS_MICRO_TILE;
+
+ }
+
+ if (buffers[i].attachment == __DRI_BUFFER_DEPTH) {
+ if (draw->base.Visual.depthBits == 16)
+ rb->cpp = 2;
+ depth_bo = bo;
+ }
+
+ radeon_renderbuffer_set_bo(rb, bo);
+ radeon_bo_unref(bo);
+
+ if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) {
+ rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
+ if (rb != NULL) {
+ struct radeon_bo *stencil_bo = NULL;
+
+ if (rb->bo) {
+ uint32_t name = radeon_gem_name_bo(rb->bo);
+ if (name == buffers[i].name)
+ continue;
+ }
+
+ stencil_bo = bo;
+ radeon_bo_ref(stencil_bo);
+ radeon_renderbuffer_set_bo(rb, stencil_bo);
+ radeon_bo_unref(stencil_bo);
+ }
+ }
+ }
+
+ driUpdateFramebufferSize(radeon->glCtx, drawable);
+}
+
+/* Force the context `c' to be the current context and associate with it
+ * buffer `b'.
+ */
+GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+ __DRIdrawablePrivate * driDrawPriv,
+ __DRIdrawablePrivate * driReadPriv)
+{
+ radeonContextPtr radeon;
+ struct radeon_framebuffer *drfb;
+ struct gl_framebuffer *readfb;
+
+ if (!driContextPriv) {
+ if (RADEON_DEBUG & RADEON_DRI)
+ fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+ _mesa_make_current(NULL, NULL, NULL);
+ return GL_TRUE;
+ }
+
+ radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+ drfb = driDrawPriv->driverPrivate;
+ readfb = driReadPriv->driverPrivate;
+
+ if (driContextPriv->driScreenPriv->dri2.enabled) {
+ radeon_update_renderbuffers(driContextPriv, driDrawPriv);
+ if (driDrawPriv != driReadPriv)
+ radeon_update_renderbuffers(driContextPriv, driReadPriv);
+ _mesa_reference_renderbuffer(&radeon->state.color.rb,
+ &(radeon_get_renderbuffer(&drfb->base, BUFFER_BACK_LEFT)->base));
+ _mesa_reference_renderbuffer(&radeon->state.depth.rb,
+ &(radeon_get_renderbuffer(&drfb->base, BUFFER_DEPTH)->base));
+ } else {
+ radeon_make_renderbuffer_current(radeon, drfb);
+ }
+
+ if (RADEON_DEBUG & RADEON_DRI)
+ fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, radeon->glCtx, drfb, readfb);
+
+ driUpdateFramebufferSize(radeon->glCtx, driDrawPriv);
+ if (driReadPriv != driDrawPriv)
+ driUpdateFramebufferSize(radeon->glCtx, driReadPriv);
+
+ _mesa_make_current(radeon->glCtx, &drfb->base, readfb);
+
+ _mesa_update_state(radeon->glCtx);
+
+ if (radeon->glCtx->DrawBuffer == &drfb->base) {
+ if (driDrawPriv->swap_interval == (unsigned)-1) {
+ int i;
+ driDrawPriv->vblFlags =
+ (radeon->radeonScreen->irq != 0)
+ ? driGetDefaultVBlankFlags(&radeon->
+ optionCache)
+ : VBLANK_FLAG_NO_IRQ;
+
+ driDrawableInitVBlank(driDrawPriv);
+ drfb->vbl_waited = driDrawPriv->vblSeq;
+
+ for (i = 0; i < 2; i++) {
+ if (drfb->color_rb[i])
+ drfb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq;
+ }
+
+ }
+
+ radeon_window_moved(radeon);
+ radeon_draw_buffer(radeon->glCtx, &drfb->base);
+ }
+
+
+ if (RADEON_DEBUG & RADEON_DRI)
+ fprintf(stderr, "End %s\n", __FUNCTION__);
+
+ return GL_TRUE;
+}
+
diff --git a/radeon/radeon_common_context.h b/radeon/radeon_common_context.h
new file mode 100644
index 0000000..0309345
--- /dev/null
+++ b/radeon/radeon_common_context.h
@@ -0,0 +1,594 @@
+
+#ifndef COMMON_CONTEXT_H
+#define COMMON_CONTEXT_H
+
+#include "main/mm.h"
+#include "math/m_vector.h"
+#include "texmem.h"
+#include "tnl/t_context.h"
+#include "main/colormac.h"
+
+#include "radeon_debug.h"
+#include "radeon_screen.h"
+#include "radeon_drm.h"
+#include "dri_util.h"
+#include "tnl/t_vertex.h"
+
+#include "dri_metaops.h"
+struct radeon_context;
+
+#include "radeon_bocs_wrapper.h"
+
+/* This union is used to avoid warnings/miscompilation
+ with float to uint32_t casts due to strict-aliasing */
+typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
+
+struct radeon_context;
+typedef struct radeon_context radeonContextRec;
+typedef struct radeon_context *radeonContextPtr;
+
+
+#define TEX_0 0x1
+#define TEX_1 0x2
+#define TEX_2 0x4
+#define TEX_3 0x8
+#define TEX_4 0x10
+#define TEX_5 0x20
+
+/* Rasterizing fallbacks */
+/* See correponding strings in r200_swtcl.c */
+#define RADEON_FALLBACK_TEXTURE 0x0001
+#define RADEON_FALLBACK_DRAW_BUFFER 0x0002
+#define RADEON_FALLBACK_STENCIL 0x0004
+#define RADEON_FALLBACK_RENDER_MODE 0x0008
+#define RADEON_FALLBACK_BLEND_EQ 0x0010
+#define RADEON_FALLBACK_BLEND_FUNC 0x0020
+#define RADEON_FALLBACK_DISABLE 0x0040
+#define RADEON_FALLBACK_BORDER_MODE 0x0080
+#define RADEON_FALLBACK_DEPTH_BUFFER 0x0100
+#define RADEON_FALLBACK_STENCIL_BUFFER 0x0200
+
+#define R200_FALLBACK_TEXTURE 0x01
+#define R200_FALLBACK_DRAW_BUFFER 0x02
+#define R200_FALLBACK_STENCIL 0x04
+#define R200_FALLBACK_RENDER_MODE 0x08
+#define R200_FALLBACK_DISABLE 0x10
+#define R200_FALLBACK_BORDER_MODE 0x20
+
+#define RADEON_TCL_FALLBACK_RASTER 0x1 /* rasterization */
+#define RADEON_TCL_FALLBACK_UNFILLED 0x2 /* unfilled tris */
+#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x4 /* twoside tris */
+#define RADEON_TCL_FALLBACK_MATERIAL 0x8 /* material in vb */
+#define RADEON_TCL_FALLBACK_TEXGEN_0 0x10 /* texgen, unit 0 */
+#define RADEON_TCL_FALLBACK_TEXGEN_1 0x20 /* texgen, unit 1 */
+#define RADEON_TCL_FALLBACK_TEXGEN_2 0x40 /* texgen, unit 2 */
+#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x80 /* user disable */
+#define RADEON_TCL_FALLBACK_FOGCOORDSPEC 0x100 /* fogcoord, sep. spec light */
+
+/* The blit width for texture uploads
+ */
+#define BLIT_WIDTH_BYTES 1024
+
+/* Use the templated vertex format:
+ */
+#define COLOR_IS_RGBA
+#define TAG(x) radeon##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#define RADEON_RB_CLASS 0xdeadbeef
+
+struct radeon_renderbuffer
+{
+ struct gl_renderbuffer base;
+ struct radeon_bo *bo;
+ unsigned int cpp;
+ /* unsigned int offset; */
+ unsigned int pitch;
+
+ uint32_t draw_offset; /* FBO */
+ /* boo Xorg 6.8.2 compat */
+ int has_surface;
+
+ GLuint pf_pending; /**< sequence number of pending flip */
+ GLuint vbl_pending; /**< vblank sequence number of pending flip */
+ __DRIdrawablePrivate *dPriv;
+};
+
+struct radeon_framebuffer
+{
+ struct gl_framebuffer base;
+
+ struct radeon_renderbuffer *color_rb[2];
+
+ GLuint vbl_waited;
+
+ /* buffer swap */
+ int64_t swap_ust;
+ int64_t swap_missed_ust;
+
+ GLuint swap_count;
+ GLuint swap_missed_count;
+
+ /* Drawable page flipping state */
+ GLboolean pf_active;
+ GLint pf_current_page;
+ GLint pf_num_pages;
+
+};
+
+
+struct radeon_colorbuffer_state {
+ GLuint clear;
+ int roundEnable;
+ struct gl_renderbuffer *rb;
+ uint32_t draw_offset; /* offset into color renderbuffer - FBOs */
+};
+
+struct radeon_depthbuffer_state {
+ GLuint clear;
+ struct gl_renderbuffer *rb;
+};
+
+struct radeon_scissor_state {
+ drm_clip_rect_t rect;
+ GLboolean enabled;
+
+ GLuint numClipRects; /* Cliprects active */
+ GLuint numAllocedClipRects; /* Cliprects available */
+ drm_clip_rect_t *pClipRects;
+};
+
+struct radeon_stencilbuffer_state {
+ GLuint clear; /* rb3d_stencilrefmask value */
+};
+
+struct radeon_state_atom {
+ struct radeon_state_atom *next, *prev;
+ const char *name; /* for debug */
+ int cmd_size; /* size in bytes */
+ GLuint idx;
+ GLuint is_tcl;
+ GLuint *cmd; /* one or more cmd's */
+ GLuint *lastcmd; /* one or more cmd's */
+ GLboolean dirty; /* dirty-mark in emit_state_list */
+ int (*check) (GLcontext *, struct radeon_state_atom *atom); /* is this state active? */
+ void (*emit) (GLcontext *, struct radeon_state_atom *atom);
+};
+
+struct radeon_hw_state {
+ /* Head of the linked list of state atoms. */
+ struct radeon_state_atom atomlist;
+ int max_state_size; /* Number of bytes necessary for a full state emit. */
+ int max_post_flush_size; /* Number of bytes necessary for post flushing emits */
+ GLboolean is_dirty, all_dirty;
+};
+
+
+/* Texture related */
+typedef struct _radeon_texture_image radeon_texture_image;
+
+struct _radeon_texture_image {
+ struct gl_texture_image base;
+
+ /**
+ * If mt != 0, the image is stored in hardware format in the
+ * given mipmap tree. In this case, base.Data may point into the
+ * mapping of the buffer object that contains the mipmap tree.
+ *
+ * If mt == 0, the image is stored in normal memory pointed to
+ * by base.Data.
+ */
+ struct _radeon_mipmap_tree *mt;
+ struct radeon_bo *bo;
+
+ int mtlevel; /** if mt != 0, this is the image's level in the mipmap tree */
+ int mtface; /** if mt != 0, this is the image's face in the mipmap tree */
+};
+
+
+static INLINE radeon_texture_image *get_radeon_texture_image(struct gl_texture_image *image)
+{
+ return (radeon_texture_image*)image;
+}
+
+
+typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
+
+#define RADEON_TXO_MICRO_TILE (1 << 3)
+
+/* Texture object in locally shared texture space.
+ */
+struct radeon_tex_obj {
+ struct gl_texture_object base;
+ struct _radeon_mipmap_tree *mt;
+
+ /**
+ * This is true if we've verified that the mipmap tree above is complete
+ * and so on.
+ */
+ GLboolean validated;
+
+ GLuint override_offset;
+ GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
+ GLuint tile_bits; /* hw texture tile bits used on this texture */
+ struct radeon_bo *bo;
+
+ GLuint pp_txfilter; /* hardware register values */
+ GLuint pp_txformat;
+ GLuint pp_txformat_x;
+ GLuint pp_txsize; /* npot only */
+ GLuint pp_txpitch; /* npot only */
+ GLuint pp_border_color;
+ GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
+
+ GLuint pp_txfilter_1; /* r300 */
+
+ /* r700 texture states */
+ GLuint SQ_TEX_RESOURCE0;
+ GLuint SQ_TEX_RESOURCE1;
+ GLuint SQ_TEX_RESOURCE2;
+ GLuint SQ_TEX_RESOURCE3;
+ GLuint SQ_TEX_RESOURCE4;
+ GLuint SQ_TEX_RESOURCE5;
+ GLuint SQ_TEX_RESOURCE6;
+
+ GLuint SQ_TEX_SAMPLER0;
+ GLuint SQ_TEX_SAMPLER1;
+ GLuint SQ_TEX_SAMPLER2;
+
+ GLuint TD_PS_SAMPLER0_BORDER_RED;
+ GLuint TD_PS_SAMPLER0_BORDER_GREEN;
+ GLuint TD_PS_SAMPLER0_BORDER_BLUE;
+ GLuint TD_PS_SAMPLER0_BORDER_ALPHA;
+
+ GLboolean border_fallback;
+
+
+};
+
+static INLINE radeonTexObj* radeon_tex_obj(struct gl_texture_object *texObj)
+{
+ return (radeonTexObj*)texObj;
+}
+
+/* occlusion query */
+struct radeon_query_object {
+ struct gl_query_object Base;
+ struct radeon_bo *bo;
+ int curr_offset;
+ GLboolean emitted_begin;
+
+ /* Double linked list of not flushed query objects */
+ struct radeon_query_object *prev, *next;
+};
+
+/* Need refcounting on dma buffers:
+ */
+struct radeon_dma_buffer {
+ int refcount; /* the number of retained regions in buf */
+ drmBufPtr buf;
+};
+
+struct radeon_aos {
+ struct radeon_bo *bo; /** Buffer object where vertex data is stored */
+ int offset; /** Offset into buffer object, in bytes */
+ int components; /** Number of components per vertex */
+ int stride; /** Stride in dwords (may be 0 for repeating) */
+ int count; /** Number of vertices */
+};
+
+#define DMA_BO_FREE_TIME 100
+
+struct radeon_dma_bo {
+ struct radeon_dma_bo *next, *prev;
+ struct radeon_bo *bo;
+ int expire_counter;
+};
+
+struct radeon_dma {
+ /* Active dma region. Allocations for vertices and retained
+ * regions come from here. Also used for emitting random vertices,
+ * these may be flushed by calling flush_current();
+ */
+ struct radeon_dma_bo free;
+ struct radeon_dma_bo wait;
+ struct radeon_dma_bo reserved;
+ size_t current_used; /** Number of bytes allocated and forgotten about */
+ size_t current_vertexptr; /** End of active vertex region */
+ size_t minimum_size;
+
+ /**
+ * If current_vertexptr != current_used then flush must be non-zero.
+ * flush must be called before non-active vertex allocations can be
+ * performed.
+ */
+ void (*flush) (GLcontext *);
+};
+
+/* radeon_swtcl.c
+ */
+struct radeon_swtcl_info {
+
+ GLuint RenderIndex;
+ GLuint vertex_size;
+ GLubyte *verts;
+
+ /* Fallback rasterization functions
+ */
+ GLuint hw_primitive;
+ GLenum render_primitive;
+ GLuint numverts;
+
+ struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+ GLuint vertex_attr_count;
+
+ GLuint emit_prediction;
+};
+
+#define RADEON_MAX_AOS_ARRAYS 16
+struct radeon_tcl_info {
+ struct radeon_aos aos[RADEON_MAX_AOS_ARRAYS];
+ GLuint aos_count;
+ struct radeon_bo *elt_dma_bo; /** Buffer object that contains element indices */
+ int elt_dma_offset; /** Offset into this buffer object, in bytes */
+};
+
+struct radeon_ioctl {
+ GLuint vertex_offset;
+ GLuint vertex_max;
+ struct radeon_bo *bo;
+ GLuint vertex_size;
+};
+
+#define RADEON_MAX_PRIMS 64
+
+struct radeon_prim {
+ GLuint start;
+ GLuint end;
+ GLuint prim;
+};
+
+static INLINE GLuint radeonPackColor(GLuint cpp,
+ GLubyte r, GLubyte g,
+ GLubyte b, GLubyte a)
+{
+ switch (cpp) {
+ case 2:
+ return PACK_COLOR_565(r, g, b);
+ case 4:
+ return PACK_COLOR_8888(a, r, g, b);
+ default:
+ return 0;
+ }
+}
+
+#define MAX_CMD_BUF_SZ (16*1024)
+
+#define MAX_DMA_BUF_SZ (64*1024)
+
+struct radeon_store {
+ GLuint statenr;
+ GLuint primnr;
+ char cmd_buf[MAX_CMD_BUF_SZ];
+ int cmd_used;
+ int elts_start;
+};
+
+struct radeon_dri_mirror {
+ __DRIcontextPrivate *context; /* DRI context */
+ __DRIscreenPrivate *screen; /* DRI screen */
+
+ drm_context_t hwContext;
+ drm_hw_lock_t *hwLock;
+ int hwLockCount;
+ int fd;
+ int drmMinor;
+};
+
+typedef void (*radeon_tri_func) (radeonContextPtr,
+ radeonVertex *,
+ radeonVertex *, radeonVertex *);
+
+typedef void (*radeon_line_func) (radeonContextPtr,
+ radeonVertex *, radeonVertex *);
+
+typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
+
+#define RADEON_MAX_BOS 32
+struct radeon_state {
+ struct radeon_colorbuffer_state color;
+ struct radeon_depthbuffer_state depth;
+ struct radeon_scissor_state scissor;
+ struct radeon_stencilbuffer_state stencil;
+
+ struct radeon_cs_space_check bos[RADEON_MAX_BOS];
+ int validated_bo_count;
+};
+
+/**
+ * This structure holds the command buffer while it is being constructed.
+ *
+ * The first batch of commands in the buffer is always the state that needs
+ * to be re-emitted when the context is lost. This batch can be skipped
+ * otherwise.
+ */
+struct radeon_cmdbuf {
+ struct radeon_cs_manager *csm;
+ struct radeon_cs *cs;
+ int size; /** # of dwords total */
+ unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */
+};
+
+struct radeon_context {
+ GLcontext *glCtx;
+ radeonScreenPtr radeonScreen; /* Screen private DRI data */
+
+ /* Texture object bookkeeping
+ */
+ int texture_depth;
+ float initialMaxAnisotropy;
+ uint32_t texture_row_align;
+ uint32_t texture_rect_row_align;
+ uint32_t texture_compressed_row_align;
+
+ struct radeon_dma dma;
+ struct radeon_hw_state hw;
+ /* Rasterization and vertex state:
+ */
+ GLuint TclFallback;
+ GLuint Fallback;
+ GLuint NewGLState;
+ DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */
+
+ /* Drawable, cliprect and scissor information */
+ GLuint numClipRects; /* Cliprects for the draw buffer */
+ drm_clip_rect_t *pClipRects;
+ unsigned int lastStamp;
+ drm_radeon_sarea_t *sarea; /* Private SAREA data */
+
+ /* Mirrors of some DRI state */
+ struct radeon_dri_mirror dri;
+
+ /* Busy waiting */
+ GLuint do_usleeps;
+ GLuint do_irqs;
+ GLuint irqsEmitted;
+ drm_radeon_irq_wait_t iw;
+
+ /* Derived state - for r300 only */
+ struct radeon_state state;
+
+ struct radeon_swtcl_info swtcl;
+ struct radeon_tcl_info tcl;
+ /* Configuration cache
+ */
+ driOptionCache optionCache;
+
+ struct radeon_cmdbuf cmdbuf;
+
+ struct radeon_debug debug;
+
+ drm_clip_rect_t fboRect;
+ GLboolean constant_cliprect; /* use for FBO or DRI2 rendering */
+ GLboolean front_cliprects;
+
+ /**
+ * Set if rendering has occured to the drawable's front buffer.
+ *
+ * This is used in the DRI2 case to detect that glFlush should also copy
+ * the contents of the fake front buffer to the real front buffer.
+ */
+ GLboolean front_buffer_dirty;
+
+ /**
+ * Track whether front-buffer rendering is currently enabled
+ *
+ * A separate flag is used to track this in order to support MRT more
+ * easily.
+ */
+ GLboolean is_front_buffer_rendering;
+
+ /**
+ * Track whether front-buffer is the current read target.
+ *
+ * This is closely associated with is_front_buffer_rendering, but may
+ * be set separately. The DRI2 fake front buffer must be referenced
+ * either way.
+ */
+ GLboolean is_front_buffer_reading;
+
+ struct dri_metaops meta;
+
+ struct {
+ struct radeon_query_object *current;
+ struct radeon_query_object not_flushed_head;
+ struct radeon_state_atom queryobj;
+ } query;
+
+ struct {
+ void (*get_lock)(radeonContextPtr radeon);
+ void (*update_viewport_offset)(GLcontext *ctx);
+ void (*emit_cs_header)(struct radeon_cs *cs, radeonContextPtr rmesa);
+ void (*swtcl_flush)(GLcontext *ctx, uint32_t offset);
+ void (*pre_emit_atoms)(radeonContextPtr rmesa);
+ void (*pre_emit_state)(radeonContextPtr rmesa);
+ void (*fallback)(GLcontext *ctx, GLuint bit, GLboolean mode);
+ void (*free_context)(GLcontext *ctx);
+ void (*emit_query_finish)(radeonContextPtr radeon);
+ void (*update_scissor)(GLcontext *ctx);
+ } vtbl;
+};
+
+#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
+
+static inline __DRIdrawablePrivate* radeon_get_drawable(radeonContextPtr radeon)
+{
+ return radeon->dri.context->driDrawablePriv;
+}
+
+static inline __DRIdrawablePrivate* radeon_get_readable(radeonContextPtr radeon)
+{
+ return radeon->dri.context->driReadablePriv;
+}
+
+/**
+ * This function takes a float and packs it into a uint32_t
+ */
+static INLINE uint32_t radeonPackFloat32(float fl)
+{
+ union {
+ float fl;
+ uint32_t u;
+ } u;
+
+ u.fl = fl;
+ return u.u;
+}
+
+/* This is probably wrong for some values, I need to test this
+ * some more. Range checking would be a good idea also..
+ *
+ * But it works for most things. I'll fix it later if someone
+ * else with a better clue doesn't
+ */
+static INLINE uint32_t radeonPackFloat24(float f)
+{
+ float mantissa;
+ int exponent;
+ uint32_t float24 = 0;
+
+ if (f == 0.0)
+ return 0;
+
+ mantissa = frexpf(f, &exponent);
+
+ /* Handle -ve */
+ if (mantissa < 0) {
+ float24 |= (1 << 23);
+ mantissa = mantissa * -1.0;
+ }
+ /* Handle exponent, bias of 63 */
+ exponent += 62;
+ float24 |= (exponent << 16);
+ /* Kill 7 LSB of mantissa */
+ float24 |= (radeonPackFloat32(mantissa) & 0x7FFFFF) >> 7;
+
+ return float24;
+}
+
+GLboolean radeonInitContext(radeonContextPtr radeon,
+ struct dd_function_table* functions,
+ const __GLcontextModes * glVisual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate);
+
+void radeonCleanupContext(radeonContextPtr radeon);
+GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
+void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable);
+GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+ __DRIdrawablePrivate * driDrawPriv,
+ __DRIdrawablePrivate * driReadPriv);
+extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv);
+
+#endif
diff --git a/radeon/radeon_compat.c b/radeon/radeon_compat.c
deleted file mode 100644
index 46b490d..0000000
--- a/radeon/radeon_compat.c
+++ /dev/null
@@ -1,301 +0,0 @@
-/**************************************************************************
-
-Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
- Tungsten Graphics Inc., Austin, Texas.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-on the rights to use, copy, modify, merge, publish, distribute, sub
-license, and/or sell copies of the Software, and to permit persons to whom
-the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice (including the next
-paragraph) shall be included in all copies or substantial portions of the
-Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- *
- */
-
-#include "main/glheader.h"
-#include "main/imports.h"
-
-#include "radeon_context.h"
-#include "radeon_state.h"
-#include "radeon_ioctl.h"
-
-
-static struct {
- int start;
- int len;
- const char *name;
-} packet[RADEON_MAX_STATE_PACKETS] = {
- { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
- { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
- { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
- { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
- { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
- { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
- { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
- { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
- { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
- { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
- { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
- { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
- { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
- { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
- { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
- { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
- { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
- { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
- { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
- { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
- { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
-};
-
-
-static void radeonCompatEmitPacket( radeonContextPtr rmesa,
- struct radeon_state_atom *state )
-{
- drm_radeon_sarea_t *sarea = rmesa->sarea;
- drm_radeon_context_regs_t *ctx = &sarea->context_state;
- drm_radeon_texture_regs_t *tex0 = &sarea->tex_state[0];
- drm_radeon_texture_regs_t *tex1 = &sarea->tex_state[1];
- int i;
- int *buf = state->cmd;
-
- for ( i = 0 ; i < state->cmd_size ; ) {
- drm_radeon_cmd_header_t *header = (drm_radeon_cmd_header_t *)&buf[i++];
-
- if (RADEON_DEBUG & DEBUG_STATE)
- fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id,
- packet[(int)header->packet.packet_id].name);
-
- switch (header->packet.packet_id) {
- case RADEON_EMIT_PP_MISC:
- ctx->pp_misc = buf[i++];
- ctx->pp_fog_color = buf[i++];
- ctx->re_solid_color = buf[i++];
- ctx->rb3d_blendcntl = buf[i++];
- ctx->rb3d_depthoffset = buf[i++];
- ctx->rb3d_depthpitch = buf[i++];
- ctx->rb3d_zstencilcntl = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_CONTEXT;
- break;
- case RADEON_EMIT_PP_CNTL:
- ctx->pp_cntl = buf[i++];
- ctx->rb3d_cntl = buf[i++];
- ctx->rb3d_coloroffset = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_CONTEXT;
- break;
- case RADEON_EMIT_RB3D_COLORPITCH:
- ctx->rb3d_colorpitch = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_CONTEXT;
- break;
- case RADEON_EMIT_RE_LINE_PATTERN:
- ctx->re_line_pattern = buf[i++];
- ctx->re_line_state = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_LINE;
- break;
- case RADEON_EMIT_SE_LINE_WIDTH:
- ctx->se_line_width = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_LINE;
- break;
- case RADEON_EMIT_PP_LUM_MATRIX:
- ctx->pp_lum_matrix = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
- break;
- case RADEON_EMIT_PP_ROT_MATRIX_0:
- ctx->pp_rot_matrix_0 = buf[i++];
- ctx->pp_rot_matrix_1 = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
- break;
- case RADEON_EMIT_RB3D_STENCILREFMASK:
- ctx->rb3d_stencilrefmask = buf[i++];
- ctx->rb3d_ropcntl = buf[i++];
- ctx->rb3d_planemask = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_MASKS;
- break;
- case RADEON_EMIT_SE_VPORT_XSCALE:
- ctx->se_vport_xscale = buf[i++];
- ctx->se_vport_xoffset = buf[i++];
- ctx->se_vport_yscale = buf[i++];
- ctx->se_vport_yoffset = buf[i++];
- ctx->se_vport_zscale = buf[i++];
- ctx->se_vport_zoffset = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_VIEWPORT;
- break;
- case RADEON_EMIT_SE_CNTL:
- ctx->se_cntl = buf[i++];
- ctx->se_coord_fmt = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT;
- break;
- case RADEON_EMIT_SE_CNTL_STATUS:
- ctx->se_cntl_status = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_SETUP;
- break;
- case RADEON_EMIT_RE_MISC:
- ctx->re_misc = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_MISC;
- break;
- case RADEON_EMIT_PP_TXFILTER_0:
- tex0->pp_txfilter = buf[i++];
- tex0->pp_txformat = buf[i++];
- tex0->pp_txoffset = buf[i++];
- tex0->pp_txcblend = buf[i++];
- tex0->pp_txablend = buf[i++];
- tex0->pp_tfactor = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_TEX0;
- break;
- case RADEON_EMIT_PP_BORDER_COLOR_0:
- tex0->pp_border_color = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_TEX0;
- break;
- case RADEON_EMIT_PP_TXFILTER_1:
- tex1->pp_txfilter = buf[i++];
- tex1->pp_txformat = buf[i++];
- tex1->pp_txoffset = buf[i++];
- tex1->pp_txcblend = buf[i++];
- tex1->pp_txablend = buf[i++];
- tex1->pp_tfactor = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_TEX1;
- break;
- case RADEON_EMIT_PP_BORDER_COLOR_1:
- tex1->pp_border_color = buf[i++];
- sarea->dirty |= RADEON_UPLOAD_TEX1;
- break;
-
- case RADEON_EMIT_SE_ZBIAS_FACTOR:
- i++;
- i++;
- break;
-
- case RADEON_EMIT_PP_TXFILTER_2:
- case RADEON_EMIT_PP_BORDER_COLOR_2:
- case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
- case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
- default:
- /* These states aren't understood by radeon drm 1.1 */
- fprintf(stderr, "Tried to emit unsupported state\n");
- return;
- }
- }
-}
-
-
-
-static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
-{
- struct radeon_state_atom *atom;
-
- if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
- return;
-
- foreach(atom, &rmesa->hw.atomlist) {
- if (rmesa->hw.all_dirty)
- atom->dirty = GL_TRUE;
- if (atom->is_tcl)
- atom->dirty = GL_FALSE;
- if (atom->dirty)
- radeonCompatEmitPacket(rmesa, atom);
- }
-
- rmesa->hw.is_dirty = GL_FALSE;
- rmesa->hw.all_dirty = GL_FALSE;
-}
-
-
-static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
- GLuint hw_primitive,
- GLuint nverts,
- drm_clip_rect_t *pbox,
- GLuint nbox )
-{
- int i;
-
- for ( i = 0 ; i < nbox ; ) {
- int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );
- drm_clip_rect_t *b = rmesa->sarea->boxes;
- drm_radeon_vertex_t vtx;
-
- rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS;
- rmesa->sarea->nbox = nr - i;
-
- for ( ; i < nr ; i++)
- *b++ = pbox[i];
-
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr,
- "RadeonFlushVertexBuffer: prim %x buf %d verts %d "
- "disc %d nbox %d\n",
- hw_primitive,
- rmesa->dma.current.buf->buf->idx,
- nverts,
- nr == nbox,
- rmesa->sarea->nbox );
-
- vtx.prim = hw_primitive;
- vtx.idx = rmesa->dma.current.buf->buf->idx;
- vtx.count = nverts;
- vtx.discard = (nr == nbox);
-
- drmCommandWrite( rmesa->dri.fd,
- DRM_RADEON_VERTEX,
- &vtx, sizeof(vtx));
- }
-}
-
-
-
-/* No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer!
- */
-void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
- GLuint vertex_format,
- GLuint hw_primitive,
- GLuint nrverts )
-{
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- LOCK_HARDWARE( rmesa );
-
- radeonCompatEmitStateLocked( rmesa );
- rmesa->sarea->vc_format = vertex_format;
-
- if (rmesa->state.scissor.enabled) {
- radeonCompatEmitPrimitiveLocked( rmesa,
- hw_primitive,
- nrverts,
- rmesa->state.scissor.pClipRects,
- rmesa->state.scissor.numClipRects );
- }
- else {
- radeonCompatEmitPrimitiveLocked( rmesa,
- hw_primitive,
- nrverts,
- rmesa->pClipRects,
- rmesa->numClipRects );
- }
-
-
- UNLOCK_HARDWARE( rmesa );
-}
-
diff --git a/radeon/radeon_context.c b/radeon/radeon_context.c
index b67bda7..8f4485a 100644
--- a/radeon/radeon_context.c
+++ b/radeon/radeon_context.c
@@ -53,6 +53,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "drivers/common/driverfuncs.h"
+#include "radeon_common.h"
#include "radeon_context.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
@@ -61,10 +62,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_swtcl.h"
#include "radeon_tcl.h"
#include "radeon_maos.h"
+#include "radeon_queryobj.h"
+#define need_GL_ARB_occlusion_query
#define need_GL_EXT_blend_minmax
#define need_GL_EXT_fog_coord
#define need_GL_EXT_secondary_color
+#define need_GL_EXT_framebuffer_object
#include "extension_helper.h"
#define DRIVER_DATE "20061018"
@@ -72,46 +76,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "vblank.h"
#include "utils.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
-#ifndef RADEON_DEBUG
-int RADEON_DEBUG = (0);
-#endif
-
-
-/* Return various strings for glGetString().
- */
-static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- static char buffer[128];
- unsigned offset;
- GLuint agp_mode = (rmesa->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
- rmesa->radeonScreen->AGPMode;
-
- switch ( name ) {
- case GL_VENDOR:
- return (GLubyte *)"Tungsten Graphics, Inc.";
-
- case GL_RENDERER:
- offset = driGetRendererString( buffer, "Radeon", DRIVER_DATE,
- agp_mode );
-
- sprintf( & buffer[ offset ], " %sTCL",
- !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
- ? "" : "NO-" );
-
- return (GLubyte *)buffer;
-
- default:
- return NULL;
- }
-}
-
/* Extension strings exported by the R100 driver.
*/
const struct dri_extension card_extensions[] =
{
{ "GL_ARB_multitexture", NULL },
+ { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
{ "GL_ARB_texture_border_clamp", NULL },
{ "GL_ARB_texture_env_add", NULL },
{ "GL_ARB_texture_env_combine", NULL },
@@ -121,6 +92,7 @@ const struct dri_extension card_extensions[] =
{ "GL_EXT_blend_logic_op", NULL },
{ "GL_EXT_blend_subtract", GL_EXT_blend_minmax_functions },
{ "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+ { "GL_EXT_packed_depth_stencil", NULL},
{ "GL_EXT_secondary_color", GL_EXT_secondary_color_functions },
{ "GL_EXT_stencil_wrap", NULL },
{ "GL_EXT_texture_edge_clamp", NULL },
@@ -137,6 +109,11 @@ const struct dri_extension card_extensions[] =
{ NULL, NULL }
};
+const struct dri_extension mm_extensions[] = {
+ { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+ { NULL, NULL }
+};
+
extern const struct tnl_pipeline_stage _radeon_render_stage;
extern const struct tnl_pipeline_stage _radeon_tcl_stage;
@@ -160,47 +137,85 @@ static const struct tnl_pipeline_stage *radeon_pipeline[] = {
NULL,
};
+static void r100_get_lock(radeonContextPtr radeon)
+{
+ r100ContextPtr rmesa = (r100ContextPtr)radeon;
+ drm_radeon_sarea_t *sarea = radeon->sarea;
+ RADEON_STATECHANGE(rmesa, ctx);
+ if (rmesa->radeon.sarea->tiling_enabled) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
+ RADEON_COLOR_TILE_ENABLE;
+ } else {
+ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
+ ~RADEON_COLOR_TILE_ENABLE;
+ }
+
+ if (sarea->ctx_owner != rmesa->radeon.dri.hwContext) {
+ sarea->ctx_owner = rmesa->radeon.dri.hwContext;
+
+ if (!radeon->radeonScreen->kernel_mm)
+ radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
+ }
+}
-/* Initialize the driver's misc functions.
- */
-static void radeonInitDriverFuncs( struct dd_function_table *functions )
+static void r100_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
{
- functions->GetString = radeonGetString;
}
-static const struct dri_debug_control debug_control[] =
+static void r100_vtbl_pre_emit_state(radeonContextPtr radeon)
{
- { "fall", DEBUG_FALLBACKS },
- { "tex", DEBUG_TEXTURE },
- { "ioctl", DEBUG_IOCTL },
- { "prim", DEBUG_PRIMS },
- { "vert", DEBUG_VERTS },
- { "state", DEBUG_STATE },
- { "code", DEBUG_CODEGEN },
- { "vfmt", DEBUG_VFMT },
- { "vtxf", DEBUG_VFMT },
- { "verb", DEBUG_VERBOSE },
- { "dri", DEBUG_DRI },
- { "dma", DEBUG_DMA },
- { "san", DEBUG_SANITY },
- { "sync", DEBUG_SYNC },
- { NULL, 0 }
-};
+ r100ContextPtr rmesa = (r100ContextPtr)radeon;
+
+ /* r100 always needs to emit ZBS to avoid TCL lockups */
+ rmesa->hw.zbs.dirty = 1;
+ radeon->hw.is_dirty = 1;
+}
+
+static void r100_vtbl_free_context(GLcontext *ctx)
+{
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ _mesa_vector4f_free( &rmesa->tcl.ObjClean );
+}
+static void r100_emit_query_finish(radeonContextPtr radeon)
+{
+ BATCH_LOCALS(radeon);
+ struct radeon_query_object *query = radeon->query.current;
+
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZPASS_ADDR, 0));
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+ query->curr_offset += sizeof(uint32_t);
+ assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
+
+static void r100_init_vtbl(radeonContextPtr radeon)
+{
+ radeon->vtbl.get_lock = r100_get_lock;
+ radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset;
+ radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header;
+ radeon->vtbl.swtcl_flush = r100_swtcl_flush;
+ radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state;
+ radeon->vtbl.fallback = radeonFallback;
+ radeon->vtbl.free_context = r100_vtbl_free_context;
+ radeon->vtbl.emit_query_finish = r100_emit_query_finish;
+}
/* Create the device specific context.
*/
GLboolean
-radeonCreateContext( const __GLcontextModes *glVisual,
+r100CreateContext( const __GLcontextModes *glVisual,
__DRIcontextPrivate *driContextPriv,
void *sharedContextPrivate)
{
__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
struct dd_function_table functions;
- radeonContextPtr rmesa;
- GLcontext *ctx, *shareCtx;
+ r100ContextPtr rmesa;
+ GLcontext *ctx;
int i;
int tcl_mode, fthrottle_mode;
@@ -209,10 +224,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
assert(screen);
/* Allocate the Radeon context */
- rmesa = (radeonContextPtr) CALLOC( sizeof(*rmesa) );
+ rmesa = (r100ContextPtr) CALLOC( sizeof(*rmesa) );
if ( !rmesa )
return GL_FALSE;
+ r100_init_vtbl(&rmesa->radeon);
+
/* init exp fog table data */
radeonInitStaticFogData();
@@ -220,12 +237,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
* Do this here so that initialMaxAnisotropy is set before we create
* the default textures.
*/
- driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
+ driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
screen->driScreen->myNum, "radeon");
- rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
+ rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
"def_max_anisotropy");
- if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
+ if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
if ( sPriv->drm_version.minor < 13 )
fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
"disabling.\n", sPriv->drm_version.minor );
@@ -240,65 +257,18 @@ radeonCreateContext( const __GLcontextModes *glVisual,
* (the texture functions are especially important)
*/
_mesa_init_driver_functions( &functions );
- radeonInitDriverFuncs( &functions );
radeonInitTextureFuncs( &functions );
+ radeonInitQueryObjFunctions(&functions);
- /* Allocate the Mesa context */
- if (sharedContextPrivate)
- shareCtx = ((radeonContextPtr) sharedContextPrivate)->glCtx;
- else
- shareCtx = NULL;
- rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
- &functions, (void *) rmesa);
- if (!rmesa->glCtx) {
- FREE(rmesa);
- return GL_FALSE;
- }
- driContextPriv->driverPrivate = rmesa;
-
- /* Init radeon context data */
- rmesa->dri.context = driContextPriv;
- rmesa->dri.screen = sPriv;
- rmesa->dri.drawable = NULL;
- rmesa->dri.readable = NULL;
- rmesa->dri.hwContext = driContextPriv->hHWContext;
- rmesa->dri.hwLock = &sPriv->pSAREA->lock;
- rmesa->dri.fd = sPriv->fd;
- rmesa->dri.drmMinor = sPriv->drm_version.minor;
-
- rmesa->radeonScreen = screen;
- rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
- screen->sarea_priv_offset);
-
-
- rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address;
-
- (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
- make_empty_list( & rmesa->swapped );
-
- rmesa->nr_heaps = screen->numTexHeaps;
- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
- rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
- screen->texSize[i],
- 12,
- RADEON_NR_TEX_REGIONS,
- (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
- & rmesa->sarea->tex_age[i],
- & rmesa->swapped,
- sizeof( radeonTexObj ),
- (destroy_texture_object_t *) radeonDestroyTexObj );
-
- driSetTextureSwapCounterLocation( rmesa->texture_heaps[i],
- & rmesa->c_textureSwaps );
+ if (!radeonInitContext(&rmesa->radeon, &functions,
+ glVisual, driContextPriv,
+ sharedContextPrivate)) {
+ FREE(rmesa);
+ return GL_FALSE;
}
- rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
- "texture_depth");
- if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
- rmesa->texture_depth = ( screen->cpp == 4 ) ?
- DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
- rmesa->swtcl.RenderIndex = ~0;
- rmesa->hw.all_dirty = GL_TRUE;
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ rmesa->radeon.hw.all_dirty = GL_TRUE;
/* Set the maximum texture size small enough that we can guarentee that
* all texture units can bind a maximal texture and have all of them in
@@ -306,26 +276,20 @@ radeonCreateContext( const __GLcontextModes *glVisual,
* setting allow larger textures.
*/
- ctx = rmesa->glCtx;
- ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
+ ctx = rmesa->radeon.glCtx;
+ ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
"texture_units");
ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
- i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
-
- driCalculateMaxTextureLevels( rmesa->texture_heaps,
- rmesa->nr_heaps,
- & ctx->Const,
- 4,
- 11, /* max 2D texture size is 2048x2048 */
- 8, /* 256^3 */
- 9, /* \todo: max cube texture size seems to be 512x512(x6) */
- 11, /* max rect texture size is 2048x2048. */
- 12,
- GL_FALSE,
- i );
+ i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
+ /* FIXME: When no memory manager is available we should set this
+ * to some reasonable value based on texture memory pool size */
+ ctx->Const.MaxTextureLevels = 12;
+ ctx->Const.Max3DTextureLevels = 9;
+ ctx->Const.MaxCubeTextureLevels = 12;
+ ctx->Const.MaxTextureRectSize = 2048;
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
@@ -390,38 +354,42 @@ radeonCreateContext( const __GLcontextModes *glVisual,
}
driInitExtensions( ctx, card_extensions, GL_TRUE );
- if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
+ if (rmesa->radeon.radeonScreen->kernel_mm)
+ driInitExtensions(ctx, mm_extensions, GL_FALSE);
+ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
_mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
- if (rmesa->glCtx->Mesa_DXTn) {
+ if (rmesa->radeon.glCtx->Mesa_DXTn) {
_mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
_mesa_enable_extension( ctx, "GL_S3_s3tc" );
}
- else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
+ else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
_mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
}
- if (rmesa->dri.drmMinor >= 9)
+ if (rmesa->radeon.radeonScreen->kernel_mm || rmesa->radeon.dri.drmMinor >= 9)
_mesa_enable_extension( ctx, "GL_NV_texture_rectangle");
+ if (!rmesa->radeon.radeonScreen->kernel_mm)
+ _mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
+
/* XXX these should really go right after _mesa_init_driver_functions() */
- radeonInitIoctlFuncs( ctx );
- radeonInitStateFuncs( ctx );
+ radeon_fbo_init(&rmesa->radeon);
radeonInitSpanFuncs( ctx );
+ radeonInitIoctlFuncs( ctx );
+ radeonInitStateFuncs( ctx , rmesa->radeon.radeonScreen->kernel_mm );
radeonInitState( rmesa );
radeonInitSwtcl( ctx );
_mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0,
ctx->Const.MaxArrayLockSize, 32 );
- fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
- rmesa->iw.irq_seq = -1;
- rmesa->irqsEmitted = 0;
- rmesa->do_irqs = (rmesa->radeonScreen->irq != 0 &&
- fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
-
- rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+ fthrottle_mode = driQueryOptioni(&rmesa->radeon.optionCache, "fthrottle_mode");
+ rmesa->radeon.iw.irq_seq = -1;
+ rmesa->radeon.irqsEmitted = 0;
+ rmesa->radeon.do_irqs = (rmesa->radeon.radeonScreen->irq != 0 &&
+ fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
- (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
+ rmesa->radeon.do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
#if DO_DEBUG
@@ -429,206 +397,21 @@ radeonCreateContext( const __GLcontextModes *glVisual,
debug_control );
#endif
- tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
- if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
+ tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
+ if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
fprintf(stderr, "disabling 3D acceleration\n");
FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1);
} else if (tcl_mode == DRI_CONF_TCL_SW ||
- !(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
- rmesa->radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
+ !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
fprintf(stderr, "Disabling HW TCL support\n");
}
- TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
+ TCL_FALLBACK(rmesa->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
}
- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
/* _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */
}
return GL_TRUE;
}
-
-
-/* Destroy the device specific context.
- */
-/* Destroy the Mesa and driver specific context data.
- */
-void radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
-{
- GET_CURRENT_CONTEXT(ctx);
- radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
- radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
-
- /* check if we're deleting the currently bound context */
- if (rmesa == current) {
- RADEON_FIREVERTICES( rmesa );
- _mesa_make_current(NULL, NULL, NULL);
- }
-
- /* Free radeon context resources */
- assert(rmesa); /* should never be null */
- if ( rmesa ) {
- GLboolean release_texture_heaps;
-
-
- release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
- _swsetup_DestroyContext( rmesa->glCtx );
- _tnl_DestroyContext( rmesa->glCtx );
- _vbo_DestroyContext( rmesa->glCtx );
- _swrast_DestroyContext( rmesa->glCtx );
-
- radeonDestroySwtcl( rmesa->glCtx );
- radeonReleaseArrays( rmesa->glCtx, ~0 );
- if (rmesa->dma.current.buf) {
- radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
- }
-
- _mesa_vector4f_free( &rmesa->tcl.ObjClean );
-
- if (rmesa->state.scissor.pClipRects) {
- FREE(rmesa->state.scissor.pClipRects);
- rmesa->state.scissor.pClipRects = NULL;
- }
-
- if ( release_texture_heaps ) {
- /* This share group is about to go away, free our private
- * texture object data.
- */
- int i;
-
- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
- driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
- rmesa->texture_heaps[ i ] = NULL;
- }
-
- assert( is_empty_list( & rmesa->swapped ) );
- }
-
- /* free the Mesa context */
- rmesa->glCtx->DriverCtx = NULL;
- _mesa_destroy_context( rmesa->glCtx );
-
- /* free the option cache */
- driDestroyOptionCache (&rmesa->optionCache);
-
- FREE( rmesa );
- }
-}
-
-
-
-
-void
-radeonSwapBuffers( __DRIdrawablePrivate *dPriv )
-{
-
- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
- radeonContextPtr rmesa;
- GLcontext *ctx;
- rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
- ctx = rmesa->glCtx;
- if (ctx->Visual.doubleBufferMode) {
- _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */
-
- if ( rmesa->doPageFlip ) {
- radeonPageFlip( dPriv );
- }
- else {
- radeonCopyBuffer( dPriv, NULL );
- }
- }
- }
- else {
- /* XXX this shouldn't be an error but we can't handle it for now */
- _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
- }
-}
-
-void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
- int x, int y, int w, int h )
-{
- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
- radeonContextPtr radeon;
- GLcontext *ctx;
-
- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
- ctx = radeon->glCtx;
-
- if (ctx->Visual.doubleBufferMode) {
- drm_clip_rect_t rect;
- rect.x1 = x + dPriv->x;
- rect.y1 = (dPriv->h - y - h) + dPriv->y;
- rect.x2 = rect.x1 + w;
- rect.y2 = rect.y1 + h;
- _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */
- radeonCopyBuffer(dPriv, &rect);
- }
- } else {
- /* XXX this shouldn't be an error but we can't handle it for now */
- _mesa_problem(NULL, "%s: drawable has no context!",
- __FUNCTION__);
- }
-}
-
-/* Make context `c' the current context and bind it to the given
- * drawing and reading surfaces.
- */
-GLboolean
-radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
- __DRIdrawablePrivate *driDrawPriv,
- __DRIdrawablePrivate *driReadPriv )
-{
- if ( driContextPriv ) {
- radeonContextPtr newCtx =
- (radeonContextPtr) driContextPriv->driverPrivate;
-
- if (RADEON_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx);
-
- newCtx->dri.readable = driReadPriv;
-
- if ( (newCtx->dri.drawable != driDrawPriv) ||
- newCtx->lastStamp != driDrawPriv->lastStamp ) {
- if (driDrawPriv->swap_interval == (unsigned)-1) {
- driDrawPriv->vblFlags = (newCtx->radeonScreen->irq != 0)
- ? driGetDefaultVBlankFlags(&newCtx->optionCache)
- : VBLANK_FLAG_NO_IRQ;
-
- driDrawableInitVBlank( driDrawPriv );
- }
-
- newCtx->dri.drawable = driDrawPriv;
-
- radeonSetCliprects(newCtx);
- radeonUpdateViewportOffset( newCtx->glCtx );
- }
-
- _mesa_make_current( newCtx->glCtx,
- (GLframebuffer *) driDrawPriv->driverPrivate,
- (GLframebuffer *) driReadPriv->driverPrivate );
-
- _mesa_update_state( newCtx->glCtx );
- } else {
- if (RADEON_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
- _mesa_make_current( NULL, NULL, NULL );
- }
-
- if (RADEON_DEBUG & DEBUG_DRI)
- fprintf(stderr, "End %s\n", __FUNCTION__);
- return GL_TRUE;
-}
-
-/* Force the context `c' to be unbound from its buffer.
- */
-GLboolean
-radeonUnbindContext( __DRIcontextPrivate *driContextPriv )
-{
- radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
-
- if (RADEON_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) rmesa->glCtx);
-
- return GL_TRUE;
-}
diff --git a/radeon/radeon_context.h b/radeon/radeon_context.h
index 53df766..4e2c52c 100644
--- a/radeon/radeon_context.h
+++ b/radeon/radeon_context.h
@@ -48,91 +48,23 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "drm.h"
#include "radeon_drm.h"
#include "texmem.h"
-
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/colormac.h"
-
-struct radeon_context;
-typedef struct radeon_context radeonContextRec;
-typedef struct radeon_context *radeonContextPtr;
-
-/* This union is used to avoid warnings/miscompilation
- with float to uint32_t casts due to strict-aliasing */
-typedef union {
- GLfloat f;
- uint32_t ui32;
-} float_ui32_type;
-
-#include "radeon_lock.h"
#include "radeon_screen.h"
-#include "main/mm.h"
-
-#include "math/m_vector.h"
-
-#define TEX_0 0x1
-#define TEX_1 0x2
-#define TEX_2 0x4
-#define TEX_ALL 0x7
-
-/* Rasterizing fallbacks */
-/* See correponding strings in r200_swtcl.c */
-#define RADEON_FALLBACK_TEXTURE 0x0001
-#define RADEON_FALLBACK_DRAW_BUFFER 0x0002
-#define RADEON_FALLBACK_STENCIL 0x0004
-#define RADEON_FALLBACK_RENDER_MODE 0x0008
-#define RADEON_FALLBACK_BLEND_EQ 0x0010
-#define RADEON_FALLBACK_BLEND_FUNC 0x0020
-#define RADEON_FALLBACK_DISABLE 0x0040
-#define RADEON_FALLBACK_BORDER_MODE 0x0080
-
-/* The blit width for texture uploads
- */
-#define BLIT_WIDTH_BYTES 1024
-/* Use the templated vertex format:
- */
-#define COLOR_IS_RGBA
-#define TAG(x) radeon##x
-#include "tnl_dd/t_dd_vertex.h"
-#undef TAG
-
-typedef void (*radeon_tri_func) (radeonContextPtr,
- radeonVertex *,
- radeonVertex *, radeonVertex *);
-
-typedef void (*radeon_line_func) (radeonContextPtr,
- radeonVertex *, radeonVertex *);
+#include "radeon_common.h"
-typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
-
-struct radeon_colorbuffer_state {
- GLuint clear;
- int roundEnable;
-};
-struct radeon_depthbuffer_state {
- GLuint clear;
- GLfloat scale;
-};
+struct r100_context;
+typedef struct r100_context r100ContextRec;
+typedef struct r100_context *r100ContextPtr;
-struct radeon_scissor_state {
- drm_clip_rect_t rect;
- GLboolean enabled;
+#include "radeon_lock.h"
- GLuint numClipRects; /* Cliprects active */
- GLuint numAllocedClipRects; /* Cliprects available */
- drm_clip_rect_t *pClipRects;
-};
-struct radeon_stencilbuffer_state {
- GLboolean hwBuffer;
- GLuint clear; /* rb3d_stencilrefmask value */
-};
-struct radeon_stipple_state {
- GLuint mask[32];
-};
+#define R100_TEX_ALL 0x7
/* used for both tcl_vtx and vc_frmt tex bits (they are identical) */
#define RADEON_ST_BIT(unit) \
@@ -141,42 +73,6 @@ struct radeon_stipple_state {
#define RADEON_Q_BIT(unit) \
(unit == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * unit))
-typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
-
-/* Texture object in locally shared texture space.
- */
-struct radeon_tex_obj {
- driTextureObject base;
-
- GLuint bufAddr; /* Offset to start of locally
- shared texture block */
-
- GLuint dirty_state; /* Flags (1 per texunit) for
- whether or not this texobj
- has dirty hardware state
- (pp_*) that needs to be
- brought into the
- texunit. */
-
- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
- /* Six, for the cube faces */
-
- GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
-
- GLuint pp_txfilter; /* hardware register values */
- GLuint pp_txformat;
- GLuint pp_txoffset; /* Image location in texmem.
- All cube faces follow. */
- GLuint pp_txsize; /* npot only */
- GLuint pp_txpitch; /* npot only */
- GLuint pp_border_color;
- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
-
- GLboolean border_fallback;
-
- GLuint tile_bits; /* hw texture tile bits used on this texture */
-};
-
struct radeon_texture_env_state {
radeonTexObjPtr texobj;
GLenum format;
@@ -187,17 +83,6 @@ struct radeon_texture_state {
struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
};
-struct radeon_state_atom {
- struct radeon_state_atom *next, *prev;
- const char *name; /* for debug */
- int cmd_size; /* size in bytes */
- GLuint is_tcl;
- int *cmd; /* one or more cmd's */
- int *lastcmd; /* one or more cmd's */
- GLboolean dirty; /* dirty-mark in emit_state_list */
- GLboolean(*check) (GLcontext *); /* is this state active? */
-};
-
/* Trying to keep these relatively short as the variables are becoming
* extravagently long. Drop the driver name prefix off the front of
* everything - I think we know which driver we're in by now, and keep the
@@ -410,10 +295,16 @@ struct radeon_state_atom {
#define SHN_SHININESS 1
#define SHN_STATE_SIZE 2
-struct radeon_hw_state {
- /* Head of the linked list of state atoms. */
- struct radeon_state_atom atomlist;
+#define R100_QUERYOBJ_CMD_0 0
+#define R100_QUERYOBJ_DATA_0 1
+#define R100_QUERYOBJ_CMDSIZE 2
+
+#define STP_CMD_0 0
+#define STP_DATA_0 1
+#define STP_CMD_1 2
+#define STP_STATE_SIZE 35
+struct r100_hw_state {
/* Hardware state, stored as cmdbuf commands:
* -- Need to doublebuffer for
* - eliding noop statechange loops? (except line stipple count)
@@ -437,90 +328,19 @@ struct radeon_hw_state {
struct radeon_state_atom fog;
struct radeon_state_atom glt;
struct radeon_state_atom txr[3]; /* for NPOT */
-
- int max_state_size; /* Number of bytes necessary for a full state emit. */
- GLboolean is_dirty, all_dirty;
-};
-
-struct radeon_state {
- /* Derived state for internal purposes:
- */
- struct radeon_colorbuffer_state color;
- struct radeon_depthbuffer_state depth;
- struct radeon_scissor_state scissor;
- struct radeon_stencilbuffer_state stencil;
- struct radeon_stipple_state stipple;
- struct radeon_texture_state texture;
-};
-
-/* Need refcounting on dma buffers:
- */
-struct radeon_dma_buffer {
- int refcount; /* the number of retained regions in buf */
- drmBufPtr buf;
-};
-
-#define GET_START(rvb) (rmesa->radeonScreen->gart_buffer_offset + \
- (rvb)->address - rmesa->dma.buf0_address + \
- (rvb)->start)
-
-/* A retained region, eg vertices for indexed vertices.
- */
-struct radeon_dma_region {
- struct radeon_dma_buffer *buf;
- char *address; /* == buf->address */
- int start, end, ptr; /* offsets from start of buf */
- int aos_start;
- int aos_stride;
- int aos_size;
-};
-
-struct radeon_dma {
- /* Active dma region. Allocations for vertices and retained
- * regions come from here. Also used for emitting random vertices,
- * these may be flushed by calling flush_current();
- */
- struct radeon_dma_region current;
-
- void (*flush) (radeonContextPtr);
-
- char *buf0_address; /* start of buf[0], for index calcs */
- GLuint nr_released_bufs; /* flush after so many buffers released */
+ struct radeon_state_atom stp;
};
-struct radeon_dri_mirror {
- __DRIcontextPrivate *context; /* DRI context */
- __DRIscreenPrivate *screen; /* DRI screen */
-
- /**
- * DRI drawable bound to this context for drawing.
- */
- __DRIdrawablePrivate *drawable;
- /**
- * DRI drawable bound to this context for reading.
- */
- __DRIdrawablePrivate *readable;
-
- drm_context_t hwContext;
- drm_hw_lock_t *hwLock;
- int fd;
- int drmMinor;
+struct r100_state {
+ struct radeon_texture_state texture;
};
#define RADEON_CMD_BUF_SZ (8*1024)
-
-struct radeon_store {
- GLuint statenr;
- GLuint primnr;
- char cmd_buf[RADEON_CMD_BUF_SZ];
- int cmd_used;
- int elts_start;
-};
-
+#define R200_ELT_BUF_SZ (8*1024)
/* radeon_tcl.c
*/
-struct radeon_tcl_info {
+struct r100_tcl_info {
GLuint vertex_format;
GLuint hw_primitive;
@@ -529,30 +349,18 @@ struct radeon_tcl_info {
*/
GLvector4f ObjClean;
- struct radeon_dma_region *aos_components[8];
- GLuint nr_aos_components;
-
GLuint *Elts;
- struct radeon_dma_region indexed_verts;
- struct radeon_dma_region obj;
- struct radeon_dma_region rgba;
- struct radeon_dma_region spec;
- struct radeon_dma_region fog;
- struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
- struct radeon_dma_region norm;
+ int elt_cmd_offset;
+ int elt_cmd_start;
+ int elt_used;
};
/* radeon_swtcl.c
*/
-struct radeon_swtcl_info {
- GLuint RenderIndex;
- GLuint vertex_size;
+struct r100_swtcl_info {
GLuint vertex_format;
- struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
- GLuint vertex_attr_count;
-
GLubyte *verts;
/* Fallback rasterization functions
@@ -561,10 +369,6 @@ struct radeon_swtcl_info {
radeon_line_func draw_line;
radeon_tri_func draw_tri;
- GLuint hw_primitive;
- GLenum render_primitive;
- GLuint numverts;
-
/**
* Offset of the 4UB color data within a hardware (swtcl) vertex.
*/
@@ -576,22 +380,9 @@ struct radeon_swtcl_info {
GLuint specoffset;
GLboolean needproj;
-
- struct radeon_dma_region indexed_verts;
};
-struct radeon_ioctl {
- GLuint vertex_offset;
- GLuint vertex_size;
-};
-#define RADEON_MAX_PRIMS 64
-
-struct radeon_prim {
- GLuint start;
- GLuint end;
- GLuint prim;
-};
/* A maximum total of 20 elements per vertex: 3 floats for position, 3
* floats for normal, 4 floats for color, 4 bytes for secondary color,
@@ -602,59 +393,18 @@ struct radeon_prim {
*/
#define RADEON_MAX_VERTEX_SIZE 20
-struct radeon_context {
- GLcontext *glCtx; /* Mesa context */
+struct r100_context {
+ struct radeon_context radeon;
/* Driver and hardware state management
*/
- struct radeon_hw_state hw;
- struct radeon_state state;
-
- /* Texture object bookkeeping
- */
- unsigned nr_heaps;
- driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
- driTextureObject swapped;
- int texture_depth;
- float initialMaxAnisotropy;
-
- /* Rasterization and vertex state:
- */
- GLuint TclFallback;
- GLuint Fallback;
- GLuint NewGLState;
- DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */
+ struct r100_hw_state hw;
+ struct r100_state state;
/* Vertex buffers
*/
struct radeon_ioctl ioctl;
- struct radeon_dma dma;
struct radeon_store store;
- /* A full state emit as of the first state emit in the main store, in case
- * the context is lost.
- */
- struct radeon_store backup_store;
-
- /* Page flipping
- */
- GLuint doPageFlip;
-
- /* Busy waiting
- */
- GLuint do_usleeps;
- GLuint do_irqs;
- GLuint irqsEmitted;
- drm_radeon_irq_wait_t iw;
-
- /* Drawable, cliprect and scissor information
- */
- GLuint numClipRects; /* Cliprects for the draw buffer */
- drm_clip_rect_t *pClipRects;
- unsigned int lastStamp;
- GLboolean lost_context;
- GLboolean save_on_next_emit;
- radeonScreenPtr radeonScreen; /* Screen private DRI data */
- drm_radeon_sarea_t *sarea; /* Private SAREA data */
/* TCL stuff
*/
@@ -667,29 +417,13 @@ struct radeon_context {
GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS];
GLuint last_ReallyEnabled;
- /* VBI
- */
- int64_t swap_ust;
- int64_t swap_missed_ust;
-
- GLuint swap_count;
- GLuint swap_missed_count;
-
/* radeon_tcl.c
*/
- struct radeon_tcl_info tcl;
+ struct r100_tcl_info tcl;
/* radeon_swtcl.c
*/
- struct radeon_swtcl_info swtcl;
-
- /* Mirrors of some DRI state
- */
- struct radeon_dri_mirror dri;
-
- /* Configuration cache
- */
- driOptionCache optionCache;
+ struct r100_swtcl_info swtcl;
GLboolean using_hyperz;
GLboolean texmicrotile;
@@ -703,61 +437,19 @@ struct radeon_context {
GLuint c_textureSwaps;
GLuint c_textureBytes;
GLuint c_vertexBuffers;
+
};
-#define RADEON_CONTEXT(ctx) ((radeonContextPtr)(ctx->DriverCtx))
-
-static INLINE GLuint radeonPackColor(GLuint cpp,
- GLubyte r, GLubyte g,
- GLubyte b, GLubyte a)
-{
- switch (cpp) {
- case 2:
- return PACK_COLOR_565(r, g, b);
- case 4:
- return PACK_COLOR_8888(a, r, g, b);
- default:
- return 0;
- }
-}
+
+#define R100_CONTEXT(ctx) ((r100ContextPtr)(ctx->DriverCtx))
+
#define RADEON_OLD_PACKETS 1
-extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv);
-extern GLboolean radeonCreateContext(const __GLcontextModes * glVisual,
- __DRIcontextPrivate * driContextPriv,
- void *sharedContextPrivate);
-extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
-extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
- int x, int y, int w, int h);
-extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
- __DRIdrawablePrivate * driDrawPriv,
- __DRIdrawablePrivate * driReadPriv);
-extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
-
-/* ================================================================
- * Debugging:
- */
-#define DO_DEBUG 1
-
-#if DO_DEBUG
-extern int RADEON_DEBUG;
-#else
-#define RADEON_DEBUG 0
-#endif
-
-#define DEBUG_TEXTURE 0x0001
-#define DEBUG_STATE 0x0002
-#define DEBUG_IOCTL 0x0004
-#define DEBUG_PRIMS 0x0008
-#define DEBUG_VERTS 0x0010
-#define DEBUG_FALLBACKS 0x0020
-#define DEBUG_VFMT 0x0040
-#define DEBUG_CODEGEN 0x0080
-#define DEBUG_VERBOSE 0x0100
-#define DEBUG_DRI 0x0200
-#define DEBUG_DMA 0x0400
-#define DEBUG_SANITY 0x0800
-#define DEBUG_SYNC 0x1000
+extern GLboolean r100CreateContext( const __GLcontextModes *glVisual,
+ __DRIcontextPrivate *driContextPriv,
+ void *sharedContextPrivate);
+
+
#endif /* __RADEON_CONTEXT_H__ */
diff --git a/radeon/radeon_cs_drm.h b/radeon/radeon_cs_drm.h
new file mode 100644
index 0000000..ab4eca3
--- /dev/null
+++ b/radeon/radeon_cs_drm.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Aapo Tahkola <aet@rasterburn.org>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_CS_H
+#define RADEON_CS_H
+
+#include <stdint.h>
+#include <string.h>
+#include "drm.h"
+#include "radeon_drm.h"
+
+struct radeon_cs_reloc {
+ struct radeon_bo *bo;
+ uint32_t read_domain;
+ uint32_t write_domain;
+ uint32_t flags;
+};
+
+
+#define RADEON_CS_SPACE_OK 0
+#define RADEON_CS_SPACE_OP_TO_BIG 1
+#define RADEON_CS_SPACE_FLUSH 2
+
+struct radeon_cs_space_check {
+ struct radeon_bo *bo;
+ uint32_t read_domains;
+ uint32_t write_domain;
+ uint32_t new_accounted;
+};
+
+#define MAX_SPACE_BOS (32)
+
+struct radeon_cs_manager;
+
+struct radeon_cs {
+ struct radeon_cs_manager *csm;
+ void *relocs;
+ uint32_t *packets;
+ unsigned crelocs;
+ unsigned relocs_total_size;
+ unsigned cdw;
+ unsigned ndw;
+ int section;
+ unsigned section_ndw;
+ unsigned section_cdw;
+ const char *section_file;
+ const char *section_func;
+ int section_line;
+ struct radeon_cs_space_check bos[MAX_SPACE_BOS];
+ int bo_count;
+ void (*space_flush_fn)(void *);
+ void *space_flush_data;
+};
+
+/* cs functions */
+struct radeon_cs_funcs {
+ struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm,
+ uint32_t ndw);
+ int (*cs_write_reloc)(struct radeon_cs *cs,
+ struct radeon_bo *bo,
+ uint32_t read_domain,
+ uint32_t write_domain,
+ uint32_t flags);
+ int (*cs_begin)(struct radeon_cs *cs,
+ uint32_t ndw,
+ const char *file,
+ const char *func,
+ int line);
+ int (*cs_end)(struct radeon_cs *cs,
+ const char *file,
+ const char *func,
+ int line);
+ int (*cs_emit)(struct radeon_cs *cs);
+ int (*cs_destroy)(struct radeon_cs *cs);
+ int (*cs_erase)(struct radeon_cs *cs);
+ int (*cs_need_flush)(struct radeon_cs *cs);
+ void (*cs_print)(struct radeon_cs *cs, FILE *file);
+};
+
+struct radeon_cs_manager {
+ struct radeon_cs_funcs *funcs;
+ int fd;
+ int32_t vram_limit, gart_limit;
+ int32_t vram_write_used, gart_write_used;
+ int32_t read_used;
+};
+
+static inline struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
+ uint32_t ndw)
+{
+ return csm->funcs->cs_create(csm, ndw);
+}
+
+static inline int radeon_cs_write_reloc(struct radeon_cs *cs,
+ struct radeon_bo *bo,
+ uint32_t read_domain,
+ uint32_t write_domain,
+ uint32_t flags)
+{
+ return cs->csm->funcs->cs_write_reloc(cs,
+ bo,
+ read_domain,
+ write_domain,
+ flags);
+}
+
+static inline int radeon_cs_begin(struct radeon_cs *cs,
+ uint32_t ndw,
+ const char *file,
+ const char *func,
+ int line)
+{
+ return cs->csm->funcs->cs_begin(cs, ndw, file, func, line);
+}
+
+static inline int radeon_cs_end(struct radeon_cs *cs,
+ const char *file,
+ const char *func,
+ int line)
+{
+ return cs->csm->funcs->cs_end(cs, file, func, line);
+}
+
+static inline int radeon_cs_emit(struct radeon_cs *cs)
+{
+ return cs->csm->funcs->cs_emit(cs);
+}
+
+static inline int radeon_cs_destroy(struct radeon_cs *cs)
+{
+ return cs->csm->funcs->cs_destroy(cs);
+}
+
+static inline int radeon_cs_erase(struct radeon_cs *cs)
+{
+ return cs->csm->funcs->cs_erase(cs);
+}
+
+static inline int radeon_cs_need_flush(struct radeon_cs *cs)
+{
+ return cs->csm->funcs->cs_need_flush(cs);
+}
+
+static inline void radeon_cs_print(struct radeon_cs *cs, FILE *file)
+{
+ cs->csm->funcs->cs_print(cs, file);
+}
+
+static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit)
+{
+
+ if (domain == RADEON_GEM_DOMAIN_VRAM)
+ cs->csm->vram_limit = limit;
+ else
+ cs->csm->gart_limit = limit;
+}
+
+static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword)
+{
+ cs->packets[cs->cdw++] = dword;
+ if (cs->section) {
+ cs->section_cdw++;
+ }
+}
+
+static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword)
+{
+
+ memcpy(cs->packets + cs->cdw, &qword, sizeof(qword));
+ cs->cdw+=2;
+ if (cs->section) {
+ cs->section_cdw+=2;
+ }
+}
+
+static inline void radeon_cs_write_table(struct radeon_cs *cs, void *data, uint32_t size)
+{
+ memcpy(cs->packets + cs->cdw, data, size * 4);
+ cs->cdw += size;
+ if (cs->section) {
+ cs->section_cdw += size;
+ }
+}
+
+static inline void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data)
+{
+ cs->space_flush_fn = fn;
+ cs->space_flush_data = data;
+}
+
+
+/*
+ * add a persistent BO to the list
+ * a persistent BO is one that will be referenced across flushes,
+ * i.e. colorbuffer, textures etc.
+ * They get reset when a new "operation" happens, where an operation
+ * is a state emission with a color/textures etc followed by a bunch of vertices.
+ */
+void radeon_cs_space_add_persistent_bo(struct radeon_cs *cs,
+ struct radeon_bo *bo,
+ uint32_t read_domains,
+ uint32_t write_domain);
+
+/* reset the persistent BO list */
+void radeon_cs_space_reset_bos(struct radeon_cs *cs);
+
+/* do a space check with the current persistent BO list */
+int radeon_cs_space_check(struct radeon_cs *cs);
+
+/* do a space check with the current persistent BO list and a temporary BO
+ * a temporary BO is like a DMA buffer, which gets flushed with the
+ * command buffer */
+int radeon_cs_space_check_with_bo(struct radeon_cs *cs,
+ struct radeon_bo *bo,
+ uint32_t read_domains,
+ uint32_t write_domain);
+
+#endif
diff --git a/radeon/radeon_cs_legacy.c b/radeon/radeon_cs_legacy.c
new file mode 100644
index 0000000..f1addb2
--- /dev/null
+++ b/radeon/radeon_cs_legacy.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Aapo Tahkola <aet@rasterburn.org>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <errno.h>
+
+#include "radeon_bocs_wrapper.h"
+#include "radeon_common.h"
+
+struct cs_manager_legacy {
+ struct radeon_cs_manager base;
+ struct radeon_context *ctx;
+ /* hack for scratch stuff */
+ uint32_t pending_age;
+ uint32_t pending_count;
+
+
+};
+
+struct cs_reloc_legacy {
+ struct radeon_cs_reloc base;
+ uint32_t cindices;
+ uint32_t *indices;
+};
+
+
+static struct radeon_cs *cs_create(struct radeon_cs_manager *csm,
+ uint32_t ndw)
+{
+ struct radeon_cs *cs;
+
+ cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
+ if (cs == NULL) {
+ return NULL;
+ }
+ cs->csm = csm;
+ cs->ndw = (ndw + 0x3FF) & (~0x3FF);
+ cs->packets = (uint32_t*)malloc(4*cs->ndw);
+ if (cs->packets == NULL) {
+ free(cs);
+ return NULL;
+ }
+ cs->relocs_total_size = 0;
+ return cs;
+}
+
+static int cs_write_reloc(struct radeon_cs *cs,
+ struct radeon_bo *bo,
+ uint32_t read_domain,
+ uint32_t write_domain,
+ uint32_t flags)
+{
+ struct cs_reloc_legacy *relocs;
+ int i;
+
+ relocs = (struct cs_reloc_legacy *)cs->relocs;
+ /* check domains */
+ if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
+ /* in one CS a bo can only be in read or write domain but not
+ * in read & write domain at the same sime
+ */
+ return -EINVAL;
+ }
+ if (read_domain == RADEON_GEM_DOMAIN_CPU) {
+ return -EINVAL;
+ }
+ if (write_domain == RADEON_GEM_DOMAIN_CPU) {
+ return -EINVAL;
+ }
+ /* check if bo is already referenced */
+ for(i = 0; i < cs->crelocs; i++) {
+ uint32_t *indices;
+
+ if (relocs[i].base.bo->handle == bo->handle) {
+ /* Check domains must be in read or write. As we check already
+ * checked that in argument one of the read or write domain was
+ * set we only need to check that if previous reloc as the read
+ * domain set then the read_domain should also be set for this
+ * new relocation.
+ */
+ if (relocs[i].base.read_domain && !read_domain) {
+ return -EINVAL;
+ }
+ if (relocs[i].base.write_domain && !write_domain) {
+ return -EINVAL;
+ }
+ relocs[i].base.read_domain |= read_domain;
+ relocs[i].base.write_domain |= write_domain;
+ /* save indice */
+ relocs[i].cindices++;
+ indices = (uint32_t*)realloc(relocs[i].indices,
+ relocs[i].cindices * 4);
+ if (indices == NULL) {
+ relocs[i].cindices -= 1;
+ return -ENOMEM;
+ }
+ relocs[i].indices = indices;
+ relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1;
+ return 0;
+ }
+ }
+ /* add bo to reloc */
+ relocs = (struct cs_reloc_legacy*)
+ realloc(cs->relocs,
+ sizeof(struct cs_reloc_legacy) * (cs->crelocs + 1));
+ if (relocs == NULL) {
+ return -ENOMEM;
+ }
+ cs->relocs = relocs;
+ relocs[cs->crelocs].base.bo = bo;
+ relocs[cs->crelocs].base.read_domain = read_domain;
+ relocs[cs->crelocs].base.write_domain = write_domain;
+ relocs[cs->crelocs].base.flags = flags;
+ relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
+ if (relocs[cs->crelocs].indices == NULL) {
+ return -ENOMEM;
+ }
+ relocs[cs->crelocs].indices[0] = cs->cdw - 1;
+ relocs[cs->crelocs].cindices = 1;
+ cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
+ cs->crelocs++;
+ radeon_bo_ref(bo);
+ return 0;
+}
+
+static int cs_begin(struct radeon_cs *cs,
+ uint32_t ndw,
+ const char *file,
+ const char *func,
+ int line)
+{
+ if (cs->section) {
+ fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
+ cs->section_file, cs->section_func, cs->section_line);
+ fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
+ file, func, line);
+ return -EPIPE;
+ }
+ cs->section = 1;
+ cs->section_ndw = ndw;
+ cs->section_cdw = 0;
+ cs->section_file = file;
+ cs->section_func = func;
+ cs->section_line = line;
+
+
+ if (cs->cdw + ndw > cs->ndw) {
+ uint32_t tmp, *ptr;
+ int num = (ndw > 0x3FF) ? ndw : 0x3FF;
+
+ tmp = (cs->cdw + 1 + num) & (~num);
+ ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
+ if (ptr == NULL) {
+ return -ENOMEM;
+ }
+ cs->packets = ptr;
+ cs->ndw = tmp;
+ }
+
+ return 0;
+}
+
+static int cs_end(struct radeon_cs *cs,
+ const char *file,
+ const char *func,
+ int line)
+
+{
+ if (!cs->section) {
+ fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
+ file, func, line);
+ return -EPIPE;
+ }
+ cs->section = 0;
+ if (cs->section_ndw != cs->section_cdw) {
+ fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
+ cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
+ fprintf(stderr, "CS section end at (%s,%s,%d)\n",
+ file, func, line);
+ return -EPIPE;
+ }
+ return 0;
+}
+
+static int cs_process_relocs(struct radeon_cs *cs)
+{
+ struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
+ struct cs_reloc_legacy *relocs;
+ int i, j, r;
+
+ csm = (struct cs_manager_legacy*)cs->csm;
+ relocs = (struct cs_reloc_legacy *)cs->relocs;
+restart:
+ for (i = 0; i < cs->crelocs; i++)
+ {
+ for (j = 0; j < relocs[i].cindices; j++)
+ {
+ uint32_t soffset, eoffset;
+
+ r = radeon_bo_legacy_validate(relocs[i].base.bo,
+ &soffset, &eoffset);
+ if (r == -EAGAIN)
+ {
+ goto restart;
+ }
+ if (r)
+ {
+ fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+ relocs[i].base.bo, soffset, eoffset);
+ return r;
+ }
+ cs->packets[relocs[i].indices[j]] += soffset;
+ if (cs->packets[relocs[i].indices[j]] >= eoffset)
+ {
+ /* radeon_bo_debug(relocs[i].base.bo, 12); */
+ fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+ relocs[i].base.bo, soffset, eoffset);
+ fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
+ relocs[i].base.bo,
+ cs->packets[relocs[i].indices[j]],
+ eoffset);
+ exit(0);
+ return -EINVAL;
+ }
+ }
+ }
+ return 0;
+}
+
+static int cs_set_age(struct radeon_cs *cs)
+{
+ struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
+ struct cs_reloc_legacy *relocs;
+ int i;
+
+ relocs = (struct cs_reloc_legacy *)cs->relocs;
+ for (i = 0; i < cs->crelocs; i++) {
+ radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
+ radeon_bo_unref(relocs[i].base.bo);
+ }
+ return 0;
+}
+
+static int cs_emit(struct radeon_cs *cs)
+{
+ struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
+ drm_radeon_cmd_buffer_t cmd;
+ drm_r300_cmd_header_t age;
+ uint64_t ull;
+ int r;
+
+ csm->ctx->vtbl.emit_cs_header(cs, csm->ctx);
+
+ /* append buffer age */
+ if ( IS_R300_CLASS(csm->ctx->radeonScreen) )
+ {
+ age.scratch.cmd_type = R300_CMD_SCRATCH;
+ /* Scratch register 2 corresponds to what radeonGetAge polls */
+ csm->pending_age = 0;
+ csm->pending_count = 1;
+ ull = (uint64_t) (intptr_t) &csm->pending_age;
+ age.scratch.reg = 2;
+ age.scratch.n_bufs = 1;
+ age.scratch.flags = 0;
+ radeon_cs_write_dword(cs, age.u);
+ radeon_cs_write_qword(cs, ull);
+ radeon_cs_write_dword(cs, 0);
+ }
+
+ r = cs_process_relocs(cs);
+ if (r) {
+ return 0;
+ }
+
+ cmd.buf = (char *)cs->packets;
+ cmd.bufsz = cs->cdw * 4;
+ if (csm->ctx->state.scissor.enabled) {
+ cmd.nbox = csm->ctx->state.scissor.numClipRects;
+ cmd.boxes = (drm_clip_rect_t *) csm->ctx->state.scissor.pClipRects;
+ } else {
+ cmd.nbox = csm->ctx->numClipRects;
+ cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects;
+ }
+
+ //dump_cmdbuf(cs);
+
+ r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
+ if (r) {
+ return r;
+ }
+ if ((!IS_R300_CLASS(csm->ctx->radeonScreen)) &&
+ (!IS_R600_CLASS(csm->ctx->radeonScreen))) { /* +r6/r7 : No irq for r6/r7 yet. */
+ drm_radeon_irq_emit_t emit_cmd;
+ emit_cmd.irq_seq = (int*)&csm->pending_age;
+ r = drmCommandWrite(cs->csm->fd, DRM_RADEON_IRQ_EMIT, &emit_cmd, sizeof(emit_cmd));
+ if (r) {
+ return r;
+ }
+ }
+ cs_set_age(cs);
+
+ cs->csm->read_used = 0;
+ cs->csm->vram_write_used = 0;
+ cs->csm->gart_write_used = 0;
+ return 0;
+}
+
+static void inline cs_free_reloc(void *relocs_p, int crelocs)
+{
+ struct cs_reloc_legacy *relocs = relocs_p;
+ int i;
+ if (!relocs_p)
+ return;
+ for (i = 0; i < crelocs; i++)
+ free(relocs[i].indices);
+}
+
+static int cs_destroy(struct radeon_cs *cs)
+{
+ cs_free_reloc(cs->relocs, cs->crelocs);
+ free(cs->relocs);
+ free(cs->packets);
+ free(cs);
+ return 0;
+}
+
+static int cs_erase(struct radeon_cs *cs)
+{
+ cs_free_reloc(cs->relocs, cs->crelocs);
+ free(cs->relocs);
+ cs->relocs_total_size = 0;
+ cs->relocs = NULL;
+ cs->crelocs = 0;
+ cs->cdw = 0;
+ cs->section = 0;
+ return 0;
+}
+
+static int cs_need_flush(struct radeon_cs *cs)
+{
+ /* this function used to flush when the BO usage got to
+ * a certain size, now the higher levels handle this better */
+ return 0;
+}
+
+static void cs_print(struct radeon_cs *cs, FILE *file)
+{
+}
+
+static struct radeon_cs_funcs radeon_cs_legacy_funcs = {
+ cs_create,
+ cs_write_reloc,
+ cs_begin,
+ cs_end,
+ cs_emit,
+ cs_destroy,
+ cs_erase,
+ cs_need_flush,
+ cs_print,
+};
+
+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
+{
+ struct cs_manager_legacy *csm;
+
+ csm = (struct cs_manager_legacy*)
+ calloc(1, sizeof(struct cs_manager_legacy));
+ if (csm == NULL) {
+ return NULL;
+ }
+ csm->base.funcs = &radeon_cs_legacy_funcs;
+ csm->base.fd = ctx->dri.fd;
+ csm->ctx = ctx;
+ csm->pending_age = 1;
+ return (struct radeon_cs_manager*)csm;
+}
+
+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm)
+{
+ free(csm);
+}
+
diff --git a/radeon/radeon_cs_legacy.h b/radeon/radeon_cs_legacy.h
new file mode 100644
index 0000000..cafbc9e
--- /dev/null
+++ b/radeon/radeon_cs_legacy.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Aapo Tahkola <aet@rasterburn.org>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_CS_LEGACY_H
+#define RADEON_CS_LEGACY_H
+
+struct radeon_context;
+
+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm);
+
+#endif
diff --git a/radeon/radeon_cs_space_drm.c b/radeon/radeon_cs_space_drm.c
new file mode 100644
index 0000000..89cbbb5
--- /dev/null
+++ b/radeon/radeon_cs_space_drm.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright © 2009 Red Hat Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include "radeon_bocs_wrapper.h"
+
+struct rad_sizes {
+ int32_t op_read;
+ int32_t op_gart_write;
+ int32_t op_vram_write;
+};
+
+static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct rad_sizes *sizes)
+{
+ uint32_t read_domains, write_domain;
+ struct radeon_bo *bo;
+
+ bo = sc->bo;
+ sc->new_accounted = 0;
+ read_domains = sc->read_domains;
+ write_domain = sc->write_domain;
+
+ /* legacy needs a static check */
+ if (radeon_bo_is_static(bo)) {
+ bo->space_accounted = sc->new_accounted = (read_domains << 16) | write_domain;
+ return 0;
+ }
+
+ /* already accounted this bo */
+ if (write_domain && (write_domain == bo->space_accounted)) {
+ sc->new_accounted = bo->space_accounted;
+ return 0;
+ }
+ if (read_domains && ((read_domains << 16) == bo->space_accounted)) {
+ sc->new_accounted = bo->space_accounted;
+ return 0;
+ }
+
+ if (bo->space_accounted == 0) {
+ if (write_domain == RADEON_GEM_DOMAIN_VRAM)
+ sizes->op_vram_write += bo->size;
+ else if (write_domain == RADEON_GEM_DOMAIN_GTT)
+ sizes->op_gart_write += bo->size;
+ else
+ sizes->op_read += bo->size;
+ sc->new_accounted = (read_domains << 16) | write_domain;
+ } else {
+ uint16_t old_read, old_write;
+
+ old_read = bo->space_accounted >> 16;
+ old_write = bo->space_accounted & 0xffff;
+
+ if (write_domain && (old_read & write_domain)) {
+ sc->new_accounted = write_domain;
+ /* moving from read to a write domain */
+ if (write_domain == RADEON_GEM_DOMAIN_VRAM) {
+ sizes->op_read -= bo->size;
+ sizes->op_vram_write += bo->size;
+ } else if (write_domain == RADEON_GEM_DOMAIN_GTT) {
+ sizes->op_read -= bo->size;
+ sizes->op_gart_write += bo->size;
+ }
+ } else if (read_domains & old_write) {
+ sc->new_accounted = bo->space_accounted & 0xffff;
+ } else {
+ /* rewrite the domains */
+ if (write_domain != old_write)
+ fprintf(stderr,"WRITE DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, write_domain, old_write);
+ if (read_domains != old_read)
+ fprintf(stderr,"READ DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, read_domains, old_read);
+ return RADEON_CS_SPACE_FLUSH;
+ }
+ }
+ return 0;
+}
+
+static int radeon_cs_do_space_check(struct radeon_cs *cs, struct radeon_cs_space_check *new_tmp)
+{
+ struct radeon_cs_manager *csm = cs->csm;
+ int i;
+ struct radeon_bo *bo;
+ struct rad_sizes sizes;
+ int ret;
+
+ /* check the totals for this operation */
+
+ if (cs->bo_count == 0 && !new_tmp)
+ return 0;
+
+ memset(&sizes, 0, sizeof(struct rad_sizes));
+
+ /* prepare */
+ for (i = 0; i < cs->bo_count; i++) {
+ ret = radeon_cs_setup_bo(&cs->bos[i], &sizes);
+ if (ret)
+ return ret;
+ }
+
+ if (new_tmp) {
+ ret = radeon_cs_setup_bo(new_tmp, &sizes);
+ if (ret)
+ return ret;
+ }
+
+ if (sizes.op_read < 0)
+ sizes.op_read = 0;
+
+ /* check sizes - operation first */
+ if ((sizes.op_read + sizes.op_gart_write > csm->gart_limit) ||
+ (sizes.op_vram_write > csm->vram_limit)) {
+ return RADEON_CS_SPACE_OP_TO_BIG;
+ }
+
+ if (((csm->vram_write_used + sizes.op_vram_write) > csm->vram_limit) ||
+ ((csm->read_used + csm->gart_write_used + sizes.op_gart_write + sizes.op_read) > csm->gart_limit)) {
+ return RADEON_CS_SPACE_FLUSH;
+ }
+
+ csm->gart_write_used += sizes.op_gart_write;
+ csm->vram_write_used += sizes.op_vram_write;
+ csm->read_used += sizes.op_read;
+ /* commit */
+ for (i = 0; i < cs->bo_count; i++) {
+ bo = cs->bos[i].bo;
+ bo->space_accounted = cs->bos[i].new_accounted;
+ }
+ if (new_tmp)
+ new_tmp->bo->space_accounted = new_tmp->new_accounted;
+
+ return RADEON_CS_SPACE_OK;
+}
+
+void radeon_cs_space_add_persistent_bo(struct radeon_cs *cs, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain)
+{
+ int i;
+ for (i = 0; i < cs->bo_count; i++) {
+ if (cs->bos[i].bo == bo &&
+ cs->bos[i].read_domains == read_domains &&
+ cs->bos[i].write_domain == write_domain)
+ return;
+ }
+ radeon_bo_ref(bo);
+ i = cs->bo_count;
+ cs->bos[i].bo = bo;
+ cs->bos[i].read_domains = read_domains;
+ cs->bos[i].write_domain = write_domain;
+ cs->bos[i].new_accounted = 0;
+ cs->bo_count++;
+
+ assert(cs->bo_count < MAX_SPACE_BOS);
+}
+
+static int radeon_cs_check_space_internal(struct radeon_cs *cs, struct radeon_cs_space_check *tmp_bo)
+{
+ int ret;
+ int flushed = 0;
+
+again:
+ ret = radeon_cs_do_space_check(cs, tmp_bo);
+ if (ret == RADEON_CS_SPACE_OP_TO_BIG)
+ return -1;
+ if (ret == RADEON_CS_SPACE_FLUSH) {
+ (*cs->space_flush_fn)(cs->space_flush_data);
+ if (flushed)
+ return -1;
+ flushed = 1;
+ goto again;
+ }
+ return 0;
+}
+
+int radeon_cs_space_check_with_bo(struct radeon_cs *cs,
+ struct radeon_bo *bo,
+ uint32_t read_domains, uint32_t write_domain)
+{
+ struct radeon_cs_space_check temp_bo;
+ int ret = 0;
+
+ if (bo) {
+ temp_bo.bo = bo;
+ temp_bo.read_domains = read_domains;
+ temp_bo.write_domain = write_domain;
+ temp_bo.new_accounted = 0;
+ }
+
+ ret = radeon_cs_check_space_internal(cs, bo ? &temp_bo : NULL);
+ return ret;
+}
+
+int radeon_cs_space_check(struct radeon_cs *cs)
+{
+ return radeon_cs_check_space_internal(cs, NULL);
+}
+
+void radeon_cs_space_reset_bos(struct radeon_cs *cs)
+{
+ int i;
+ for (i = 0; i < cs->bo_count; i++) {
+ radeon_bo_unref(cs->bos[i].bo);
+ cs->bos[i].bo = NULL;
+ cs->bos[i].read_domains = 0;
+ cs->bos[i].write_domain = 0;
+ cs->bos[i].new_accounted = 0;
+ }
+ cs->bo_count = 0;
+}
+
+
diff --git a/radeon/radeon_debug.c b/radeon/radeon_debug.c
new file mode 100644
index 0000000..413000b
--- /dev/null
+++ b/radeon/radeon_debug.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright © 2009 Pauli Nieminen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Pauli Nieminen <suokkos@gmail.com>
+ */
+
+#include "utils.h"
+
+#include "radeon_debug.h"
+#include "radeon_common_context.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+static const struct dri_debug_control debug_control[] = {
+ {"fall", RADEON_FALLBACKS},
+ {"tex", RADEON_TEXTURE},
+ {"ioctl", RADEON_IOCTL},
+ {"verts", RADEON_VERTS},
+ {"render", RADEON_RENDER},
+ {"swrender", RADEON_SWRENDER},
+ {"state", RADEON_STATE},
+ {"shader", RADEON_SHADER},
+ {"vfmt", RADEON_VFMT},
+ {"vtxf", RADEON_VFMT},
+ {"dri", RADEON_DRI},
+ {"dma", RADEON_DMA},
+ {"sanity", RADEON_SANITY},
+ {"sync", RADEON_SYNC},
+ {"pixel", RADEON_PIXEL},
+ {"mem", RADEON_MEMORY},
+ {"cs", RADEON_CS},
+ {"allmsg", ~RADEON_SYNC}, /* avoid the term "sync" because the parser uses strstr */
+ {NULL, 0}
+};
+
+radeon_debug_type_t radeon_enabled_debug_types;
+
+void radeon_init_debug(void)
+{
+ radeon_enabled_debug_types = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
+
+ radeon_enabled_debug_types |= RADEON_GENERAL;
+}
+
+void _radeon_debug_add_indent(void)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ const size_t length = sizeof(radeon->debug.indent)
+ / sizeof(radeon->debug.indent[0]);
+ if (radeon->debug.indent_depth < length - 1) {
+ radeon->debug.indent[radeon->debug.indent_depth] = '\t';
+ ++radeon->debug.indent_depth;
+ };
+}
+
+void _radeon_debug_remove_indent(void)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ if (radeon->debug.indent_depth > 0) {
+ radeon->debug.indent[radeon->debug.indent_depth] = '\0';
+ --radeon->debug.indent_depth;
+ }
+}
+
+void _radeon_print(const radeon_debug_type_t type,
+ const radeon_debug_level_t level,
+ const char* message,
+ ...)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ if (ctx) {
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ // FIXME: Make this multi thread safe
+ if (radeon->debug.indent_depth)
+ fprintf(stderr, "%s", radeon->debug.indent);
+ }
+ va_list values;
+ va_start( values, message );
+ vfprintf(stderr, message, values);
+ va_end( values );
+}
diff --git a/radeon/radeon_debug.h b/radeon/radeon_debug.h
new file mode 100644
index 0000000..26da31c
--- /dev/null
+++ b/radeon/radeon_debug.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright © 2009 Pauli Nieminen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Pauli Nieminen <suokkos@gmail.com>
+ */
+
+#ifndef RADEON_DEBUG_H_INCLUDED
+#define RADEON_DEBUG_H_INCLUDED
+
+#include <stdlib.h>
+
+typedef enum radeon_debug_levels {
+ RADEON_CRITICAL = 0, /* Only errors */
+ RADEON_IMPORTANT = 1, /* Important warnings and messages */
+ RADEON_NORMAL = 2, /* Normal log messages usefull for debugging */
+ RADEON_VERBOSE = 3, /* Extra details to debugging */
+ RADEON_TRACE = 4 /* Log about everything that happens */
+} radeon_debug_level_t;
+
+/**
+ * Compile time option to change level of debugging compiled to dri driver.
+ * Selecting critical level is not recommended because perfromance gains are
+ * going to minimal but you will lose a lot of important warnings in case of
+ * errors.
+ */
+#ifndef RADEON_DEBUG_LEVEL
+#define RADEON_DEBUG_LEVEL RADEON_VERBOSE
+#endif
+
+typedef enum radeon_debug_types {
+ RADEON_TEXTURE = 0x00001,
+ RADEON_STATE = 0x00002,
+ RADEON_IOCTL = 0x00004,
+ RADEON_RENDER = 0x00008,
+ RADEON_SWRENDER = 0x00010,
+ RADEON_FALLBACKS = 0x00020,
+ RADEON_VFMT = 0x00040,
+ RADEON_SHADER = 0x00080,
+ RADEON_CS = 0x00100,
+ RADEON_DRI = 0x00200,
+ RADEON_DMA = 0x00400,
+ RADEON_SANITY = 0x00800,
+ RADEON_SYNC = 0x01000,
+ RADEON_PIXEL = 0x02000,
+ RADEON_MEMORY = 0x04000,
+ RADEON_VERTS = 0x08000,
+ RADEON_GENERAL = 0x10000 /* Used for errors and warnings */
+} radeon_debug_type_t;
+
+#define RADEON_MAX_INDENT 5
+
+struct radeon_debug {
+ size_t indent_depth;
+ char indent[RADEON_MAX_INDENT];
+};
+
+extern radeon_debug_type_t radeon_enabled_debug_types;
+
+/**
+ * Compabibility layer for old debug code
+ **/
+#define RADEON_DEBUG radeon_enabled_debug_types
+
+static inline int radeon_is_debug_enabled(const radeon_debug_type_t type,
+ const radeon_debug_level_t level)
+{
+ return RADEON_DEBUG_LEVEL >= level
+ && (type & radeon_enabled_debug_types);
+}
+/*
+ * define macro for gcc specific __attribute__ if using alternative compiler
+ */
+#ifndef __GNUC__
+#define __attribute__(x) /*empty*/
+#endif
+
+
+extern void _radeon_print(const radeon_debug_type_t type,
+ const radeon_debug_level_t level,
+ const char* message,
+ ...) __attribute__((format(printf,3,4)));
+/**
+ * Print out debug message if channel specified by type is enabled
+ * and compile time debugging level is at least as high as level parameter
+ */
+#define radeon_print(type, level, message, ...) do { \
+ const radeon_debug_level_t _debug_level = (level); \
+ const radeon_debug_type_t _debug_type = (type); \
+ /* Compile out if level of message is too high */ \
+ if (radeon_is_debug_enabled(type, level)) { \
+ _radeon_print(_debug_type, _debug_level, \
+ (message), ## __VA_ARGS__); \
+ } \
+} while(0)
+
+/**
+ * printf style function for writing error messages.
+ */
+#define radeon_error(message, ...) do { \
+ radeon_print(RADEON_GENERAL, RADEON_CRITICAL, \
+ (message), ## __VA_ARGS__); \
+} while(0)
+
+/**
+ * printf style function for writing warnings.
+ */
+#define radeon_warning(message, ...) do { \
+ radeon_print(RADEON_GENERAL, RADEON_IMPORTANT, \
+ (message), ## __VA_ARGS__); \
+} while(0)
+
+extern void radeon_init_debug(void);
+extern void _radeon_debug_add_indent(void);
+extern void _radeon_debug_remove_indent(void);
+
+static inline void radeon_debug_add_indent(void)
+{
+ if (RADEON_DEBUG_LEVEL >= RADEON_VERBOSE) {
+ _radeon_debug_add_indent();
+ }
+}
+static inline void radeon_debug_remove_indent(void)
+{
+ if (RADEON_DEBUG_LEVEL >= RADEON_VERBOSE) {
+ _radeon_debug_remove_indent();
+ }
+}
+
+
+/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
+ I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble
+ with other compilers ... GLUE!
+*/
+#define WARN_ONCE(a, ...) do { \
+ static int __warn_once=1; \
+ if(__warn_once){ \
+ radeon_warning("*********************************WARN_ONCE*********************************\n"); \
+ radeon_warning("File %s function %s line %d\n", \
+ __FILE__, __FUNCTION__, __LINE__); \
+ radeon_warning( (a), ## __VA_ARGS__);\
+ radeon_warning("***************************************************************************\n"); \
+ __warn_once=0;\
+ } \
+ } while(0)
+
+
+#endif
diff --git a/radeon/radeon_dma.c b/radeon/radeon_dma.c
new file mode 100644
index 0000000..c6edbae
--- /dev/null
+++ b/radeon/radeon_dma.c
@@ -0,0 +1,481 @@
+/**************************************************************************
+
+Copyright (C) 2004 Nicolai Haehnle.
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#include <errno.h>
+#include "radeon_common.h"
+#include "main/simple_list.h"
+
+#if defined(USE_X86_ASM)
+#define COPY_DWORDS( dst, src, nr ) \
+do { \
+ int __tmp; \
+ __asm__ __volatile__( "rep ; movsl" \
+ : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
+ : "0" (nr), \
+ "D" ((long)dst), \
+ "S" ((long)src) ); \
+} while (0)
+#else
+#define COPY_DWORDS( dst, src, nr ) \
+do { \
+ int j; \
+ for ( j = 0 ; j < nr ; j++ ) \
+ dst[j] = ((int *)src)[j]; \
+ dst += nr; \
+} while (0)
+#endif
+
+void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+ int i;
+
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+ __FUNCTION__, count, stride, (void *)out, (void *)data);
+
+ if (stride == 4)
+ COPY_DWORDS(out, data, count);
+ else
+ for (i = 0; i < count; i++) {
+ out[0] = *(int *)data;
+ out++;
+ data += stride;
+ }
+}
+
+void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+ int i;
+
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+ __FUNCTION__, count, stride, (void *)out, (void *)data);
+
+ if (stride == 8)
+ COPY_DWORDS(out, data, count * 2);
+ else
+ for (i = 0; i < count; i++) {
+ out[0] = *(int *)data;
+ out[1] = *(int *)(data + 4);
+ out += 2;
+ data += stride;
+ }
+}
+
+void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+ int i;
+
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+ __FUNCTION__, count, stride, (void *)out, (void *)data);
+
+ if (stride == 12) {
+ COPY_DWORDS(out, data, count * 3);
+ }
+ else
+ for (i = 0; i < count; i++) {
+ out[0] = *(int *)data;
+ out[1] = *(int *)(data + 4);
+ out[2] = *(int *)(data + 8);
+ out += 3;
+ data += stride;
+ }
+}
+
+void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+ int i;
+
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+ __FUNCTION__, count, stride, (void *)out, (void *)data);
+
+ if (stride == 16)
+ COPY_DWORDS(out, data, count * 4);
+ else
+ for (i = 0; i < count; i++) {
+ out[0] = *(int *)data;
+ out[1] = *(int *)(data + 4);
+ out[2] = *(int *)(data + 8);
+ out[3] = *(int *)(data + 12);
+ out += 4;
+ data += stride;
+ }
+}
+
+void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
+ const GLvoid * data, int size, int stride, int count)
+{
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ uint32_t *out;
+
+ if (stride == 0) {
+ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
+ count = 1;
+ aos->stride = 0;
+ } else {
+ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
+ aos->stride = size;
+ }
+
+ aos->components = size;
+ aos->count = count;
+
+ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
+ switch (size) {
+ case 1: radeonEmitVec4(out, data, stride, count); break;
+ case 2: radeonEmitVec8(out, data, stride, count); break;
+ case 3: radeonEmitVec12(out, data, stride, count); break;
+ case 4: radeonEmitVec16(out, data, stride, count); break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+void radeon_init_dma(radeonContextPtr rmesa)
+{
+ make_empty_list(&rmesa->dma.free);
+ make_empty_list(&rmesa->dma.wait);
+ make_empty_list(&rmesa->dma.reserved);
+ rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
+}
+
+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
+{
+ struct radeon_dma_bo *dma_bo = NULL;
+ /* we set minimum sizes to at least requested size
+ aligned to next 16 bytes. */
+ if (size > rmesa->dma.minimum_size)
+ rmesa->dma.minimum_size = (size + 15) & (~15);
+
+ radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
+ __FUNCTION__, size, rmesa->dma.minimum_size);
+
+
+ /* unmap old reserved bo */
+ if (!is_empty_list(&rmesa->dma.reserved))
+ radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
+
+ if (is_empty_list(&rmesa->dma.free)
+ || last_elem(&rmesa->dma.free)->bo->size < size) {
+ dma_bo = CALLOC_STRUCT(radeon_dma_bo);
+ assert(dma_bo);
+
+again_alloc:
+ dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
+ 0, rmesa->dma.minimum_size, 4,
+ RADEON_GEM_DOMAIN_GTT, 0);
+
+ if (!dma_bo->bo) {
+ rcommonFlushCmdBuf(rmesa, __FUNCTION__);
+ goto again_alloc;
+ }
+ insert_at_head(&rmesa->dma.reserved, dma_bo);
+ } else {
+ /* We push and pop buffers from end of list so we can keep
+ counter on unused buffers for later freeing them from
+ begin of list */
+ dma_bo = last_elem(&rmesa->dma.free);
+ remove_from_list(dma_bo);
+ insert_at_head(&rmesa->dma.reserved, dma_bo);
+ }
+
+ rmesa->dma.current_used = 0;
+ rmesa->dma.current_vertexptr = 0;
+
+ if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
+ first_elem(&rmesa->dma.reserved)->bo,
+ RADEON_GEM_DOMAIN_GTT, 0))
+ fprintf(stderr,"failure to revalidate BOs - badness\n");
+
+ if (is_empty_list(&rmesa->dma.reserved)) {
+ /* Cmd buff have been flushed in radeon_revalidate_bos */
+ goto again_alloc;
+ }
+
+ radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
+}
+
+/* Allocates a region from rmesa->dma.current. If there isn't enough
+ * space in current, grab a new buffer (and discard what was left of current)
+ */
+void radeonAllocDmaRegion(radeonContextPtr rmesa,
+ struct radeon_bo **pbo, int *poffset,
+ int bytes, int alignment)
+{
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+
+ if (rmesa->dma.flush)
+ rmesa->dma.flush(rmesa->glCtx);
+
+ assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
+
+ alignment--;
+ rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
+
+ if (is_empty_list(&rmesa->dma.reserved)
+ || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
+ radeonRefillCurrentDmaRegion(rmesa, bytes);
+
+ *poffset = rmesa->dma.current_used;
+ *pbo = first_elem(&rmesa->dma.reserved)->bo;
+ radeon_bo_ref(*pbo);
+
+ /* Always align to at least 16 bytes */
+ rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
+ rmesa->dma.current_vertexptr = rmesa->dma.current_used;
+
+ assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
+}
+
+void radeonFreeDmaRegions(radeonContextPtr rmesa)
+{
+ struct radeon_dma_bo *dma_bo;
+ struct radeon_dma_bo *temp;
+ if (RADEON_DEBUG & RADEON_DMA)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ foreach_s(dma_bo, temp, &rmesa->dma.free) {
+ remove_from_list(dma_bo);
+ radeon_bo_unref(dma_bo->bo);
+ FREE(dma_bo);
+ }
+
+ foreach_s(dma_bo, temp, &rmesa->dma.wait) {
+ remove_from_list(dma_bo);
+ radeon_bo_unref(dma_bo->bo);
+ FREE(dma_bo);
+ }
+
+ foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
+ remove_from_list(dma_bo);
+ radeon_bo_unmap(dma_bo->bo);
+ radeon_bo_unref(dma_bo->bo);
+ FREE(dma_bo);
+ }
+}
+
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
+{
+ if (is_empty_list(&rmesa->dma.reserved))
+ return;
+
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
+ rmesa->dma.current_used -= return_bytes;
+ rmesa->dma.current_vertexptr = rmesa->dma.current_used;
+}
+
+static int radeon_bo_is_idle(struct radeon_bo* bo)
+{
+ uint32_t domain;
+ int ret = radeon_bo_is_busy(bo, &domain);
+ if (ret == -EINVAL) {
+ WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
+ "This may cause small performance drop for you.\n");
+ }
+ /* Protect against bug in legacy bo handling that causes bos stay
+ * referenced even after they should be freed */
+ if (bo->cref != 1)
+ return 0;
+ return ret != -EBUSY;
+}
+
+void radeonReleaseDmaRegions(radeonContextPtr rmesa)
+{
+ struct radeon_dma_bo *dma_bo;
+ struct radeon_dma_bo *temp;
+ const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
+ const int time = rmesa->dma.free.expire_counter;
+
+ if (RADEON_DEBUG & RADEON_DMA) {
+ size_t free = 0,
+ wait = 0,
+ reserved = 0;
+ foreach(dma_bo, &rmesa->dma.free)
+ ++free;
+
+ foreach(dma_bo, &rmesa->dma.wait)
+ ++wait;
+
+ foreach(dma_bo, &rmesa->dma.reserved)
+ ++reserved;
+
+ fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
+ __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
+ }
+
+ if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+ /* request updated cs processing information from kernel */
+ legacy_track_pending(rmesa->radeonScreen->bom, 0);
+ }
+ /* move waiting bos to free list.
+ wait list provides gpu time to handle data before reuse */
+ foreach_s(dma_bo, temp, &rmesa->dma.wait) {
+ if (dma_bo->expire_counter == time) {
+ WARN_ONCE("Leaking dma buffer object!\n");
+ /* force free of buffer so we don't realy start
+ * leaking stuff now*/
+ while ((dma_bo->bo = radeon_bo_unref(dma_bo->bo))) {}
+ remove_from_list(dma_bo);
+ FREE(dma_bo);
+ continue;
+ }
+ /* free objects that are too small to be used because of large request */
+ if (dma_bo->bo->size < rmesa->dma.minimum_size) {
+ radeon_bo_unref(dma_bo->bo);
+ remove_from_list(dma_bo);
+ FREE(dma_bo);
+ continue;
+ }
+ if (!radeon_bo_is_idle(dma_bo->bo))
+ continue;
+ remove_from_list(dma_bo);
+ dma_bo->expire_counter = expire_at;
+ insert_at_tail(&rmesa->dma.free, dma_bo);
+ }
+
+ /* unmap the last dma region */
+ if (!is_empty_list(&rmesa->dma.reserved))
+ radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
+ /* move reserved to wait list */
+ foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
+ /* free objects that are too small to be used because of large request */
+ if (dma_bo->bo->size < rmesa->dma.minimum_size) {
+ radeon_bo_unref(dma_bo->bo);
+ remove_from_list(dma_bo);
+ FREE(dma_bo);
+ continue;
+ }
+ remove_from_list(dma_bo);
+ dma_bo->expire_counter = expire_at;
+ insert_at_tail(&rmesa->dma.wait, dma_bo);
+ }
+
+ /* free bos that have been unused for some time */
+ foreach_s(dma_bo, temp, &rmesa->dma.free) {
+ if (dma_bo->expire_counter != time)
+ break;
+ remove_from_list(dma_bo);
+ radeon_bo_unref(dma_bo->bo);
+ FREE(dma_bo);
+ }
+
+}
+
+
+/* Flush vertices in the current dma region.
+ */
+void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
+{
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ struct radeon_dma *dma = &rmesa->dma;
+
+
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+ dma->flush = NULL;
+
+ if (!is_empty_list(&dma->reserved)) {
+ GLuint current_offset = dma->current_used;
+
+ assert (dma->current_used +
+ rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+ dma->current_vertexptr);
+
+ if (dma->current_used != dma->current_vertexptr) {
+ dma->current_used = dma->current_vertexptr;
+
+ rmesa->vtbl.swtcl_flush(ctx, current_offset);
+ }
+ rmesa->swtcl.numverts = 0;
+ }
+}
+/* Alloc space in the current dma region.
+ */
+void *
+rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
+{
+ GLuint bytes = vsize * nverts;
+ void *head;
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+ if(is_empty_list(&rmesa->dma.reserved)
+ ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
+ if (rmesa->dma.flush) {
+ rmesa->dma.flush(rmesa->glCtx);
+ }
+
+ radeonRefillCurrentDmaRegion(rmesa, bytes);
+
+ return NULL;
+ }
+
+ if (!rmesa->dma.flush) {
+ /* if cmdbuf flushed DMA restart */
+ rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+ rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
+ }
+
+ ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+ ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
+ ASSERT( rmesa->dma.current_used +
+ rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+ rmesa->dma.current_vertexptr );
+
+ head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
+ rmesa->dma.current_vertexptr += bytes;
+ rmesa->swtcl.numverts += nverts;
+ return head;
+}
+
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+ radeonContextPtr radeon = RADEON_CONTEXT( ctx );
+ int i;
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ if (radeon->dma.flush) {
+ radeon->dma.flush(radeon->glCtx);
+ }
+ for (i = 0; i < radeon->tcl.aos_count; i++) {
+ if (radeon->tcl.aos[i].bo) {
+ radeon_bo_unref(radeon->tcl.aos[i].bo);
+ radeon->tcl.aos[i].bo = NULL;
+
+ }
+ }
+}
diff --git a/radeon/radeon_dma.h b/radeon/radeon_dma.h
new file mode 100644
index 0000000..74e653f
--- /dev/null
+++ b/radeon/radeon_dma.h
@@ -0,0 +1,58 @@
+/**************************************************************************
+
+Copyright (C) 2004 Nicolai Haehnle.
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#ifndef RADEON_DMA_H
+#define RADEON_DMA_H
+
+void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count);
+void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count);
+void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count);
+void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count);
+
+void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
+ const GLvoid * data, int size, int stride, int count);
+
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes);
+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size);
+void radeon_init_dma(radeonContextPtr rmesa);
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes);
+void radeonAllocDmaRegion(radeonContextPtr rmesa,
+ struct radeon_bo **pbo, int *poffset,
+ int bytes, int alignment);
+void radeonReleaseDmaRegions(radeonContextPtr rmesa);
+
+void rcommon_flush_last_swtcl_prim(GLcontext *ctx);
+
+void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize);
+void radeonFreeDmaRegions(radeonContextPtr rmesa);
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
+#endif
diff --git a/radeon/radeon_fbo.c b/radeon/radeon_fbo.c
new file mode 100644
index 0000000..d83b166
--- /dev/null
+++ b/radeon/radeon_fbo.c
@@ -0,0 +1,599 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Red Hat Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/context.h"
+#include "main/texformat.h"
+#include "main/texrender.h"
+#include "drivers/common/meta.h"
+
+#include "radeon_common.h"
+#include "radeon_mipmap_tree.h"
+
+#define FILE_DEBUG_FLAG RADEON_TEXTURE
+#define DBG(...) do { \
+ if (RADEON_DEBUG & FILE_DEBUG_FLAG) \
+ _mesa_printf(__VA_ARGS__); \
+} while(0)
+
+static struct gl_framebuffer *
+radeon_new_framebuffer(GLcontext *ctx, GLuint name)
+{
+ return _mesa_new_framebuffer(ctx, name);
+}
+
+static void
+radeon_delete_renderbuffer(struct gl_renderbuffer *rb)
+{
+ struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
+
+ ASSERT(rrb);
+
+ if (rrb && rrb->bo) {
+ radeon_bo_unref(rrb->bo);
+ }
+ _mesa_free(rrb);
+}
+
+static void *
+radeon_get_pointer(GLcontext *ctx, struct gl_renderbuffer *rb,
+ GLint x, GLint y)
+{
+ return NULL;
+}
+
+/**
+ * Called via glRenderbufferStorageEXT() to set the format and allocate
+ * storage for a user-created renderbuffer.
+ */
+static GLboolean
+radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+ GLenum internalFormat,
+ GLuint width, GLuint height)
+{
+ struct radeon_context *radeon = RADEON_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
+ GLboolean software_buffer = GL_FALSE;
+ int cpp;
+
+ ASSERT(rb->Name != 0);
+ switch (internalFormat) {
+ case GL_R3_G3_B2:
+ case GL_RGB4:
+ case GL_RGB5:
+ rb->_ActualFormat = GL_RGB5;
+ rb->DataType = GL_UNSIGNED_BYTE;
+ rb->RedBits = 5;
+ rb->GreenBits = 6;
+ rb->BlueBits = 5;
+ cpp = 2;
+ break;
+ case GL_RGB:
+ case GL_RGB8:
+ case GL_RGB10:
+ case GL_RGB12:
+ case GL_RGB16:
+ rb->_ActualFormat = GL_RGB8;
+ rb->DataType = GL_UNSIGNED_BYTE;
+ rb->RedBits = 8;
+ rb->GreenBits = 8;
+ rb->BlueBits = 8;
+ rb->AlphaBits = 0;
+ cpp = 4;
+ break;
+ case GL_RGBA:
+ case GL_RGBA2:
+ case GL_RGBA4:
+ case GL_RGB5_A1:
+ case GL_RGBA8:
+ case GL_RGB10_A2:
+ case GL_RGBA12:
+ case GL_RGBA16:
+ rb->_ActualFormat = GL_RGBA8;
+ rb->DataType = GL_UNSIGNED_BYTE;
+ rb->RedBits = 8;
+ rb->GreenBits = 8;
+ rb->BlueBits = 8;
+ rb->AlphaBits = 8;
+ cpp = 4;
+ break;
+ case GL_STENCIL_INDEX:
+ case GL_STENCIL_INDEX1_EXT:
+ case GL_STENCIL_INDEX4_EXT:
+ case GL_STENCIL_INDEX8_EXT:
+ case GL_STENCIL_INDEX16_EXT:
+ /* alloc a depth+stencil buffer */
+ rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+ rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+ rb->StencilBits = 8;
+ cpp = 4;
+ break;
+ case GL_DEPTH_COMPONENT16:
+ rb->_ActualFormat = GL_DEPTH_COMPONENT16;
+ rb->DataType = GL_UNSIGNED_SHORT;
+ rb->DepthBits = 16;
+ cpp = 2;
+ break;
+ case GL_DEPTH_COMPONENT:
+ case GL_DEPTH_COMPONENT24:
+ case GL_DEPTH_COMPONENT32:
+ rb->_ActualFormat = GL_DEPTH_COMPONENT24;
+ rb->DataType = GL_UNSIGNED_INT;
+ rb->DepthBits = 24;
+ cpp = 4;
+ break;
+ case GL_DEPTH_STENCIL_EXT:
+ case GL_DEPTH24_STENCIL8_EXT:
+ rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+ rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+ rb->DepthBits = 24;
+ rb->StencilBits = 8;
+ cpp = 4;
+ break;
+ default:
+ _mesa_problem(ctx,
+ "Unexpected format in intel_alloc_renderbuffer_storage");
+ return GL_FALSE;
+ }
+
+ if (ctx->Driver.Flush)
+ ctx->Driver.Flush(ctx); /* +r6/r7 */
+
+ if (rrb->bo)
+ radeon_bo_unref(rrb->bo);
+
+
+ if (software_buffer) {
+ return _mesa_soft_renderbuffer_storage(ctx, rb, internalFormat,
+ width, height);
+ }
+ else {
+ uint32_t size;
+ uint32_t pitch = ((cpp * width + 63) & ~63) / cpp;
+
+ fprintf(stderr,"Allocating %d x %d radeon RBO (pitch %d)\n", width,
+ height, pitch);
+
+ size = pitch * height * cpp;
+ rrb->pitch = pitch * cpp;
+ rrb->cpp = cpp;
+ rrb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+ 0,
+ size,
+ 0,
+ RADEON_GEM_DOMAIN_VRAM,
+ 0);
+ rb->Width = width;
+ rb->Height = height;
+ return GL_TRUE;
+ }
+
+}
+
+
+/**
+ * Called for each hardware renderbuffer when a _window_ is resized.
+ * Just update fields.
+ * Not used for user-created renderbuffers!
+ */
+static GLboolean
+radeon_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+ GLenum internalFormat, GLuint width, GLuint height)
+{
+ ASSERT(rb->Name == 0);
+ rb->Width = width;
+ rb->Height = height;
+ rb->_ActualFormat = internalFormat;
+
+ return GL_TRUE;
+}
+
+
+static void
+radeon_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb,
+ GLuint width, GLuint height)
+{
+ struct radeon_framebuffer *radeon_fb = (struct radeon_framebuffer*)fb;
+ int i;
+
+ _mesa_resize_framebuffer(ctx, fb, width, height);
+
+ fb->Initialized = GL_TRUE; /* XXX remove someday */
+
+ if (fb->Name != 0) {
+ return;
+ }
+
+ /* Make sure all window system renderbuffers are up to date */
+ for (i = 0; i < 2; i++) {
+ struct gl_renderbuffer *rb = &radeon_fb->color_rb[i]->base;
+
+ /* only resize if size is changing */
+ if (rb && (rb->Width != width || rb->Height != height)) {
+ rb->AllocStorage(ctx, rb, rb->InternalFormat, width, height);
+ }
+ }
+}
+
+
+/** Dummy function for gl_renderbuffer::AllocStorage() */
+static GLboolean
+radeon_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+ GLenum internalFormat, GLuint width, GLuint height)
+{
+ _mesa_problem(ctx, "radeon_op_alloc_storage should never be called.");
+ return GL_FALSE;
+}
+
+struct radeon_renderbuffer *
+radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv)
+{
+ struct radeon_renderbuffer *rrb;
+
+ rrb = CALLOC_STRUCT(radeon_renderbuffer);
+ if (!rrb)
+ return NULL;
+
+ _mesa_init_renderbuffer(&rrb->base, 0);
+ rrb->base.ClassID = RADEON_RB_CLASS;
+
+ /* XXX format junk */
+ switch (format) {
+ case GL_RGB5:
+ rrb->base._ActualFormat = GL_RGB5;
+ rrb->base._BaseFormat = GL_RGBA;
+ rrb->base.RedBits = 5;
+ rrb->base.GreenBits = 6;
+ rrb->base.BlueBits = 5;
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ break;
+ case GL_RGB8:
+ rrb->base._ActualFormat = GL_RGB8;
+ rrb->base._BaseFormat = GL_RGB;
+ rrb->base.RedBits = 8;
+ rrb->base.GreenBits = 8;
+ rrb->base.BlueBits = 8;
+ rrb->base.AlphaBits = 0;
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ break;
+ case GL_RGBA8:
+ rrb->base._ActualFormat = GL_RGBA8;
+ rrb->base._BaseFormat = GL_RGBA;
+ rrb->base.RedBits = 8;
+ rrb->base.GreenBits = 8;
+ rrb->base.BlueBits = 8;
+ rrb->base.AlphaBits = 8;
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ break;
+ case GL_STENCIL_INDEX8_EXT:
+ rrb->base._ActualFormat = GL_STENCIL_INDEX8_EXT;
+ rrb->base._BaseFormat = GL_STENCIL_INDEX;
+ rrb->base.StencilBits = 8;
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ break;
+ case GL_DEPTH_COMPONENT16:
+ rrb->base._ActualFormat = GL_DEPTH_COMPONENT16;
+ rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
+ rrb->base.DepthBits = 16;
+ rrb->base.DataType = GL_UNSIGNED_SHORT;
+ break;
+ case GL_DEPTH_COMPONENT24:
+ rrb->base._ActualFormat = GL_DEPTH_COMPONENT24;
+ rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
+ rrb->base.DepthBits = 24;
+ rrb->base.DataType = GL_UNSIGNED_INT;
+ break;
+ case GL_DEPTH24_STENCIL8_EXT:
+ rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+ rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+ rrb->base.DepthBits = 24;
+ rrb->base.StencilBits = 8;
+ rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+ break;
+ default:
+ fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format);
+ _mesa_delete_renderbuffer(&rrb->base);
+ return NULL;
+ }
+
+ rrb->dPriv = driDrawPriv;
+ rrb->base.InternalFormat = format;
+
+ rrb->base.Delete = radeon_delete_renderbuffer;
+ rrb->base.AllocStorage = radeon_alloc_window_storage;
+ rrb->base.GetPointer = radeon_get_pointer;
+
+ rrb->bo = NULL;
+ return rrb;
+}
+
+static struct gl_renderbuffer *
+radeon_new_renderbuffer(GLcontext * ctx, GLuint name)
+{
+ struct radeon_renderbuffer *rrb;
+
+ rrb = CALLOC_STRUCT(radeon_renderbuffer);
+ if (!rrb)
+ return NULL;
+
+ _mesa_init_renderbuffer(&rrb->base, name);
+ rrb->base.ClassID = RADEON_RB_CLASS;
+
+ rrb->base.Delete = radeon_delete_renderbuffer;
+ rrb->base.AllocStorage = radeon_alloc_renderbuffer_storage;
+ rrb->base.GetPointer = radeon_get_pointer;
+
+ return &rrb->base;
+}
+
+static void
+radeon_bind_framebuffer(GLcontext * ctx, GLenum target,
+ struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
+{
+ if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
+ radeon_draw_buffer(ctx, fb);
+ }
+ else {
+ /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */
+ }
+}
+
+static void
+radeon_framebuffer_renderbuffer(GLcontext * ctx,
+ struct gl_framebuffer *fb,
+ GLenum attachment, struct gl_renderbuffer *rb)
+{
+
+ if (ctx->Driver.Flush)
+ ctx->Driver.Flush(ctx); /* +r6/r7 */
+
+ _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
+ radeon_draw_buffer(ctx, fb);
+}
+
+
+static GLboolean
+radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb,
+ struct gl_texture_image *texImage)
+{
+ int retry = 0;
+restart:
+ if (texImage->TexFormat == &_mesa_texformat_argb8888) {
+ rrb->cpp = 4;
+ rrb->base._ActualFormat = GL_RGBA8;
+ rrb->base._BaseFormat = GL_RGBA;
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ DBG("Render to RGBA8 texture OK\n");
+ }
+ else if (texImage->TexFormat == &_mesa_texformat_rgb565) {
+ rrb->cpp = 2;
+ rrb->base._ActualFormat = GL_RGB5;
+ rrb->base._BaseFormat = GL_RGB;
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ DBG("Render to RGB5 texture OK\n");
+ }
+ else if (texImage->TexFormat == &_mesa_texformat_argb1555) {
+ rrb->cpp = 2;
+ rrb->base._ActualFormat = GL_RGB5_A1;
+ rrb->base._BaseFormat = GL_RGBA;
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ DBG("Render to ARGB1555 texture OK\n");
+ }
+ else if (texImage->TexFormat == &_mesa_texformat_argb4444) {
+ rrb->cpp = 2;
+ rrb->base._ActualFormat = GL_RGBA4;
+ rrb->base._BaseFormat = GL_RGBA;
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ DBG("Render to ARGB1555 texture OK\n");
+ }
+ else if (texImage->TexFormat == &_mesa_texformat_z16) {
+ rrb->cpp = 2;
+ rrb->base._ActualFormat = GL_DEPTH_COMPONENT16;
+ rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
+ rrb->base.DataType = GL_UNSIGNED_SHORT;
+ DBG("Render to DEPTH16 texture OK\n");
+ }
+ else if (texImage->TexFormat == &_mesa_texformat_s8_z24) {
+ rrb->cpp = 4;
+ rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+ rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+ rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+ DBG("Render to DEPTH_STENCIL texture OK\n");
+ }
+ else {
+ /* try redoing the FBO */
+ if (retry == 1) {
+ DBG("Render to texture BAD FORMAT %d\n",
+ texImage->TexFormat->MesaFormat);
+ return GL_FALSE;
+ }
+ texImage->TexFormat = radeonChooseTextureFormat(ctx, texImage->InternalFormat, 0,
+ texImage->TexFormat->DataType,
+ 1);
+
+ retry++;
+ goto restart;
+ }
+
+ rrb->pitch = texImage->Width * rrb->cpp;
+ rrb->base.InternalFormat = rrb->base._ActualFormat;
+ rrb->base.Width = texImage->Width;
+ rrb->base.Height = texImage->Height;
+ rrb->base.RedBits = texImage->TexFormat->RedBits;
+ rrb->base.GreenBits = texImage->TexFormat->GreenBits;
+ rrb->base.BlueBits = texImage->TexFormat->BlueBits;
+ rrb->base.AlphaBits = texImage->TexFormat->AlphaBits;
+ rrb->base.DepthBits = texImage->TexFormat->DepthBits;
+ rrb->base.StencilBits = texImage->TexFormat->StencilBits;
+
+ rrb->base.Delete = radeon_delete_renderbuffer;
+ rrb->base.AllocStorage = radeon_nop_alloc_storage;
+
+ return GL_TRUE;
+}
+
+
+static struct radeon_renderbuffer *
+radeon_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage)
+{
+ const GLuint name = ~0; /* not significant, but distinct for debugging */
+ struct radeon_renderbuffer *rrb;
+
+ /* make an radeon_renderbuffer to wrap the texture image */
+ rrb = CALLOC_STRUCT(radeon_renderbuffer);
+ if (!rrb) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture");
+ return NULL;
+ }
+
+ _mesa_init_renderbuffer(&rrb->base, name);
+ rrb->base.ClassID = RADEON_RB_CLASS;
+
+ if (!radeon_update_wrapper(ctx, rrb, texImage)) {
+ _mesa_free(rrb);
+ return NULL;
+ }
+
+ return rrb;
+
+}
+static void
+radeon_render_texture(GLcontext * ctx,
+ struct gl_framebuffer *fb,
+ struct gl_renderbuffer_attachment *att)
+{
+ struct gl_texture_image *newImage
+ = att->Texture->Image[att->CubeMapFace][att->TextureLevel];
+ struct radeon_renderbuffer *rrb = radeon_renderbuffer(att->Renderbuffer);
+ radeon_texture_image *radeon_image;
+ GLuint imageOffset;
+
+ (void) fb;
+
+ ASSERT(newImage);
+
+ if (newImage->Border != 0) {
+ /* Fallback on drawing to a texture with a border, which won't have a
+ * miptree.
+ */
+ _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+ _mesa_render_texture(ctx, fb, att);
+ return;
+ }
+ else if (!rrb) {
+ rrb = radeon_wrap_texture(ctx, newImage);
+ if (rrb) {
+ /* bind the wrapper to the attachment point */
+ _mesa_reference_renderbuffer(&att->Renderbuffer, &rrb->base);
+ }
+ else {
+ /* fallback to software rendering */
+ _mesa_render_texture(ctx, fb, att);
+ return;
+ }
+ }
+
+ if (!radeon_update_wrapper(ctx, rrb, newImage)) {
+ _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+ _mesa_render_texture(ctx, fb, att);
+ return;
+ }
+
+ DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n",
+ _glthread_GetID(),
+ att->Texture->Name, newImage->Width, newImage->Height,
+ rrb->base.RefCount);
+
+ /* point the renderbufer's region to the texture image region */
+ radeon_image = (radeon_texture_image *)newImage;
+ if (rrb->bo != radeon_image->mt->bo) {
+ if (rrb->bo)
+ radeon_bo_unref(rrb->bo);
+ rrb->bo = radeon_image->mt->bo;
+ radeon_bo_ref(rrb->bo);
+ }
+
+ /* compute offset of the particular 2D image within the texture region */
+ imageOffset = radeon_miptree_image_offset(radeon_image->mt,
+ att->CubeMapFace,
+ att->TextureLevel);
+
+ if (att->Texture->Target == GL_TEXTURE_3D) {
+ GLuint offsets[6];
+ radeon_miptree_depth_offsets(radeon_image->mt, att->TextureLevel,
+ offsets);
+ imageOffset += offsets[att->Zoffset];
+ }
+
+ /* store that offset in the region */
+ rrb->draw_offset = imageOffset;
+
+ /* update drawing region, etc */
+ radeon_draw_buffer(ctx, fb);
+}
+
+static void
+radeon_finish_render_texture(GLcontext * ctx,
+ struct gl_renderbuffer_attachment *att)
+{
+
+}
+static void
+radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
+{
+}
+
+void radeon_fbo_init(struct radeon_context *radeon)
+{
+ radeon->glCtx->Driver.NewFramebuffer = radeon_new_framebuffer;
+ radeon->glCtx->Driver.NewRenderbuffer = radeon_new_renderbuffer;
+ radeon->glCtx->Driver.BindFramebuffer = radeon_bind_framebuffer;
+ radeon->glCtx->Driver.FramebufferRenderbuffer = radeon_framebuffer_renderbuffer;
+ radeon->glCtx->Driver.RenderTexture = radeon_render_texture;
+ radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture;
+ radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers;
+ radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer;
+ radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer;
+}
+
+
+void radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
+ struct radeon_bo *bo)
+{
+ struct radeon_bo *old;
+ old = rb->bo;
+ rb->bo = bo;
+ radeon_bo_ref(bo);
+ if (old)
+ radeon_bo_unref(old);
+}
diff --git a/radeon/radeon_ioctl.c b/radeon/radeon_ioctl.c
index 09acf6b..a0106d0 100644
--- a/radeon/radeon_ioctl.c
+++ b/radeon/radeon_ioctl.c
@@ -35,7 +35,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <sched.h>
-#include <errno.h>
+#include <errno.h>
+
+#include "main/attrib.h"
+#include "main/enable.h"
+#include "main/blend.h"
+#include "main/bufferobj.h"
+#include "main/buffers.h"
+#include "main/depth.h"
+#include "main/shaders.h"
+#include "main/texstate.h"
+#include "main/varray.h"
+#include "glapi/dispatch.h"
+#include "swrast/swrast.h"
+#include "main/stencil.h"
+#include "main/matrix.h"
#include "main/glheader.h"
#include "main/imports.h"
@@ -43,6 +57,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "swrast/swrast.h"
#include "radeon_context.h"
+#include "radeon_common.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_tcl.h"
@@ -58,75 +73,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define RADEON_IDLE_RETRY 16
-static void radeonWaitForIdle( radeonContextPtr rmesa );
-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
- const char * caller );
-
-static void print_state_atom( struct radeon_state_atom *state )
-{
- int i;
-
- fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
-
- if (RADEON_DEBUG & DEBUG_VERBOSE)
- for (i = 0 ; i < state->cmd_size ; i++)
- fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
-
-}
-
-static void radeonSaveHwState( radeonContextPtr rmesa )
-{
- struct radeon_state_atom *atom;
- char * dest = rmesa->backup_store.cmd_buf;
-
- if (RADEON_DEBUG & DEBUG_STATE)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- rmesa->backup_store.cmd_used = 0;
-
- foreach( atom, &rmesa->hw.atomlist ) {
- if ( atom->check( rmesa->glCtx ) ) {
- int size = atom->cmd_size * 4;
- memcpy( dest, atom->cmd, size);
- dest += size;
- rmesa->backup_store.cmd_used += size;
- if (RADEON_DEBUG & DEBUG_STATE)
- print_state_atom( atom );
- }
- }
-
- assert( rmesa->backup_store.cmd_used <= RADEON_CMD_BUF_SZ );
- if (RADEON_DEBUG & DEBUG_STATE)
- fprintf(stderr, "Returning to radeonEmitState\n");
-}
-
-/* At this point we were in FlushCmdBufLocked but we had lost our context, so
- * we need to unwire our current cmdbuf, hook the one with the saved state in
- * it, flush it, and then put the current one back. This is so commands at the
- * start of a cmdbuf can rely on the state being kept from the previous one.
- */
-static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
-{
- GLuint nr_released_bufs;
- struct radeon_store saved_store;
-
- if (rmesa->backup_store.cmd_used == 0)
- return;
-
- if (RADEON_DEBUG & DEBUG_STATE)
- fprintf(stderr, "Emitting backup state on lost context\n");
-
- rmesa->lost_context = GL_FALSE;
-
- nr_released_bufs = rmesa->dma.nr_released_bufs;
- saved_store = rmesa->store;
- rmesa->dma.nr_released_bufs = 0;
- rmesa->store = rmesa->backup_store;
- radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
- rmesa->dma.nr_released_bufs = nr_released_bufs;
- rmesa->store = saved_store;
-}
-
/* =============================================================
* Kernel command buffer handling
*/
@@ -134,965 +80,382 @@ static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
/* The state atoms will be emitted in the order they appear in the atom list,
* so this step is important.
*/
-void radeonSetUpAtomList( radeonContextPtr rmesa )
+void radeonSetUpAtomList( r100ContextPtr rmesa )
{
- int i, mtu = rmesa->glCtx->Const.MaxTextureUnits;
-
- make_empty_list(&rmesa->hw.atomlist);
- rmesa->hw.atomlist.name = "atom-list";
-
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc);
+ int i, mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
+
+ make_empty_list(&rmesa->radeon.hw.atomlist);
+ rmesa->radeon.hw.atomlist.name = "atom-list";
+
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc);
for (i = 0; i < mtu; ++i) {
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.cube[i]);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]);
}
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl);
for (i = 0; i < 3 + mtu; ++i)
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]);
for (i = 0; i < 8; ++i)
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]);
for (i = 0; i < 6; ++i)
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog);
- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog);
+ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt);
}
-void radeonEmitState( radeonContextPtr rmesa )
+static void radeonEmitScissor(r100ContextPtr rmesa)
{
- struct radeon_state_atom *atom;
- char *dest;
-
- if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- if (rmesa->save_on_next_emit) {
- radeonSaveHwState(rmesa);
- rmesa->save_on_next_emit = GL_FALSE;
- }
-
- /* this code used to return here but now it emits zbs */
-
- /* To avoid going across the entire set of states multiple times, just check
- * for enough space for the case of emitting all state, and inline the
- * radeonAllocCmdBuf code here without all the checks.
- */
- radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size);
- dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
-
- /* We always always emit zbs, this is due to a bug found by keithw in
- the hardware and rediscovered after Erics changes by me.
- if you ever touch this code make sure you emit zbs otherwise
- you get tcl lockups on at least M7/7500 class of chips - airlied */
- rmesa->hw.zbs.dirty=1;
-
- if (RADEON_DEBUG & DEBUG_STATE) {
- foreach(atom, &rmesa->hw.atomlist) {
- if (atom->dirty || rmesa->hw.all_dirty) {
- if (atom->check(rmesa->glCtx))
- print_state_atom(atom);
- else
- fprintf(stderr, "skip state %s\n", atom->name);
- }
- }
- }
-
- foreach(atom, &rmesa->hw.atomlist) {
- if (rmesa->hw.all_dirty)
- atom->dirty = GL_TRUE;
- if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) &&
- atom->is_tcl)
- atom->dirty = GL_FALSE;
- if (atom->dirty) {
- if (atom->check(rmesa->glCtx)) {
- int size = atom->cmd_size * 4;
- memcpy(dest, atom->cmd, size);
- dest += size;
- rmesa->store.cmd_used += size;
- atom->dirty = GL_FALSE;
- }
- }
- }
-
- assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ);
-
- rmesa->hw.is_dirty = GL_FALSE;
- rmesa->hw.all_dirty = GL_FALSE;
+ BATCH_LOCALS(&rmesa->radeon);
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ return;
+ }
+ if (rmesa->radeon.state.scissor.enabled) {
+ BEGIN_BATCH(6);
+ OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
+ OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] | RADEON_SCISSOR_ENABLE);
+ OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
+ OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) |
+ rmesa->radeon.state.scissor.rect.x1);
+ OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
+ OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2) << 16) |
+ (rmesa->radeon.state.scissor.rect.x2));
+ END_BATCH();
+ } else {
+ BEGIN_BATCH(2);
+ OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
+ OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ~RADEON_SCISSOR_ENABLE);
+ END_BATCH();
+ }
}
/* Fire a section of the retained (indexed_verts) buffer as a regular
- * primtive.
+ * primtive.
*/
-extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
+extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
GLuint vertex_format,
GLuint primitive,
GLuint vertex_nr )
{
- drm_radeon_cmd_header_t *cmd;
-
+ BATCH_LOCALS(&rmesa->radeon);
assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-
- radeonEmitState( rmesa );
-
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
- rmesa->store.cmd_used/4);
-
- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
- __FUNCTION__ );
+
+ radeonEmitState(&rmesa->radeon);
+ radeonEmitScissor(rmesa);
+
#if RADEON_OLD_PACKETS
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
- cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
- cmd[2].i = rmesa->ioctl.vertex_offset;
- cmd[3].i = vertex_nr;
- cmd[4].i = vertex_format;
- cmd[5].i = (primitive |
- RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
- (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
- __FUNCTION__,
- cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
+ BEGIN_BATCH(8);
+ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3);
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ } else {
+ OUT_BATCH(rmesa->ioctl.vertex_offset);
+ }
+
+ OUT_BATCH(vertex_nr);
+ OUT_BATCH(vertex_format);
+ OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+ (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->ioctl.bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+
+ END_BATCH();
+
#else
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
- cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
- cmd[2].i = vertex_format;
- cmd[3].i = (primitive |
- RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
- RADEON_CP_VC_CNTL_MAOS_ENABLE |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
- (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-
-
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
- __FUNCTION__,
- cmd[1].i, cmd[2].i, cmd[3].i);
+ BEGIN_BATCH(4);
+ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);
+ OUT_BATCH(vertex_format);
+ OUT_BATCH(primitive |
+ RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+ RADEON_CP_VC_CNTL_MAOS_ENABLE |
+ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+ (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+ END_BATCH();
#endif
}
-
-void radeonFlushElts( radeonContextPtr rmesa )
+void radeonFlushElts( GLcontext *ctx )
{
- int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
- int dwords;
-#if RADEON_OLD_PACKETS
- int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
-#else
- int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
-#endif
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ BATCH_LOCALS(&rmesa->radeon);
+ int nr;
+ uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start);
+ int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw);
- if (RADEON_DEBUG & DEBUG_IOCTL)
+ if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s\n", __FUNCTION__);
- assert( rmesa->dma.flush == radeonFlushElts );
- rmesa->dma.flush = NULL;
+ assert( rmesa->radeon.dma.flush == radeonFlushElts );
+ rmesa->radeon.dma.flush = NULL;
- /* Cope with odd number of elts:
- */
- rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
- dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
+ nr = rmesa->tcl.elt_used;
#if RADEON_OLD_PACKETS
- cmd[1] |= (dwords - 3) << 16;
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ dwords -= 2;
+ }
+#endif
+
+#if RADEON_OLD_PACKETS
+ cmd[1] |= (dwords + 3) << 16;
cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
#else
- cmd[1] |= (dwords - 3) << 16;
+ cmd[1] |= (dwords + 2) << 16;
cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
#endif
- if (RADEON_DEBUG & DEBUG_SYNC) {
+ rmesa->radeon.cmdbuf.cs->cdw += dwords;
+ rmesa->radeon.cmdbuf.cs->section_cdw += dwords;
+
+#if RADEON_OLD_PACKETS
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->ioctl.bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+#endif
+
+ END_BATCH();
+
+ if (RADEON_DEBUG & RADEON_SYNC) {
fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
- radeonFinish( rmesa->glCtx );
+ radeonFinish( rmesa->radeon.glCtx );
}
-}
+}
-GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
+GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
GLuint vertex_format,
GLuint primitive,
GLuint min_nr )
{
- drm_radeon_cmd_header_t *cmd;
GLushort *retval;
+ int align_min_nr;
+ BATCH_LOCALS(&rmesa->radeon);
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);
+ if (RADEON_DEBUG & RADEON_IOCTL)
+ fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-
- radeonEmitState( rmesa );
-
- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
- ELTS_BUFSZ(min_nr),
- __FUNCTION__ );
+
+ radeonEmitState(&rmesa->radeon);
+ radeonEmitScissor(rmesa);
+
+ rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;
+
+ /* round up min_nr to align the state */
+ align_min_nr = (min_nr + 1) & ~1;
+
#if RADEON_OLD_PACKETS
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
- cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
- cmd[2].i = rmesa->ioctl.vertex_offset;
- cmd[3].i = 0xffff;
- cmd[4].i = vertex_format;
- cmd[5].i = (primitive |
- RADEON_CP_VC_CNTL_PRIM_WALK_IND |
- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
-
- retval = (GLushort *)(cmd+6);
-#else
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
- cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
- cmd[2].i = vertex_format;
- cmd[3].i = (primitive |
- RADEON_CP_VC_CNTL_PRIM_WALK_IND |
- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
- RADEON_CP_VC_CNTL_MAOS_ENABLE |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
-
- retval = (GLushort *)(cmd+4);
+ BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4);
+ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ } else {
+ OUT_BATCH(rmesa->ioctl.vertex_offset);
+ }
+ OUT_BATCH(rmesa->ioctl.vertex_max);
+ OUT_BATCH(vertex_format);
+ OUT_BATCH(primitive |
+ RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+#else
+ BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4);
+ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
+ OUT_BATCH(vertex_format);
+ OUT_BATCH(primitive |
+ RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+ RADEON_CP_VC_CNTL_MAOS_ENABLE |
+ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
#endif
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
- __FUNCTION__,
- cmd[1].i, vertex_format, primitive);
- assert(!rmesa->dma.flush);
- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
- rmesa->dma.flush = radeonFlushElts;
+ rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
+ rmesa->tcl.elt_used = min_nr;
- rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
+ retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);
- return retval;
-}
+ if (RADEON_DEBUG & RADEON_RENDER)
+ fprintf(stderr, "%s: header prim %x \n",
+ __FUNCTION__, primitive);
+ assert(!rmesa->radeon.dma.flush);
+ rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+ rmesa->radeon.dma.flush = radeonFlushElts;
+ return retval;
+}
-void radeonEmitVertexAOS( radeonContextPtr rmesa,
+void radeonEmitVertexAOS( r100ContextPtr rmesa,
GLuint vertex_size,
+ struct radeon_bo *bo,
GLuint offset )
{
#if RADEON_OLD_PACKETS
- rmesa->ioctl.vertex_size = vertex_size;
rmesa->ioctl.vertex_offset = offset;
+ rmesa->ioctl.bo = bo;
#else
- drm_radeon_cmd_header_t *cmd;
+ BATCH_LOCALS(&rmesa->radeon);
- if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
+ if (RADEON_DEBUG & (RADEON_PRIMS|DEBUG_IOCTL))
fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n",
__FUNCTION__, vertex_size, offset);
- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
- __FUNCTION__ );
+ BEGIN_BATCH(7);
+ OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
+ OUT_BATCH(1);
+ OUT_BATCH(vertex_size | (vertex_size << 8));
+ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
- cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
- cmd[2].i = 1;
- cmd[3].i = vertex_size | (vertex_size << 8);
- cmd[4].i = offset;
#endif
}
-
-void radeonEmitAOS( radeonContextPtr rmesa,
- struct radeon_dma_region **component,
+
+void radeonEmitAOS( r100ContextPtr rmesa,
GLuint nr,
GLuint offset )
{
#if RADEON_OLD_PACKETS
assert( nr == 1 );
- assert( component[0]->aos_size == component[0]->aos_stride );
- rmesa->ioctl.vertex_size = component[0]->aos_size;
- rmesa->ioctl.vertex_offset =
- (component[0]->aos_start + offset * component[0]->aos_stride * 4);
+ rmesa->ioctl.bo = rmesa->radeon.tcl.aos[0].bo;
+ rmesa->ioctl.vertex_offset =
+ (rmesa->radeon.tcl.aos[0].offset + offset * rmesa->radeon.tcl.aos[0].stride * 4);
+ rmesa->ioctl.vertex_max = rmesa->radeon.tcl.aos[0].count;
#else
- drm_radeon_cmd_header_t *cmd;
- int sz = AOS_BUFSZ(nr);
+ BATCH_LOCALS(&rmesa->radeon);
+ uint32_t voffset;
+ // int sz = AOS_BUFSZ(nr);
+ int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
int i;
- int *tmp;
- if (RADEON_DEBUG & DEBUG_IOCTL)
+ if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s\n", __FUNCTION__);
-
- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz,
- __FUNCTION__ );
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
- cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16);
- cmd[2].i = nr;
- tmp = &cmd[0].i;
- cmd += 3;
-
- for (i = 0 ; i < nr ; i++) {
- if (i & 1) {
- cmd[0].i |= ((component[i]->aos_stride << 24) |
- (component[i]->aos_size << 16));
- cmd[2].i = (component[i]->aos_start +
- offset * component[i]->aos_stride * 4);
- cmd += 3;
- }
- else {
- cmd[0].i = ((component[i]->aos_stride << 8) |
- (component[i]->aos_size << 0));
- cmd[1].i = (component[i]->aos_start +
- offset * component[i]->aos_stride * 4);
+ BEGIN_BATCH(sz+2+(nr * 2));
+ OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+ OUT_BATCH(nr);
+
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ for (i = 0; i + 1 < nr; i += 2) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+ (rmesa->radeon.tcl.aos[i].stride << 8) |
+ (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+ (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[i].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[i+1].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
}
- }
- if (RADEON_DEBUG & DEBUG_VERTS) {
- fprintf(stderr, "%s:\n", __FUNCTION__);
- for (i = 0 ; i < sz ; i++)
- fprintf(stderr, " %d: %x\n", i, tmp[i]);
- }
-#endif
-}
-
-/* using already shifted color_fmt! */
-void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */
- GLuint color_fmt,
- GLuint src_pitch,
- GLuint src_offset,
- GLuint dst_pitch,
- GLuint dst_offset,
- GLint srcx, GLint srcy,
- GLint dstx, GLint dsty,
- GLuint w, GLuint h )
-{
- drm_radeon_cmd_header_t *cmd;
-
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
- __FUNCTION__,
- src_pitch, src_offset, srcx, srcy,
- dst_pitch, dst_offset, dstx, dsty,
- w, h);
-
- assert( (src_pitch & 63) == 0 );
- assert( (dst_pitch & 63) == 0 );
- assert( (src_offset & 1023) == 0 );
- assert( (dst_offset & 1023) == 0 );
- assert( w < (1<<16) );
- assert( h < (1<<16) );
-
- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
- __FUNCTION__ );
-
-
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
- cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
- cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
- RADEON_GMC_DST_PITCH_OFFSET_CNTL |
- RADEON_GMC_BRUSH_NONE |
- color_fmt |
- RADEON_GMC_SRC_DATATYPE_COLOR |
- RADEON_ROP3_S |
- RADEON_DP_SRC_SOURCE_MEMORY |
- RADEON_GMC_CLR_CMP_CNTL_DIS |
- RADEON_GMC_WR_MSK_DIS );
-
- cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
- cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
- cmd[5].i = (srcx << 16) | srcy;
- cmd[6].i = (dstx << 16) | dsty; /* dst */
- cmd[7].i = (w << 16) | h;
-}
-
-
-void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
-{
- drm_radeon_cmd_header_t *cmd;
-
- assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
-
- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
- __FUNCTION__ );
- cmd[0].i = 0;
- cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
- cmd[0].wait.flags = flags;
-}
-
-
-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
- const char * caller )
-{
- int ret, i;
- drm_radeon_cmd_buffer_t cmd;
-
- if (rmesa->lost_context)
- radeonBackUpAndEmitLostStateLocked(rmesa);
-
- if (RADEON_DEBUG & DEBUG_IOCTL) {
- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
-
- if (RADEON_DEBUG & DEBUG_VERBOSE)
- for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
- fprintf(stderr, "%d: %x\n", i/4,
- *(int *)(&rmesa->store.cmd_buf[i]));
- }
-
- if (RADEON_DEBUG & DEBUG_DMA)
- fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
- rmesa->dma.nr_released_bufs);
-
-
- if (RADEON_DEBUG & DEBUG_SANITY) {
- if (rmesa->state.scissor.enabled)
- ret = radeonSanityCmdBuffer( rmesa,
- rmesa->state.scissor.numClipRects,
- rmesa->state.scissor.pClipRects);
- else
- ret = radeonSanityCmdBuffer( rmesa,
- rmesa->numClipRects,
- rmesa->pClipRects);
- if (ret) {
- fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);
- goto out;
+ if (nr & 1) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+ (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[nr - 1].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
}
- }
-
-
- cmd.bufsz = rmesa->store.cmd_used;
- cmd.buf = rmesa->store.cmd_buf;
-
- if (rmesa->state.scissor.enabled) {
- cmd.nbox = rmesa->state.scissor.numClipRects;
- cmd.boxes = rmesa->state.scissor.pClipRects;
} else {
- cmd.nbox = rmesa->numClipRects;
- cmd.boxes = rmesa->pClipRects;
- }
-
- ret = drmCommandWrite( rmesa->dri.fd,
- DRM_RADEON_CMDBUF,
- &cmd, sizeof(cmd) );
-
- if (ret)
- fprintf(stderr, "drmCommandWrite: %d\n", ret);
-
- if (RADEON_DEBUG & DEBUG_SYNC) {
- fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
- radeonWaitForIdleLocked( rmesa );
- }
-
- out:
- rmesa->store.primnr = 0;
- rmesa->store.statenr = 0;
- rmesa->store.cmd_used = 0;
- rmesa->dma.nr_released_bufs = 0;
- rmesa->save_on_next_emit = 1;
-
- return ret;
-}
-
-
-/* Note: does not emit any commands to avoid recursion on
- * radeonAllocCmdBuf.
- */
-void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
-{
- int ret;
-
-
- LOCK_HARDWARE( rmesa );
-
- ret = radeonFlushCmdBufLocked( rmesa, caller );
-
- UNLOCK_HARDWARE( rmesa );
-
- if (ret) {
- fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret);
- exit(ret);
- }
-}
-
-/* =============================================================
- * Hardware vertex buffer handling
- */
-
-
-void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
-{
- struct radeon_dma_buffer *dmabuf;
- int fd = rmesa->dri.fd;
- int index = 0;
- int size = 0;
- drmDMAReq dma;
- int ret;
-
- if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- if (rmesa->dma.flush) {
- rmesa->dma.flush( rmesa );
- }
-
- if (rmesa->dma.current.buf)
- radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
-
- if (rmesa->dma.nr_released_bufs > 4)
- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
-
- dma.context = rmesa->dri.hwContext;
- dma.send_count = 0;
- dma.send_list = NULL;
- dma.send_sizes = NULL;
- dma.flags = 0;
- dma.request_count = 1;
- dma.request_size = RADEON_BUFFER_SIZE;
- dma.request_list = &index;
- dma.request_sizes = &size;
- dma.granted_count = 0;
-
- LOCK_HARDWARE(rmesa); /* no need to validate */
-
- ret = drmDMA( fd, &dma );
-
- if (ret != 0) {
- /* Free some up this way?
- */
- if (rmesa->dma.nr_released_bufs) {
- radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
- }
-
- if (RADEON_DEBUG & DEBUG_DMA)
- fprintf(stderr, "Waiting for buffers\n");
-
- radeonWaitForIdleLocked( rmesa );
- ret = drmDMA( fd, &dma );
-
- if ( ret != 0 ) {
- UNLOCK_HARDWARE( rmesa );
- fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
- exit( -1 );
- }
- }
-
- UNLOCK_HARDWARE(rmesa);
-
- if (RADEON_DEBUG & DEBUG_DMA)
- fprintf(stderr, "Allocated buffer %d\n", index);
-
- dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
- dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
- dmabuf->refcount = 1;
-
- rmesa->dma.current.buf = dmabuf;
- rmesa->dma.current.address = dmabuf->buf->address;
- rmesa->dma.current.end = dmabuf->buf->total;
- rmesa->dma.current.start = 0;
- rmesa->dma.current.ptr = 0;
-
- rmesa->c_vertexBuffers++;
-}
-
-void radeonReleaseDmaRegion( radeonContextPtr rmesa,
- struct radeon_dma_region *region,
- const char *caller )
-{
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
-
- if (!region->buf)
- return;
-
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa );
-
- if (--region->buf->refcount == 0) {
- drm_radeon_cmd_header_t *cmd;
-
- if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
- fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
- region->buf->buf->idx);
-
- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd),
- __FUNCTION__ );
- cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
- cmd->dma.buf_idx = region->buf->buf->idx;
- FREE(region->buf);
- rmesa->dma.nr_released_bufs++;
- }
-
- region->buf = NULL;
- region->start = 0;
-}
-
-/* Allocates a region from rmesa->dma.current. If there isn't enough
- * space in current, grab a new buffer (and discard what was left of current)
- */
-void radeonAllocDmaRegion( radeonContextPtr rmesa,
- struct radeon_dma_region *region,
- int bytes,
- int alignment )
-{
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
-
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa );
-
- if (region->buf)
- radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );
-
- alignment--;
- rmesa->dma.current.start = rmesa->dma.current.ptr =
- (rmesa->dma.current.ptr + alignment) & ~alignment;
-
- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
- radeonRefillCurrentDmaRegion( rmesa );
-
- region->start = rmesa->dma.current.start;
- region->ptr = rmesa->dma.current.start;
- region->end = rmesa->dma.current.start + bytes;
- region->address = rmesa->dma.current.address;
- region->buf = rmesa->dma.current.buf;
- region->buf->refcount++;
-
- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
- rmesa->dma.current.start =
- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
-}
-
-/* ================================================================
- * SwapBuffers with client-side throttling
- */
-
-static uint32_t radeonGetLastFrame (radeonContextPtr rmesa)
-{
- drm_radeon_getparam_t gp;
- int ret;
- uint32_t frame;
-
- gp.param = RADEON_PARAM_LAST_FRAME;
- gp.value = (int *)&frame;
- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
- &gp, sizeof(gp) );
-
- if ( ret ) {
- fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
- exit(1);
- }
-
- return frame;
-}
-
-static void radeonEmitIrqLocked( radeonContextPtr rmesa )
-{
- drm_radeon_irq_emit_t ie;
- int ret;
-
- ie.irq_seq = &rmesa->iw.irq_seq;
- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT,
- &ie, sizeof(ie) );
- if ( ret ) {
- fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret );
- exit(1);
- }
-}
-
-
-static void radeonWaitIrq( radeonContextPtr rmesa )
-{
- int ret;
-
- do {
- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
- &rmesa->iw, sizeof(rmesa->iw) );
- } while (ret && (errno == EINTR || errno == EBUSY));
-
- if ( ret ) {
- fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
- exit(1);
- }
-}
-
-
-static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
-{
- drm_radeon_sarea_t *sarea = rmesa->sarea;
-
- if (rmesa->do_irqs) {
- if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
- if (!rmesa->irqsEmitted) {
- while (radeonGetLastFrame (rmesa) < sarea->last_frame)
- ;
- }
- else {
- UNLOCK_HARDWARE( rmesa );
- radeonWaitIrq( rmesa );
- LOCK_HARDWARE( rmesa );
- }
- rmesa->irqsEmitted = 10;
+ for (i = 0; i + 1 < nr; i += 2) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+ (rmesa->radeon.tcl.aos[i].stride << 8) |
+ (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+ (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ OUT_BATCH(voffset);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ OUT_BATCH(voffset);
}
- if (rmesa->irqsEmitted) {
- radeonEmitIrqLocked( rmesa );
- rmesa->irqsEmitted--;
+ if (nr & 1) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+ (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ OUT_BATCH(voffset);
}
- }
- else {
- while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
- UNLOCK_HARDWARE( rmesa );
- if (rmesa->do_usleeps)
- DO_USLEEP( 1 );
- LOCK_HARDWARE( rmesa );
+ for (i = 0; i + 1 < nr; i += 2) {
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[i+0].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[i+1].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
}
- }
-}
-
-/* Copy the back color buffer to the front color buffer.
- */
-void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
- const drm_clip_rect_t *rect)
-{
- radeonContextPtr rmesa;
- GLint nbox, i, ret;
- GLboolean missed_target;
- int64_t ust;
- __DRIscreenPrivate *psp;
-
- assert(dPriv);
- assert(dPriv->driContextPriv);
- assert(dPriv->driContextPriv->driverPrivate);
-
- rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-
- if ( RADEON_DEBUG & DEBUG_IOCTL ) {
- fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
- }
-
- RADEON_FIREVERTICES( rmesa );
- LOCK_HARDWARE( rmesa );
-
- /* Throttle the frame rate -- only allow one pending swap buffers
- * request at a time.
- */
- radeonWaitForFrameCompletion( rmesa );
- if (!rect)
- {
- UNLOCK_HARDWARE( rmesa );
- driWaitForVBlank( dPriv, & missed_target );
- LOCK_HARDWARE( rmesa );
- }
-
- nbox = dPriv->numClipRects; /* must be in locked region */
-
- for ( i = 0 ; i < nbox ; ) {
- GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
- drm_clip_rect_t *box = dPriv->pClipRects;
- drm_clip_rect_t *b = rmesa->sarea->boxes;
- GLint n = 0;
-
- for ( ; i < nr ; i++ ) {
-
- *b = box[i];
-
- if (rect)
- {
- if (rect->x1 > b->x1)
- b->x1 = rect->x1;
- if (rect->y1 > b->y1)
- b->y1 = rect->y1;
- if (rect->x2 < b->x2)
- b->x2 = rect->x2;
- if (rect->y2 < b->y2)
- b->y2 = rect->y2;
-
- if (b->x1 >= b->x2 || b->y1 >= b->y2)
- continue;
- }
-
- b++;
- n++;
- }
- rmesa->sarea->nbox = n;
-
- if (!n)
- continue;
-
- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
-
- if ( ret ) {
- fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
- UNLOCK_HARDWARE( rmesa );
- exit( 1 );
+ if (nr & 1) {
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[nr-1].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
}
}
+ END_BATCH();
- UNLOCK_HARDWARE( rmesa );
- if (!rect)
- {
- psp = dPriv->driScreenPriv;
- rmesa->swap_count++;
- (*psp->systemTime->getUST)( & ust );
- if ( missed_target ) {
- rmesa->swap_missed_count++;
- rmesa->swap_missed_ust = ust - rmesa->swap_ust;
- }
-
- rmesa->swap_ust = ust;
- rmesa->hw.all_dirty = GL_TRUE;
- }
-}
-
-void radeonPageFlip( __DRIdrawablePrivate *dPriv )
-{
- radeonContextPtr rmesa;
- GLint ret;
- GLboolean missed_target;
- __DRIscreenPrivate *psp;
-
- assert(dPriv);
- assert(dPriv->driContextPriv);
- assert(dPriv->driContextPriv->driverPrivate);
-
- rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
- psp = dPriv->driScreenPriv;
-
- if ( RADEON_DEBUG & DEBUG_IOCTL ) {
- fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
- rmesa->sarea->pfCurrentPage);
- }
-
- RADEON_FIREVERTICES( rmesa );
- LOCK_HARDWARE( rmesa );
-
- /* Need to do this for the perf box placement:
- */
- if (dPriv->numClipRects)
- {
- drm_clip_rect_t *box = dPriv->pClipRects;
- drm_clip_rect_t *b = rmesa->sarea->boxes;
- b[0] = box[0];
- rmesa->sarea->nbox = 1;
- }
-
- /* Throttle the frame rate -- only allow a few pending swap buffers
- * request at a time.
- */
- radeonWaitForFrameCompletion( rmesa );
- UNLOCK_HARDWARE( rmesa );
- driWaitForVBlank( dPriv, & missed_target );
- if ( missed_target ) {
- rmesa->swap_missed_count++;
- (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
- }
- LOCK_HARDWARE( rmesa );
-
- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
-
- UNLOCK_HARDWARE( rmesa );
-
- if ( ret ) {
- fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
- exit( 1 );
- }
-
- rmesa->swap_count++;
- (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
-
- /* Get ready for drawing next frame. Update the renderbuffers'
- * flippedOffset/Pitch fields so we draw into the right place.
- */
- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
- rmesa->sarea->pfCurrentPage);
-
- radeonUpdateDrawBuffer(rmesa->glCtx);
+#endif
}
-
/* ================================================================
* Buffer clear
*/
#define RADEON_MAX_CLEARS 256
-static void radeonClear( GLcontext *ctx, GLbitfield mask )
+static void radeonKernelClear(GLcontext *ctx, GLuint flags)
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
- drm_radeon_sarea_t *sarea = rmesa->sarea;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
+ drm_radeon_sarea_t *sarea = rmesa->radeon.sarea;
uint32_t clear;
- GLuint flags = 0;
- GLuint color_mask = 0;
GLint ret, i;
GLint cx, cy, cw, ch;
- if ( RADEON_DEBUG & DEBUG_IOCTL ) {
- fprintf( stderr, "radeonClear\n");
- }
-
- {
- LOCK_HARDWARE( rmesa );
- UNLOCK_HARDWARE( rmesa );
- if ( dPriv->numClipRects == 0 )
- return;
- }
-
- radeonFlush( ctx );
-
- if ( mask & BUFFER_BIT_FRONT_LEFT ) {
- flags |= RADEON_FRONT;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
- mask &= ~BUFFER_BIT_FRONT_LEFT;
- }
-
- if ( mask & BUFFER_BIT_BACK_LEFT ) {
- flags |= RADEON_BACK;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
- mask &= ~BUFFER_BIT_BACK_LEFT;
- }
-
- if ( mask & BUFFER_BIT_DEPTH ) {
- flags |= RADEON_DEPTH;
- mask &= ~BUFFER_BIT_DEPTH;
- }
-
- if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
- flags |= RADEON_STENCIL;
- mask &= ~BUFFER_BIT_STENCIL;
- }
-
- if ( mask ) {
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
- fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
- _swrast_Clear( ctx, mask );
- }
-
- if ( !flags )
- return;
-
- if (rmesa->using_hyperz) {
- flags |= RADEON_USE_COMP_ZBUF;
-/* if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)
- flags |= RADEON_USE_HIERZ; */
- if (!(rmesa->state.stencil.hwBuffer) ||
- ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
- ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
- flags |= RADEON_CLEAR_FASTZ;
- }
- }
-
- LOCK_HARDWARE( rmesa );
+ LOCK_HARDWARE( &rmesa->radeon );
/* compute region after locking: */
cx = ctx->DrawBuffer->_Xmin;
@@ -1112,7 +475,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
gp.param = RADEON_PARAM_LAST_CLEAR;
gp.value = (int *)&clear;
- ret = drmCommandWriteRead( rmesa->dri.fd,
+ ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
if ( ret ) {
@@ -1124,20 +487,20 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
break;
}
- if ( rmesa->do_usleeps ) {
- UNLOCK_HARDWARE( rmesa );
+ if ( rmesa->radeon.do_usleeps ) {
+ UNLOCK_HARDWARE( &rmesa->radeon );
DO_USLEEP( 1 );
- LOCK_HARDWARE( rmesa );
+ LOCK_HARDWARE( &rmesa->radeon );
}
}
/* Send current state to the hardware */
- radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
for ( i = 0 ; i < dPriv->numClipRects ; ) {
GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
drm_clip_rect_t *box = dPriv->pClipRects;
- drm_clip_rect_t *b = rmesa->sarea->boxes;
+ drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
drm_radeon_clear_t clear;
drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
GLint n = 0;
@@ -1172,105 +535,107 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
}
}
- rmesa->sarea->nbox = n;
+ rmesa->radeon.sarea->nbox = n;
clear.flags = flags;
- clear.clear_color = rmesa->state.color.clear;
- clear.clear_depth = rmesa->state.depth.clear;
+ clear.clear_color = rmesa->radeon.state.color.clear;
+ clear.clear_depth = rmesa->radeon.state.depth.clear;
clear.color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
- clear.depth_mask = rmesa->state.stencil.clear;
+ clear.depth_mask = rmesa->radeon.state.stencil.clear;
clear.depth_boxes = depth_boxes;
n--;
- b = rmesa->sarea->boxes;
+ b = rmesa->radeon.sarea->boxes;
for ( ; n >= 0 ; n-- ) {
depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
- depth_boxes[n].f[CLEAR_DEPTH] =
- (float)rmesa->state.depth.clear;
+ depth_boxes[n].f[CLEAR_DEPTH] =
+ (float)rmesa->radeon.state.depth.clear;
}
- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
+ ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
&clear, sizeof(drm_radeon_clear_t));
if ( ret ) {
- UNLOCK_HARDWARE( rmesa );
+ UNLOCK_HARDWARE( &rmesa->radeon );
fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
exit( 1 );
}
}
-
- UNLOCK_HARDWARE( rmesa );
- rmesa->hw.all_dirty = GL_TRUE;
+ UNLOCK_HARDWARE( &rmesa->radeon );
}
-
-void radeonWaitForIdleLocked( radeonContextPtr rmesa )
+static void radeonClear( GLcontext *ctx, GLbitfield mask )
{
- int fd = rmesa->dri.fd;
- int to = 0;
- int ret, i = 0;
-
- rmesa->c_drawWaits++;
-
- do {
- do {
- ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
- } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );
- } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );
-
- if ( ret < 0 ) {
- UNLOCK_HARDWARE( rmesa );
- fprintf( stderr, "Error: Radeon timed out... exiting\n" );
- exit( -1 );
- }
-}
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
+ GLuint flags = 0;
+ GLuint color_mask = 0;
+ GLuint orig_mask = mask;
+ if ( RADEON_DEBUG & RADEON_IOCTL ) {
+ fprintf( stderr, "radeonClear\n");
+ }
-static void radeonWaitForIdle( radeonContextPtr rmesa )
-{
- LOCK_HARDWARE(rmesa);
- radeonWaitForIdleLocked( rmesa );
- UNLOCK_HARDWARE(rmesa);
-}
+ {
+ LOCK_HARDWARE( &rmesa->radeon );
+ UNLOCK_HARDWARE( &rmesa->radeon );
+ if ( dPriv->numClipRects == 0 )
+ return;
+ }
+ radeon_firevertices(&rmesa->radeon);
-void radeonFlush( GLcontext *ctx )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+ if ( mask & BUFFER_BIT_FRONT_LEFT ) {
+ flags |= RADEON_FRONT;
+ color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+ mask &= ~BUFFER_BIT_FRONT_LEFT;
+ }
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s\n", __FUNCTION__);
+ if ( mask & BUFFER_BIT_BACK_LEFT ) {
+ flags |= RADEON_BACK;
+ color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+ mask &= ~BUFFER_BIT_BACK_LEFT;
+ }
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa );
+ if ( mask & BUFFER_BIT_DEPTH ) {
+ flags |= RADEON_DEPTH;
+ mask &= ~BUFFER_BIT_DEPTH;
+ }
- radeonEmitState( rmesa );
-
- if (rmesa->store.cmd_used)
- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
-}
+ if ( (mask & BUFFER_BIT_STENCIL) ) {
+ flags |= RADEON_STENCIL;
+ mask &= ~BUFFER_BIT_STENCIL;
+ }
-/* Make sure all commands have been sent to the hardware and have
- * completed processing.
- */
-void radeonFinish( GLcontext *ctx )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- radeonFlush( ctx );
-
- if (rmesa->do_irqs) {
- LOCK_HARDWARE( rmesa );
- radeonEmitIrqLocked( rmesa );
- UNLOCK_HARDWARE( rmesa );
- radeonWaitIrq( rmesa );
+ if ( mask ) {
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
+ fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
+ _swrast_Clear( ctx, mask );
}
- else
- radeonWaitForIdle( rmesa );
-}
+ if ( !flags )
+ return;
+
+ if (rmesa->using_hyperz) {
+ flags |= RADEON_USE_COMP_ZBUF;
+/* if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL)
+ flags |= RADEON_USE_HIERZ; */
+ if (((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+ ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
+ flags |= RADEON_CLEAR_FASTZ;
+ }
+ }
+
+ if (rmesa->radeon.radeonScreen->kernel_mm)
+ radeonUserClear(ctx, orig_mask);
+ else {
+ radeonKernelClear(ctx, flags);
+ rmesa->radeon.hw.all_dirty = GL_TRUE;
+ }
+}
void radeonInitIoctlFuncs( GLcontext *ctx )
{
diff --git a/radeon/radeon_ioctl.h b/radeon/radeon_ioctl.h
index 4e3a44d..deb53ae 100644
--- a/radeon/radeon_ioctl.h
+++ b/radeon/radeon_ioctl.h
@@ -38,31 +38,32 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/simple_list.h"
#include "radeon_lock.h"
+#include "radeon_bocs_wrapper.h"
-
-extern void radeonEmitState( radeonContextPtr rmesa );
-extern void radeonEmitVertexAOS( radeonContextPtr rmesa,
+extern void radeonEmitVertexAOS( r100ContextPtr rmesa,
GLuint vertex_size,
+ struct radeon_bo *bo,
GLuint offset );
-extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
+extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
GLuint vertex_format,
GLuint primitive,
GLuint vertex_nr );
-extern void radeonFlushElts( radeonContextPtr rmesa );
+extern void radeonFlushElts( GLcontext *ctx );
+
-extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
+extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
GLuint vertex_format,
GLuint primitive,
GLuint min_nr );
-extern void radeonEmitAOS( radeonContextPtr rmesa,
- struct radeon_dma_region **regions,
+
+extern void radeonEmitAOS( r100ContextPtr rmesa,
GLuint n,
GLuint offset );
-extern void radeonEmitBlit( radeonContextPtr rmesa,
+extern void radeonEmitBlit( r100ContextPtr rmesa,
GLuint color_fmt,
GLuint src_pitch,
GLuint src_offset,
@@ -72,30 +73,15 @@ extern void radeonEmitBlit( radeonContextPtr rmesa,
GLint dstx, GLint dsty,
GLuint w, GLuint h );
-extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags );
-
-extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * );
-extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa );
+extern void radeonEmitWait( r100ContextPtr rmesa, GLuint flags );
-extern void radeonAllocDmaRegion( radeonContextPtr rmesa,
- struct radeon_dma_region *region,
- int bytes,
- int alignment );
+extern void radeonFlushCmdBuf( r100ContextPtr rmesa, const char * );
-extern void radeonReleaseDmaRegion( radeonContextPtr rmesa,
- struct radeon_dma_region *region,
- const char *caller );
-
-extern void radeonCopyBuffer( __DRIdrawablePrivate *drawable,
- const drm_clip_rect_t *rect);
-extern void radeonPageFlip( __DRIdrawablePrivate *drawable );
extern void radeonFlush( GLcontext *ctx );
extern void radeonFinish( GLcontext *ctx );
-extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
-extern void radeonWaitForVBlank( radeonContextPtr rmesa );
extern void radeonInitIoctlFuncs( GLcontext *ctx );
-extern void radeonGetAllParams( radeonContextPtr rmesa );
-extern void radeonSetUpAtomList( radeonContextPtr rmesa );
+extern void radeonGetAllParams( r100ContextPtr rmesa );
+extern void radeonSetUpAtomList( r100ContextPtr rmesa );
/* ================================================================
* Helper macros:
@@ -105,33 +91,33 @@ extern void radeonSetUpAtomList( radeonContextPtr rmesa );
*/
#define RADEON_NEWPRIM( rmesa ) \
do { \
- if ( rmesa->dma.flush ) \
- rmesa->dma.flush( rmesa ); \
+ if ( rmesa->radeon.dma.flush ) \
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \
} while (0)
/* Can accomodate several state changes and primitive changes without
* actually firing the buffer.
*/
+
#define RADEON_STATECHANGE( rmesa, ATOM ) \
do { \
RADEON_NEWPRIM( rmesa ); \
rmesa->hw.ATOM.dirty = GL_TRUE; \
- rmesa->hw.is_dirty = GL_TRUE; \
+ rmesa->radeon.hw.is_dirty = GL_TRUE; \
} while (0)
-#define RADEON_DB_STATE( ATOM ) \
+#define RADEON_DB_STATE( ATOM ) \
memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \
rmesa->hw.ATOM.cmd_size * 4)
-static INLINE int RADEON_DB_STATECHANGE(
- radeonContextPtr rmesa,
- struct radeon_state_atom *atom )
+static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa,
+ struct radeon_state_atom *atom )
{
if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
- int *tmp;
+ GLuint *tmp;
RADEON_NEWPRIM( rmesa );
atom->dirty = GL_TRUE;
- rmesa->hw.is_dirty = GL_TRUE;
+ rmesa->radeon.hw.is_dirty = GL_TRUE;
tmp = atom->cmd;
atom->cmd = atom->lastcmd;
atom->lastcmd = tmp;
@@ -141,62 +127,55 @@ static INLINE int RADEON_DB_STATECHANGE(
return 0;
}
-
-/* Fire the buffered vertices no matter what.
- */
-#define RADEON_FIREVERTICES( rmesa ) \
-do { \
- if ( rmesa->store.cmd_used || rmesa->dma.flush ) { \
- radeonFlush( rmesa->glCtx ); \
- } \
-} while (0)
-
/* Command lengths. Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
* are available, you will also be adding an rmesa->state.max_state_size because
* r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
*/
#if RADEON_OLD_PACKETS
-#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int))
+#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2))+nr*2)
#define VERT_AOS_BUFSZ (0)
#define ELTS_BUFSZ(nr) (24 + nr * 2)
-#define VBUF_BUFSZ (6 * sizeof(int))
+#define VBUF_BUFSZ (8)
#else
-#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int))
-#define VERT_AOS_BUFSZ (5 * sizeof(int))
+#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2))
+#define VERT_AOS_BUFSZ (5)
#define ELTS_BUFSZ(nr) (16 + nr * 2)
-#define VBUF_BUFSZ (4 * sizeof(int))
+#define VBUF_BUFSZ (4)
#endif
+#define SCISSOR_BUFSZ (8)
+#define INDEX_BUFSZ (7)
-/* Ensure that a minimum amount of space is available in the command buffer.
- * This is used to ensure atomicity of state updates with the rendering requests
- * that rely on them.
- *
- * An alternative would be to implement a "soft lock" such that when the buffer
- * wraps at an inopportune time, we grab the lock, flush the current buffer,
- * and hang on to the lock until the critical section is finished and we flush
- * the buffer again and unlock.
- */
-static INLINE void radeonEnsureCmdBufSpace( radeonContextPtr rmesa,
- int bytes )
-{
- if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
- assert( bytes <= RADEON_CMD_BUF_SZ );
-}
-/* Alloc space in the command buffer
- */
-static INLINE char *radeonAllocCmdBuf( radeonContextPtr rmesa,
- int bytes, const char *where )
+static inline uint32_t cmdpacket3(int cmd_type)
{
- if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+ drm_radeon_cmd_header_t cmd;
+
+ cmd.i = 0;
+ cmd.header.cmd_type = cmd_type;
+
+ return (uint32_t)cmd.i;
- {
- char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
- rmesa->store.cmd_used += bytes;
- return head;
- }
}
+#define OUT_BATCH_PACKET3(packet, num_extra) do { \
+ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
+ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3)); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } else { \
+ OUT_BATCH(CP_PACKET2); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } \
+ } while(0)
+
+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \
+ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
+ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP)); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } else { \
+ OUT_BATCH(CP_PACKET2); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } \
+ } while(0)
+
+
#endif /* __RADEON_IOCTL_H__ */
diff --git a/radeon/radeon_lighting.c b/radeon/radeon_lighting.c
index ac3b94e..ba444f2 100644
--- a/radeon/radeon_lighting.c
+++ b/radeon/radeon_lighting.c
@@ -195,7 +195,7 @@ void radeonUpdateMaterial( GLcontext *ctx )
if (ctx->Light.ColorMaterialEnabled)
mask &= ~ctx->Light.ColorMaterialBitmask;
- if (RADEON_DEBUG & DEBUG_STATE)
+ if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s\n", __FUNCTION__);
@@ -234,7 +234,7 @@ void radeonUpdateMaterial( GLcontext *ctx )
check_twoside_fallback( ctx );
update_global_ambient( ctx );
}
- else if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_STATE))
+ else if (RADEON_DEBUG & (RADEON_PRIMS|DEBUG_STATE))
fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__);
}
@@ -624,7 +624,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
GLboolean tmp;
RADEON_STATECHANGE( rmesa, tcl );
- if (RADEON_DEBUG & DEBUG_STATE)
+ if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s %d\n", __FUNCTION__, ctx->_NeedEyeCoords);
if (ctx->_NeedEyeCoords)
diff --git a/radeon/radeon_lock.c b/radeon/radeon_lock.c
index 64bb3ca..02de8e5 100644
--- a/radeon/radeon_lock.c
+++ b/radeon/radeon_lock.c
@@ -41,30 +41,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/glheader.h"
#include "main/mtypes.h"
-#include "radeon_context.h"
+#include "main/colormac.h"
+#include "dri_util.h"
+#include "radeon_screen.h"
+#include "radeon_common.h"
#include "radeon_lock.h"
-#include "radeon_tex.h"
-#include "radeon_state.h"
-#include "radeon_ioctl.h"
-
#include "drirenderbuffer.h"
-#if DEBUG_LOCKING
-char *prevLockFile = NULL;
-int prevLockLine = 0;
-#endif
-
-/* Turn on/off page flipping according to the flags in the sarea:
- */
-static void radeonUpdatePageFlipping(radeonContextPtr rmesa)
-{
- rmesa->doPageFlip = rmesa->sarea->pfState;
- if (rmesa->glCtx->WinSysDrawBuffer) {
- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
- rmesa->sarea->pfCurrentPage);
- }
-}
-
/* Update the hardware state. This is called if another context has
* grabbed the hardware lock, which includes the X server. This
* function also updates the driver's window state after the X server
@@ -75,10 +58,11 @@ static void radeonUpdatePageFlipping(radeonContextPtr rmesa)
*/
void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
{
- __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
- __DRIdrawablePrivate *const readable = rmesa->dri.readable;
+ __DRIdrawablePrivate *const drawable = radeon_get_drawable(rmesa);
+ __DRIdrawablePrivate *const readable = radeon_get_readable(rmesa);
__DRIscreenPrivate *sPriv = rmesa->dri.screen;
- drm_radeon_sarea_t *sarea = rmesa->sarea;
+
+ assert(drawable != NULL);
drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
@@ -96,29 +80,89 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
}
if (rmesa->lastStamp != drawable->lastStamp) {
- radeonUpdatePageFlipping(rmesa);
- radeonSetCliprects(rmesa);
- radeonUpdateViewportOffset(rmesa->glCtx);
- driUpdateFramebufferSize(rmesa->glCtx, drawable);
+ radeon_window_moved(rmesa);
+ rmesa->lastStamp = drawable->lastStamp;
}
- RADEON_STATECHANGE(rmesa, ctx);
- if (rmesa->sarea->tiling_enabled) {
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
- RADEON_COLOR_TILE_ENABLE;
- } else {
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
- ~RADEON_COLOR_TILE_ENABLE;
- }
+ rmesa->vtbl.get_lock(rmesa);
+}
+#ifndef NDEBUG
+struct lock_debug {
+ const char* function;
+ const char* file;
+ int line;
+};
+
+static struct lock_debug ldebug = {0};
+#endif
+
+#if 0
+/** TODO: use atomic operations for reference counting **/
+/** gcc 4.2 has builtin functios for this **/
+#define ATOMIC_INC_AND_FETCH(atomic) __sync_add_and_fetch(&atomic, 1)
+#define ATOMIC_DEC_AND_FETCH(atomic) __sync_sub_and_fetch(&atomic, 1)
+#else
+#define ATOMIC_INC_AND_FETCH(atomic) (++atomic)
+#define ATOMIC_DEC_AND_FETCH(atomic) (--atomic)
+#endif
+
- if (sarea->ctx_owner != rmesa->dri.hwContext) {
- int i;
- sarea->ctx_owner = rmesa->dri.hwContext;
+void radeon_lock_hardware(radeonContextPtr radeon
+#ifndef NDEBUG
+ ,const char* function
+ ,const char* file
+ ,const int line
+#endif
+ )
+{
+ char ret = 0;
+ struct radeon_framebuffer *rfb = NULL;
+ struct radeon_renderbuffer *rrb = NULL;
+
+ if (radeon_get_drawable(radeon)) {
+ rfb = radeon_get_drawable(radeon)->driverPrivate;
+
+ if (rfb)
+ rrb = radeon_get_renderbuffer(&rfb->base,
+ rfb->base._ColorDrawBufferIndexes[0]);
+ }
- for (i = 0; i < rmesa->nr_heaps; i++) {
- DRI_AGE_TEXTURES(rmesa->texture_heaps[i]);
+ if (!radeon->radeonScreen->driScreen->dri2.enabled) {
+ if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1)
+ {
+#ifndef NDEBUG
+ if ( RADEON_DEBUG & RADEON_SANITY )
+ fprintf(stderr, "*** %d times of recursive call to %s ***\n"
+ "Original call was from %s (file: %s line: %d)\n"
+ "Now call is coming from %s (file: %s line: %d)\n"
+ , radeon->dri.hwLockCount, __FUNCTION__
+ , ldebug.function, ldebug.file, ldebug.line
+ , function, file, line
+ );
+#endif
+ return;
}
+ DRM_CAS(radeon->dri.hwLock, radeon->dri.hwContext,
+ (DRM_LOCK_HELD | radeon->dri.hwContext), ret );
+ if (ret)
+ radeonGetLock(radeon, 0);
+#ifndef NDEBUG
+ ldebug.function = function;
+ ldebug.file = file;
+ ldebug.line = line;
+#endif
}
+}
- rmesa->lost_context = GL_TRUE;
+void radeon_unlock_hardware(radeonContextPtr radeon)
+{
+ if (!radeon->radeonScreen->driScreen->dri2.enabled) {
+ if (ATOMIC_DEC_AND_FETCH(radeon->dri.hwLockCount) > 0)
+ {
+ return;
+ }
+ DRM_UNLOCK( radeon->dri.fd,
+ radeon->dri.hwLock,
+ radeon->dri.hwContext );
+ }
}
diff --git a/radeon/radeon_lock.h b/radeon/radeon_lock.h
index 86e96aa..da5a5b4 100644
--- a/radeon/radeon_lock.h
+++ b/radeon/radeon_lock.h
@@ -39,74 +39,31 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* Kevin E. Martin <martin@valinux.com>
*/
-#ifndef __RADEON_LOCK_H__
-#define __RADEON_LOCK_H__
+#ifndef COMMON_LOCK_H
+#define COMMON_LOCK_H
-extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
-
-/* Turn DEBUG_LOCKING on to find locking conflicts.
- */
-#define DEBUG_LOCKING 0
-
-#if DEBUG_LOCKING
-extern char *prevLockFile;
-extern int prevLockLine;
-
-#define DEBUG_LOCK() \
- do { \
- prevLockFile = (__FILE__); \
- prevLockLine = (__LINE__); \
- } while (0)
-
-#define DEBUG_RESET() \
- do { \
- prevLockFile = 0; \
- prevLockLine = 0; \
- } while (0)
-
-#define DEBUG_CHECK_LOCK() \
- do { \
- if ( prevLockFile ) { \
- fprintf( stderr, \
- "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \
- prevLockFile, prevLockLine, __FILE__, __LINE__ ); \
- exit( 1 ); \
- } \
- } while (0)
-
-#else
+#include "main/colormac.h"
+#include "radeon_screen.h"
+#include "radeon_common.h"
-#define DEBUG_LOCK()
-#define DEBUG_RESET()
-#define DEBUG_CHECK_LOCK()
+extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
+void radeon_lock_hardware(radeonContextPtr rmesa
+#ifndef NDEBUG
+ ,const char* function
+ ,const char* file
+ ,const int line
#endif
-
-/*
- * !!! We may want to separate locks from locks with validation. This
- * could be used to improve performance for those things commands that
- * do not do any drawing !!!
- */
+ );
+void radeon_unlock_hardware(radeonContextPtr rmesa);
/* Lock the hardware and validate our state.
*/
-#define LOCK_HARDWARE( rmesa ) \
- do { \
- char __ret = 0; \
- DEBUG_CHECK_LOCK(); \
- DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \
- (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret ); \
- if ( __ret ) \
- radeonGetLock( (rmesa), 0 ); \
- DEBUG_LOCK(); \
- } while (0)
-
-#define UNLOCK_HARDWARE( rmesa ) \
- do { \
- DRM_UNLOCK( (rmesa)->dri.fd, \
- (rmesa)->dri.hwLock, \
- (rmesa)->dri.hwContext ); \
- DEBUG_RESET(); \
- } while (0)
+#ifdef NDEBUG
+#define LOCK_HARDWARE( rmesa ) radeon_lock_hardware(rmesa)
+#else
+#define LOCK_HARDWARE( rmesa ) radeon_lock_hardware(rmesa, __FUNCTION__, __FILE__, __LINE__)
+#endif
+#define UNLOCK_HARDWARE( rmesa ) radeon_unlock_hardware(rmesa)
-#endif /* __RADEON_LOCK_H__ */
+#endif
diff --git a/radeon/radeon_maos.h b/radeon/radeon_maos.h
index b8935e8..b88eb19 100644
--- a/radeon/radeon_maos.h
+++ b/radeon/radeon_maos.h
@@ -38,6 +38,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_context.h"
extern void radeonEmitArrays( GLcontext *ctx, GLuint inputs );
-extern void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
#endif
diff --git a/radeon/radeon_maos_arrays.c b/radeon/radeon_maos_arrays.c
index 31eea13..08e1c5d 100644
--- a/radeon/radeon_maos_arrays.c
+++ b/radeon/radeon_maos_arrays.c
@@ -48,160 +48,35 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_maos.h"
#include "radeon_tcl.h"
-#if 0
-/* Usage:
- * - from radeon_tcl_render
- * - call radeonEmitArrays to ensure uptodate arrays in dma
- * - emit primitives (new type?) which reference the data
- * -- need to use elts for lineloop, quads, quadstrip/flat
- * -- other primitives are all well-formed (need tristrip-1,fake-poly)
- *
- */
-static void emit_ubyte_rgba3( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p\n",
- __FUNCTION__, count, stride, (void *)out);
-
- for (i = 0; i < count; i++) {
- out->red = *data;
- out->green = *(data+1);
- out->blue = *(data+2);
- out->alpha = 0xFF;
- out++;
- data += stride;
- }
-}
-
-static void emit_ubyte_rgba4( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int stride,
- int count )
+static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
+ GLvoid *data, int stride, int count)
{
int i;
- int *out = (int *)(rvb->address + rvb->start);
+ uint32_t *out;
+ int size = 1;
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- if (RADEON_DEBUG & DEBUG_VERTS)
+ if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s count %d stride %d\n",
__FUNCTION__, count, stride);
- if (stride == 4)
- COPY_DWORDS( out, data, count );
- else
- for (i = 0; i < count; i++) {
- *out++ = LE32_TO_CPU(*(int *)data);
- data += stride;
- }
-}
-
-
-static void emit_ubyte_rgba( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int size,
- int stride,
- int count )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
-
- assert (!rvb->buf);
-
if (stride == 0) {
- radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
+ radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
count = 1;
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 0;
- rvb->aos_size = 1;
+ aos->stride = 0;
}
else {
- radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 1;
- rvb->aos_size = 1;
- }
-
- /* Emit the data
- */
- switch (size) {
- case 3:
- emit_ubyte_rgba3( ctx, rvb, data, stride, count );
- break;
- case 4:
- emit_ubyte_rgba4( ctx, rvb, data, stride, count );
- break;
- default:
- assert(0);
- exit(1);
- break;
+ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
+ aos->stride = size;
}
-}
-#endif
-
-#if defined(USE_X86_ASM)
-#define COPY_DWORDS( dst, src, nr ) \
-do { \
- int __tmp; \
- __asm__ __volatile__( "rep ; movsl" \
- : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
- : "0" (nr), \
- "D" ((long)dst), \
- "S" ((long)src) ); \
-} while (0)
-#else
-#define COPY_DWORDS( dst, src, nr ) \
-do { \
- int j; \
- for ( j = 0 ; j < nr ; j++ ) \
- dst[j] = ((int *)src)[j]; \
- dst += nr; \
-} while (0)
-#endif
-
-static void emit_vecfog( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- GLfloat *out;
-
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
+ aos->components = size;
+ aos->count = count;
- assert (!rvb->buf);
-
- if (stride == 0) {
- radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
- count = 1;
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 0;
- rvb->aos_size = 1;
- }
- else {
- radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 1;
- rvb->aos_size = 1;
- }
/* Emit the data
*/
- out = (GLfloat *)(rvb->address + rvb->start);
+ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
for (i = 0; i < count; i++) {
out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
out++;
@@ -209,170 +84,10 @@ static void emit_vecfog( GLcontext *ctx,
}
}
-static void emit_vec4( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
-
- if (stride == 4)
- COPY_DWORDS( out, data, count );
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out++;
- data += stride;
- }
-}
-
-
-static void emit_vec8( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
-
- if (stride == 8)
- COPY_DWORDS( out, data, count*2 );
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data+4);
- out += 2;
- data += stride;
- }
-}
-
-static void emit_vec12( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- __FUNCTION__, count, stride, (void *)out, (void *)data);
-
- if (stride == 12)
- COPY_DWORDS( out, data, count*3 );
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data+4);
- out[2] = *(int *)(data+8);
- out += 3;
- data += stride;
- }
-}
-
-static void emit_vec16( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int stride,
- int count )
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
-
- if (stride == 16)
- COPY_DWORDS( out, data, count*4 );
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data+4);
- out[2] = *(int *)(data+8);
- out[3] = *(int *)(data+12);
- out += 4;
- data += stride;
- }
-}
-
-
-static void emit_vector( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int size,
- int stride,
- int count )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d size %d stride %d\n",
- __FUNCTION__, count, size, stride);
-
- assert (!rvb->buf);
-
- if (stride == 0) {
- radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
- count = 1;
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 0;
- rvb->aos_size = size;
- }
- else {
- radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = size;
- rvb->aos_size = size;
- }
-
- /* Emit the data
- */
- switch (size) {
- case 1:
- emit_vec4( ctx, rvb, data, stride, count );
- break;
- case 2:
- emit_vec8( ctx, rvb, data, stride, count );
- break;
- case 3:
- emit_vec12( ctx, rvb, data, stride, count );
- break;
- case 4:
- emit_vec16( ctx, rvb, data, stride, count );
- break;
- default:
- assert(0);
- exit(1);
- break;
- }
-
-}
-
-
-
-static void emit_s0_vec( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int stride,
- int count )
+static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count)
{
int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
+ if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s count %d stride %d\n",
__FUNCTION__, count, stride);
@@ -384,16 +99,11 @@ static void emit_s0_vec( GLcontext *ctx,
}
}
-static void emit_stq_vec( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int stride,
- int count )
+static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count)
{
int i;
- int *out = (int *)(rvb->address + rvb->start);
- if (RADEON_DEBUG & DEBUG_VERTS)
+ if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s count %d stride %d\n",
__FUNCTION__, count, stride);
@@ -409,21 +119,16 @@ static void emit_stq_vec( GLcontext *ctx,
-static void emit_tex_vector( GLcontext *ctx,
- struct radeon_dma_region *rvb,
- char *data,
- int size,
- int stride,
- int count )
+static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos,
+ GLvoid *data, int size, int stride, int count)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
int emitsize;
+ uint32_t *out;
- if (RADEON_DEBUG & DEBUG_VERTS)
+ if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
- assert (!rvb->buf);
-
switch (size) {
case 4: emitsize = 3; break;
case 3: emitsize = 3; break;
@@ -432,34 +137,33 @@ static void emit_tex_vector( GLcontext *ctx,
if (stride == 0) {
- radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
+ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * 4, 32);
count = 1;
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = 0;
- rvb->aos_size = emitsize;
+ aos->stride = 0;
}
else {
- radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
- rvb->aos_start = GET_START(rvb);
- rvb->aos_stride = emitsize;
- rvb->aos_size = emitsize;
+ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * count * 4, 32);
+ aos->stride = emitsize;
}
+ aos->components = emitsize;
+ aos->count = count;
/* Emit the data
*/
+ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
switch (size) {
case 1:
- emit_s0_vec( ctx, rvb, data, stride, count );
+ emit_s0_vec( out, data, stride, count );
break;
case 2:
- emit_vec8( ctx, rvb, data, stride, count );
+ radeonEmitVec8( out, data, stride, count );
break;
case 3:
- emit_vec12( ctx, rvb, data, stride, count );
+ radeonEmitVec12( out, data, stride, count );
break;
case 4:
- emit_stq_vec( ctx, rvb, data, stride, count );
+ emit_stq_vec( out, data, stride, count );
break;
default:
assert(0);
@@ -476,27 +180,26 @@ static void emit_tex_vector( GLcontext *ctx,
*/
void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
{
- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+ r100ContextPtr rmesa = R100_CONTEXT( ctx );
struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
- struct radeon_dma_region **component = rmesa->tcl.aos_components;
GLuint nr = 0;
GLuint vfmt = 0;
GLuint count = VB->Count;
GLuint vtx, unit;
#if 0
- if (RADEON_DEBUG & DEBUG_VERTS)
+ if (RADEON_DEBUG & RADEON_VERTS)
_tnl_print_vert_flags( __FUNCTION__, inputs );
#endif
if (1) {
if (!rmesa->tcl.obj.buf)
- emit_vector( ctx,
- &rmesa->tcl.obj,
- (char *)VB->ObjPtr->data,
- VB->ObjPtr->size,
- VB->ObjPtr->stride,
- count);
+ rcommon_emit_vector( ctx,
+ &(rmesa->tcl.aos[nr]),
+ (char *)VB->ObjPtr->data,
+ VB->ObjPtr->size,
+ VB->ObjPtr->stride,
+ count);
switch( VB->ObjPtr->size ) {
case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
@@ -505,21 +208,21 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
default:
break;
}
- component[nr++] = &rmesa->tcl.obj;
+ nr++;
}
if (inputs & VERT_BIT_NORMAL) {
if (!rmesa->tcl.norm.buf)
- emit_vector( ctx,
- &(rmesa->tcl.norm),
- (char *)VB->NormalPtr->data,
- 3,
- VB->NormalPtr->stride,
- count);
+ rcommon_emit_vector( ctx,
+ &(rmesa->tcl.aos[nr]),
+ (char *)VB->NormalPtr->data,
+ 3,
+ VB->NormalPtr->stride,
+ count);
vfmt |= RADEON_CP_VC_FRMT_N0;
- component[nr++] = &rmesa->tcl.norm;
+ nr++;
}
if (inputs & VERT_BIT_COLOR0) {
@@ -537,31 +240,30 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
}
if (!rmesa->tcl.rgba.buf)
- emit_vector( ctx,
- &(rmesa->tcl.rgba),
- (char *)VB->ColorPtr[0]->data,
- emitsize,
- VB->ColorPtr[0]->stride,
- count);
-
-
- component[nr++] = &rmesa->tcl.rgba;
+ rcommon_emit_vector( ctx,
+ &(rmesa->tcl.aos[nr]),
+ (char *)VB->ColorPtr[0]->data,
+ emitsize,
+ VB->ColorPtr[0]->stride,
+ count);
+
+ nr++;
}
if (inputs & VERT_BIT_COLOR1) {
if (!rmesa->tcl.spec.buf) {
- emit_vector( ctx,
- &rmesa->tcl.spec,
- (char *)VB->SecondaryColorPtr[0]->data,
- 3,
- VB->SecondaryColorPtr[0]->stride,
- count);
+ rcommon_emit_vector( ctx,
+ &(rmesa->tcl.aos[nr]),
+ (char *)VB->SecondaryColorPtr[0]->data,
+ 3,
+ VB->SecondaryColorPtr[0]->stride,
+ count);
}
vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
- component[nr++] = &rmesa->tcl.spec;
+ nr++;
}
/* FIXME: not sure if this is correct. May need to stitch this together with
@@ -570,13 +272,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
if (inputs & VERT_BIT_FOG) {
if (!rmesa->tcl.fog.buf)
emit_vecfog( ctx,
- &(rmesa->tcl.fog),
+ &(rmesa->tcl.aos[nr]),
(char *)VB->FogCoordPtr->data,
VB->FogCoordPtr->stride,
count);
vfmt |= RADEON_CP_VC_FRMT_FPFOG;
- component[nr++] = &rmesa->tcl.fog;
+ nr++;
}
@@ -587,11 +289,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
if (inputs & VERT_BIT_TEX(unit)) {
if (!rmesa->tcl.tex[unit].buf)
emit_tex_vector( ctx,
- &(rmesa->tcl.tex[unit]),
+ &(rmesa->tcl.aos[nr]),
(char *)VB->TexCoordPtr[unit]->data,
VB->TexCoordPtr[unit]->size,
VB->TexCoordPtr[unit]->stride,
count );
+ nr++;
vfmt |= RADEON_ST_BIT(unit);
/* assume we need the 3rd coord if texgen is active for r/q OR at least
@@ -609,7 +312,6 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
(swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
}
- component[nr++] = &rmesa->tcl.tex[unit];
}
}
@@ -622,34 +324,3 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
rmesa->tcl.vertex_format = vfmt;
}
-
-void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
- GLuint unit;
-
-#if 0
- if (RADEON_DEBUG & DEBUG_VERTS)
- _tnl_print_vert_flags( __FUNCTION__, newinputs );
-#endif
-
- if (newinputs & VERT_BIT_POS)
- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
-
- if (newinputs & VERT_BIT_NORMAL)
- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
-
- if (newinputs & VERT_BIT_COLOR0)
- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
-
- if (newinputs & VERT_BIT_COLOR1)
- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
-
- if (newinputs & VERT_BIT_FOG)
- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
-
- for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
- if (newinputs & VERT_BIT_TEX(unit))
- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
- }
-}
diff --git a/radeon/radeon_maos_vbtmp.h b/radeon/radeon_maos_vbtmp.h
index 034cda8..5157831 100644
--- a/radeon/radeon_maos_vbtmp.h
+++ b/radeon/radeon_maos_vbtmp.h
@@ -54,8 +54,7 @@ static void TAG(emit)( GLcontext *ctx,
union emit_union *v = (union emit_union *)dest;
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s\n", __FUNCTION__);
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __FUNCTION__);
coord = (GLuint (*)[4])VB->ObjPtr->data;
coord_stride = VB->ObjPtr->stride;
diff --git a/radeon/radeon_maos_verts.c b/radeon/radeon_maos_verts.c
index 126d072..78ec119 100644
--- a/radeon/radeon_maos_verts.c
+++ b/radeon/radeon_maos_verts.c
@@ -310,7 +310,7 @@ static void init_tcl_verts( void )
void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
GLuint req = 0;
GLuint unit;
@@ -374,14 +374,15 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
break;
if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format &&
- rmesa->tcl.indexed_verts.buf)
+ rmesa->radeon.tcl.aos[0].bo)
return;
- if (rmesa->tcl.indexed_verts.buf)
+ if (rmesa->radeon.tcl.aos[0].bo)
radeonReleaseArrays( ctx, ~0 );
- radeonAllocDmaRegion( rmesa,
- &rmesa->tcl.indexed_verts,
+ radeonAllocDmaRegion( &rmesa->radeon,
+ &rmesa->radeon.tcl.aos[0].bo,
+ &rmesa->radeon.tcl.aos[0].offset,
VB->Count * setup_tab[i].vertex_size * 4,
4);
@@ -421,29 +422,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
setup_tab[i].emit( ctx, 0, VB->Count,
- rmesa->tcl.indexed_verts.address +
- rmesa->tcl.indexed_verts.start );
+ rmesa->radeon.tcl.aos[0].bo->ptr + rmesa->radeon.tcl.aos[0].offset);
+ // rmesa->radeon.tcl.aos[0].size = setup_tab[i].vertex_size;
+ rmesa->radeon.tcl.aos[0].stride = setup_tab[i].vertex_size;
rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
- rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
- rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size;
- rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size;
-
- rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts;
- rmesa->tcl.nr_aos_components = 1;
+ rmesa->radeon.tcl.aos_count = 1;
}
-
-void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-
-#if 0
- if (RADEON_DEBUG & DEBUG_VERTS)
- _tnl_print_vert_flags( __FUNCTION__, newinputs );
-#endif
-
- if (newinputs)
- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
-}
diff --git a/radeon/radeon_mipmap_tree.c b/radeon/radeon_mipmap_tree.c
new file mode 100644
index 0000000..38db305
--- /dev/null
+++ b/radeon/radeon_mipmap_tree.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_mipmap_tree.h"
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "main/simple_list.h"
+#include "main/texcompress.h"
+#include "main/texformat.h"
+
+static GLuint radeon_compressed_texture_size(GLcontext *ctx,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLuint mesaFormat)
+{
+ GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat);
+
+ if (mesaFormat == MESA_FORMAT_RGB_DXT1 ||
+ mesaFormat == MESA_FORMAT_RGBA_DXT1) {
+ if (width + 3 < 8) /* width one block */
+ size = size * 4;
+ else if (width + 3 < 16)
+ size = size * 2;
+ } else {
+ /* DXT3/5, 16 bytes per block */
+ // WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n");
+ if (width + 3 < 8)
+ size = size * 2;
+ }
+
+ return size;
+}
+
+
+static int radeon_compressed_num_bytes(GLuint mesaFormat)
+{
+ int bytes = 0;
+ switch(mesaFormat) {
+
+ case MESA_FORMAT_RGB_FXT1:
+ case MESA_FORMAT_RGBA_FXT1:
+ case MESA_FORMAT_RGB_DXT1:
+ case MESA_FORMAT_RGBA_DXT1:
+ bytes = 2;
+ break;
+
+ case MESA_FORMAT_RGBA_DXT3:
+ case MESA_FORMAT_RGBA_DXT5:
+ bytes = 4;
+ default:
+ break;
+ }
+
+ return bytes;
+}
+
+/**
+ * Compute sizes and fill in offset and blit information for the given
+ * image (determined by \p face and \p level).
+ *
+ * \param curOffset points to the offset at which the image is to be stored
+ * and is updated by this function according to the size of the image.
+ */
+static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree *mt,
+ GLuint face, GLuint level, GLuint* curOffset)
+{
+ radeon_mipmap_level *lvl = &mt->levels[level];
+ uint32_t row_align;
+
+ /* Find image size in bytes */
+ if (mt->compressed) {
+ /* TODO: Is this correct? Need test cases for compressed textures! */
+ row_align = rmesa->texture_compressed_row_align - 1;
+ lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
+ lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx,
+ lvl->width, lvl->height, lvl->depth, mt->compressed);
+ } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) {
+ row_align = rmesa->texture_rect_row_align - 1;
+ lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
+ lvl->size = lvl->rowstride * lvl->height;
+ } else if (mt->tilebits & RADEON_TXO_MICRO_TILE) {
+ /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+ * though the actual offset may be different (if texture is less than
+ * 32 bytes width) to the untiled case */
+ lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31;
+ lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth;
+ } else {
+ row_align = rmesa->texture_row_align - 1;
+ lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
+ lvl->size = lvl->rowstride * lvl->height * lvl->depth;
+ }
+ assert(lvl->size > 0);
+
+ /* All images are aligned to a 32-byte offset */
+ *curOffset = (*curOffset + 0x1f) & ~0x1f;
+ lvl->faces[face].offset = *curOffset;
+ *curOffset += lvl->size;
+
+ if (RADEON_DEBUG & RADEON_TEXTURE)
+ fprintf(stderr,
+ "level %d, face %d: rs:%d %dx%d at %d\n",
+ level, face, lvl->rowstride, lvl->width, lvl->height, lvl->faces[face].offset);
+}
+
+static GLuint minify(GLuint size, GLuint levels)
+{
+ size = size >> levels;
+ if (size < 1)
+ size = 1;
+ return size;
+}
+
+
+static void calculate_miptree_layout_r100(radeonContextPtr rmesa, radeon_mipmap_tree *mt)
+{
+ GLuint curOffset;
+ GLuint numLevels;
+ GLuint i;
+ GLuint face;
+
+ numLevels = mt->lastLevel - mt->firstLevel + 1;
+ assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels);
+
+ curOffset = 0;
+ for(face = 0; face < mt->faces; face++) {
+
+ for(i = 0; i < numLevels; i++) {
+ mt->levels[i].width = minify(mt->width0, i);
+ mt->levels[i].height = minify(mt->height0, i);
+ mt->levels[i].depth = minify(mt->depth0, i);
+ compute_tex_image_offset(rmesa, mt, face, i, &curOffset);
+ }
+ }
+
+ /* Note the required size in memory */
+ mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+}
+
+static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_tree *mt)
+{
+ GLuint curOffset;
+ GLuint numLevels;
+ GLuint i;
+
+ numLevels = mt->lastLevel - mt->firstLevel + 1;
+ assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels);
+
+ curOffset = 0;
+ for(i = 0; i < numLevels; i++) {
+ GLuint face;
+
+ mt->levels[i].width = minify(mt->width0, i);
+ mt->levels[i].height = minify(mt->height0, i);
+ mt->levels[i].depth = minify(mt->depth0, i);
+
+ for(face = 0; face < mt->faces; face++)
+ compute_tex_image_offset(rmesa, mt, face, i, &curOffset);
+ }
+
+ /* Note the required size in memory */
+ mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+}
+
+/**
+ * Create a new mipmap tree, calculate its layout and allocate memory.
+ */
+radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t,
+ GLenum target, GLenum internal_format, GLuint firstLevel, GLuint lastLevel,
+ GLuint width0, GLuint height0, GLuint depth0,
+ GLuint bpp, GLuint tilebits, GLuint compressed)
+{
+ radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree);
+
+ mt->radeon = rmesa;
+ mt->internal_format = internal_format;
+ mt->refcount = 1;
+ mt->t = t;
+ mt->target = target;
+ mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+ mt->firstLevel = firstLevel;
+ mt->lastLevel = lastLevel;
+ mt->width0 = width0;
+ mt->height0 = height0;
+ mt->depth0 = depth0;
+ mt->bpp = compressed ? radeon_compressed_num_bytes(compressed) : bpp;
+ mt->tilebits = tilebits;
+ mt->compressed = compressed;
+
+ if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R300)
+ calculate_miptree_layout_r300(rmesa, mt);
+ else
+ calculate_miptree_layout_r100(rmesa, mt);
+
+ mt->bo = radeon_bo_open(rmesa->radeonScreen->bom,
+ 0, mt->totalsize, 1024,
+ RADEON_GEM_DOMAIN_VRAM,
+ 0);
+
+ return mt;
+}
+
+void radeon_miptree_reference(radeon_mipmap_tree *mt)
+{
+ mt->refcount++;
+ assert(mt->refcount > 0);
+}
+
+void radeon_miptree_unreference(radeon_mipmap_tree *mt)
+{
+ if (!mt)
+ return;
+
+ assert(mt->refcount > 0);
+ mt->refcount--;
+ if (!mt->refcount) {
+ radeon_bo_unref(mt->bo);
+ free(mt);
+ }
+}
+
+
+/**
+ * Calculate first and last mip levels for the given texture object,
+ * where the dimensions are taken from the given texture image at
+ * the given level.
+ *
+ * Note: level is the OpenGL level number, which is not necessarily the same
+ * as the first level that is actually present.
+ *
+ * The base level image of the given texture face must be non-null,
+ * or this will fail.
+ */
+static void calculate_first_last_level(struct gl_texture_object *tObj,
+ GLuint *pfirstLevel, GLuint *plastLevel,
+ GLuint face, GLuint level)
+{
+ const struct gl_texture_image * const baseImage =
+ tObj->Image[face][level];
+
+ assert(baseImage);
+
+ /* These must be signed values. MinLod and MaxLod can be negative numbers,
+ * and having firstLevel and lastLevel as signed prevents the need for
+ * extra sign checks.
+ */
+ int firstLevel;
+ int lastLevel;
+
+ /* Yes, this looks overly complicated, but it's all needed.
+ */
+ switch (tObj->Target) {
+ case GL_TEXTURE_1D:
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_3D:
+ case GL_TEXTURE_CUBE_MAP:
+ if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
+ /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
+ */
+ firstLevel = lastLevel = tObj->BaseLevel;
+ } else {
+ firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
+ firstLevel = MAX2(firstLevel, tObj->BaseLevel);
+ firstLevel = MIN2(firstLevel, level + baseImage->MaxLog2);
+ lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
+ lastLevel = MAX2(lastLevel, tObj->BaseLevel);
+ lastLevel = MIN2(lastLevel, level + baseImage->MaxLog2);
+ lastLevel = MIN2(lastLevel, tObj->MaxLevel);
+ lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+ }
+ break;
+ case GL_TEXTURE_RECTANGLE_NV:
+ case GL_TEXTURE_4D_SGIS:
+ firstLevel = lastLevel = 0;
+ break;
+ default:
+ return;
+ }
+
+ /* save these values */
+ *pfirstLevel = firstLevel;
+ *plastLevel = lastLevel;
+}
+
+
+/**
+ * Checks whether the given miptree can hold the given texture image at the
+ * given face and level.
+ */
+GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
+ struct gl_texture_image *texImage, GLuint face, GLuint level)
+{
+ radeon_mipmap_level *lvl;
+
+ if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel)
+ return GL_FALSE;
+
+ if (texImage->InternalFormat != mt->internal_format ||
+ texImage->IsCompressed != mt->compressed)
+ return GL_FALSE;
+
+ if (!texImage->IsCompressed &&
+ !mt->compressed &&
+ texImage->TexFormat->TexelBytes != mt->bpp)
+ return GL_FALSE;
+
+ lvl = &mt->levels[level - mt->firstLevel];
+ if (lvl->width != texImage->Width ||
+ lvl->height != texImage->Height ||
+ lvl->depth != texImage->Depth)
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Checks whether the given miptree has the right format to store the given texture object.
+ */
+GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj)
+{
+ struct gl_texture_image *firstImage;
+ GLuint compressed;
+ GLuint numfaces = 1;
+ GLuint firstLevel, lastLevel;
+
+ calculate_first_last_level(texObj, &firstLevel, &lastLevel, 0, texObj->BaseLevel);
+ if (texObj->Target == GL_TEXTURE_CUBE_MAP)
+ numfaces = 6;
+
+ firstImage = texObj->Image[0][firstLevel];
+ compressed = firstImage->IsCompressed ? firstImage->TexFormat->MesaFormat : 0;
+
+ return (mt->firstLevel == firstLevel &&
+ mt->lastLevel == lastLevel &&
+ mt->width0 == firstImage->Width &&
+ mt->height0 == firstImage->Height &&
+ mt->depth0 == firstImage->Depth &&
+ mt->compressed == compressed &&
+ (!mt->compressed ? (mt->bpp == firstImage->TexFormat->TexelBytes) : 1));
+}
+
+
+/**
+ * Try to allocate a mipmap tree for the given texture that will fit the
+ * given image in the given position.
+ */
+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
+ radeon_texture_image *image, GLuint face, GLuint level)
+{
+ GLuint compressed = image->base.IsCompressed ? image->base.TexFormat->MesaFormat : 0;
+ GLuint numfaces = 1;
+ GLuint firstLevel, lastLevel;
+
+ assert(!t->mt);
+
+ calculate_first_last_level(&t->base, &firstLevel, &lastLevel, face, level);
+ if (t->base.Target == GL_TEXTURE_CUBE_MAP)
+ numfaces = 6;
+
+ if (level != firstLevel || face >= numfaces)
+ return;
+
+ t->mt = radeon_miptree_create(rmesa, t, t->base.Target,
+ image->base.InternalFormat,
+ firstLevel, lastLevel,
+ image->base.Width, image->base.Height, image->base.Depth,
+ image->base.TexFormat->TexelBytes, t->tile_bits, compressed);
+}
+
+/* Although we use the image_offset[] array to store relative offsets
+ * to cube faces, Mesa doesn't know anything about this and expects
+ * each cube face to be treated as a separate image.
+ *
+ * These functions present that view to mesa:
+ */
+void
+radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets)
+{
+ if (mt->target != GL_TEXTURE_3D || mt->faces == 1)
+ offsets[0] = 0;
+ else {
+ int i;
+ for (i = 0; i < 6; i++)
+ offsets[i] = mt->levels[level].faces[i].offset;
+ }
+}
+
+GLuint
+radeon_miptree_image_offset(radeon_mipmap_tree *mt,
+ GLuint face, GLuint level)
+{
+ if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
+ return (mt->levels[level].faces[face].offset);
+ else
+ return mt->levels[level].faces[0].offset;
+}
diff --git a/radeon/radeon_mipmap_tree.h b/radeon/radeon_mipmap_tree.h
new file mode 100644
index 0000000..db28252
--- /dev/null
+++ b/radeon/radeon_mipmap_tree.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_MIPMAP_TREE_H_
+#define __RADEON_MIPMAP_TREE_H_
+
+#include "radeon_common.h"
+
+typedef struct _radeon_mipmap_tree radeon_mipmap_tree;
+typedef struct _radeon_mipmap_level radeon_mipmap_level;
+typedef struct _radeon_mipmap_image radeon_mipmap_image;
+
+struct _radeon_mipmap_image {
+ GLuint offset; /** Offset of this image from the start of mipmap tree buffer, in bytes */
+};
+
+struct _radeon_mipmap_level {
+ GLuint width;
+ GLuint height;
+ GLuint depth;
+ GLuint size; /** Size of each image, in bytes */
+ GLuint rowstride; /** in bytes */
+ radeon_mipmap_image faces[6];
+};
+
+/* store the max possible in the miptree */
+#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 13
+
+/**
+ * A mipmap tree contains texture images in the layout that the hardware
+ * expects.
+ *
+ * The meta-data of mipmap trees is immutable, i.e. you cannot change the
+ * layout on-the-fly; however, the texture contents (i.e. texels) can be
+ * changed.
+ */
+struct _radeon_mipmap_tree {
+ radeonContextPtr radeon;
+ radeonTexObj *t;
+ struct radeon_bo *bo;
+ GLuint refcount;
+
+ GLuint totalsize; /** total size of the miptree, in bytes */
+
+ GLenum target; /** GL_TEXTURE_xxx */
+ GLenum internal_format;
+ GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */
+ GLuint firstLevel; /** First mip level stored in this mipmap tree */
+ GLuint lastLevel; /** Last mip level stored in this mipmap tree */
+
+ GLuint width0; /** Width of firstLevel image */
+ GLuint height0; /** Height of firstLevel image */
+ GLuint depth0; /** Depth of firstLevel image */
+
+ GLuint bpp; /** Bytes per texel */
+ GLuint tilebits; /** RADEON_TXO_xxx_TILE */
+ GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */
+
+ radeon_mipmap_level levels[RADEON_MIPTREE_MAX_TEXTURE_LEVELS];
+};
+
+radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t,
+ GLenum target, GLenum internal_format, GLuint firstLevel, GLuint lastLevel,
+ GLuint width0, GLuint height0, GLuint depth0,
+ GLuint bpp, GLuint tilebits, GLuint compressed);
+void radeon_miptree_reference(radeon_mipmap_tree *mt);
+void radeon_miptree_unreference(radeon_mipmap_tree *mt);
+
+GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
+ struct gl_texture_image *texImage, GLuint face, GLuint level);
+GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj);
+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
+ radeon_texture_image *texImage, GLuint face, GLuint level);
+GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt,
+ GLuint face, GLuint level);
+void radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets);
+#endif /* __RADEON_MIPMAP_TREE_H_ */
diff --git a/radeon/radeon_queryobj.c b/radeon/radeon_queryobj.c
new file mode 100644
index 0000000..b79d864
--- /dev/null
+++ b/radeon/radeon_queryobj.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright © 2008-2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Maciej Cencora <m.cencora@gmail.com>
+ *
+ */
+#include "radeon_common.h"
+#include "radeon_queryobj.h"
+#include "radeon_debug.h"
+
+#include "main/imports.h"
+#include "main/simple_list.h"
+
+static int radeonQueryIsFlushed(GLcontext *ctx, struct gl_query_object *q)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct radeon_query_object *tmp, *query = (struct radeon_query_object *)q;
+
+ foreach(tmp, &radeon->query.not_flushed_head) {
+ if (tmp == query) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct radeon_query_object *query = (struct radeon_query_object *)q;
+ uint32_t *result;
+ int i;
+
+ radeon_print(RADEON_STATE, RADEON_VERBOSE,
+ "%s: query id %d, result %d\n",
+ __FUNCTION__, query->Base.Id, (int) query->Base.Result);
+
+ radeon_bo_map(query->bo, GL_FALSE);
+
+ result = query->bo->ptr;
+
+ query->Base.Result = 0;
+ for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
+ query->Base.Result += result[i];
+ radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, result[i]);
+ }
+
+ radeon_bo_unmap(query->bo);
+}
+
+static struct gl_query_object * radeonNewQueryObject(GLcontext *ctx, GLuint id)
+{
+ struct radeon_query_object *query;
+
+ query = _mesa_calloc(sizeof(struct radeon_query_object));
+
+ query->Base.Id = id;
+ query->Base.Result = 0;
+ query->Base.Active = GL_FALSE;
+ query->Base.Ready = GL_TRUE;
+
+ radeon_print(RADEON_STATE, RADEON_VERBOSE,"%s: query id %d\n", __FUNCTION__, query->Base.Id);
+
+ return &query->Base;
+}
+
+static void radeonDeleteQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct radeon_query_object *query = (struct radeon_query_object *)q;
+
+ radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+ if (query->bo) {
+ radeon_bo_unref(query->bo);
+ }
+
+ _mesa_free(query);
+}
+
+static void radeonWaitQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct radeon_query_object *query = (struct radeon_query_object *)q;
+
+ /* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */
+ if (!radeonQueryIsFlushed(ctx, q))
+ ctx->Driver.Flush(ctx);
+
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset);
+
+ radeonQueryGetResult(ctx, q);
+
+ query->Base.Ready = GL_TRUE;
+}
+
+
+static void radeonBeginQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct radeon_query_object *query = (struct radeon_query_object *)q;
+
+ radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+ assert(radeon->query.current == NULL);
+
+ if (radeon->dma.flush)
+ radeon->dma.flush(radeon->glCtx);
+
+ if (!query->bo) {
+ query->bo = radeon_bo_open(radeon->radeonScreen->bom, 0, RADEON_QUERY_PAGE_SIZE, RADEON_QUERY_PAGE_SIZE, RADEON_GEM_DOMAIN_GTT, 0);
+ }
+ query->curr_offset = 0;
+
+ radeon->query.current = query;
+
+ radeon->query.queryobj.dirty = GL_TRUE;
+ radeon->hw.is_dirty = GL_TRUE;
+ insert_at_tail(&radeon->query.not_flushed_head, query);
+
+}
+
+void radeonEmitQueryEnd(GLcontext *ctx)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct radeon_query_object *query = radeon->query.current;
+
+ if (!query)
+ return;
+
+ if (query->emitted_begin == GL_FALSE)
+ return;
+
+ radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, query->Base.Id, query->bo, query->curr_offset);
+
+ radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
+ query->bo,
+ 0, RADEON_GEM_DOMAIN_GTT);
+
+ radeon->vtbl.emit_query_finish(radeon);
+}
+
+static void radeonEndQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+
+ radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+ if (radeon->dma.flush)
+ radeon->dma.flush(radeon->glCtx);
+ radeonEmitQueryEnd(ctx);
+
+ radeon->query.current = NULL;
+}
+
+static void radeonCheckQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+#ifdef DRM_RADEON_GEM_BUSY
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+
+ if (radeon->radeonScreen->kernel_mm) {
+ struct radeon_query_object *query = (struct radeon_query_object *)q;
+ uint32_t domain;
+
+ /* Need to perform a flush, as per ARB_occlusion_query spec */
+ if (!radeonQueryIsFlushed(ctx, q)) {
+ ctx->Driver.Flush(ctx);
+ }
+
+ if (radeon_bo_is_busy(query->bo, &domain) == 0) {
+ radeonQueryGetResult(ctx, q);
+ query->Base.Ready = GL_TRUE;
+ }
+ } else {
+ radeonWaitQuery(ctx, q);
+ }
+#else
+ radeonWaitQuery(ctx, q);
+#endif
+}
+
+void radeonInitQueryObjFunctions(struct dd_function_table *functions)
+{
+ functions->NewQueryObject = radeonNewQueryObject;
+ functions->DeleteQuery = radeonDeleteQuery;
+ functions->BeginQuery = radeonBeginQuery;
+ functions->EndQuery = radeonEndQuery;
+ functions->CheckQuery = radeonCheckQuery;
+ functions->WaitQuery = radeonWaitQuery;
+}
+
+int radeon_check_query_active(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct radeon_query_object *query = radeon->query.current;
+
+ if (!query || query->emitted_begin)
+ return 0;
+ return atom->cmd_size;
+}
+
+void radeon_emit_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ BATCH_LOCALS(radeon);
+ int dwords;
+
+ dwords = (*atom->check) (ctx, atom);
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(atom->cmd, dwords);
+ END_BATCH();
+
+ radeon->query.current->emitted_begin = GL_TRUE;
+}
diff --git a/radeon/radeon_queryobj.h b/radeon/radeon_queryobj.h
new file mode 100644
index 0000000..19374dc
--- /dev/null
+++ b/radeon/radeon_queryobj.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright © 2008 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Maciej Cencora <m.cencora@gmail.com>
+ *
+ */
+
+#include "main/imports.h"
+#include "main/simple_list.h"
+#include "radeon_common_context.h"
+
+extern void radeonEmitQueryBegin(GLcontext *ctx);
+extern void radeonEmitQueryEnd(GLcontext *ctx);
+
+extern void radeonInitQueryObjFunctions(struct dd_function_table *functions);
+
+#define RADEON_QUERY_PAGE_SIZE 4096
+
+int radeon_check_query_active(GLcontext *ctx, struct radeon_state_atom *atom);
+void radeon_emit_queryobj(GLcontext *ctx, struct radeon_state_atom *atom);
+
+static inline void radeon_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+ radeon->query.queryobj.cmd_size = (SZ);
+ radeon->query.queryobj.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t));
+ radeon->query.queryobj.name = "queryobj";
+ radeon->query.queryobj.idx = 0;
+ radeon->query.queryobj.check = radeon_check_query_active;
+ radeon->query.queryobj.dirty = GL_FALSE;
+ radeon->query.queryobj.emit = radeon_emit_queryobj;
+
+ radeon->hw.max_state_size += (SZ);
+ insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+
diff --git a/radeon/radeon_sanity.c b/radeon/radeon_sanity.c
index 6613757..1ab570f 100644
--- a/radeon/radeon_sanity.c
+++ b/radeon/radeon_sanity.c
@@ -44,11 +44,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define MORE_VERBOSE 1
#if MORE_VERBOSE
-#define VERBOSE (RADEON_DEBUG & DEBUG_VERBOSE)
+#define VERBOSE (RADEON_DEBUG & RADEON_VERBOSE)
#define NORMAL (1)
#else
#define VERBOSE 0
-#define NORMAL (RADEON_DEBUG & DEBUG_VERBOSE)
+#define NORMAL (RADEON_DEBUG & RADEON_VERBOSE)
#endif
@@ -973,7 +973,7 @@ static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf )
}
-int radeonSanityCmdBuffer( radeonContextPtr rmesa,
+int radeonSanityCmdBuffer( r100ContextPtr rmesa,
int nbox,
drm_clip_rect_t *boxes )
{
diff --git a/radeon/radeon_sanity.h b/radeon/radeon_sanity.h
index 1ec06bc..f30eb1c 100644
--- a/radeon/radeon_sanity.h
+++ b/radeon/radeon_sanity.h
@@ -1,7 +1,7 @@
#ifndef RADEON_SANITY_H
#define RADEON_SANITY_H
-extern int radeonSanityCmdBuffer( radeonContextPtr rmesa,
+extern int radeonSanityCmdBuffer( r100ContextPtr rmesa,
int nbox,
drm_clip_rect_t *boxes );
diff --git a/radeon/radeon_screen.c b/radeon/radeon_screen.c
index 791f598..5ffb55d 100644
--- a/radeon/radeon_screen.c
+++ b/radeon/radeon_screen.c
@@ -35,6 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* \author Gareth Hughes <gareth@valinux.com>
*/
+#include <errno.h>
#include "main/glheader.h"
#include "main/imports.h"
#include "main/mtypes.h"
@@ -45,32 +46,42 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_chipset.h"
#include "radeon_macros.h"
#include "radeon_screen.h"
+#include "radeon_common.h"
+#include "radeon_span.h"
#if !RADEON_COMMON
#include "radeon_context.h"
-#include "radeon_span.h"
#include "radeon_tex.h"
#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
#include "r200_context.h"
#include "r200_ioctl.h"
-#include "r200_span.h"
#include "r200_tex.h"
#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
#include "r300_context.h"
-#include "r300_fragprog.h"
#include "r300_tex.h"
-#include "radeon_span.h"
+#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#include "r600_context.h"
+#include "r700_driconf.h" /* +r6/r7 */
+#include "r600_tex.h" /* +r6/r7 */
#endif
#include "utils.h"
#include "vblank.h"
#include "drirenderbuffer.h"
+#include "radeon_bocs_wrapper.h"
+
#include "GL/internal/dri_interface.h"
/* Radeon configuration
*/
#include "xmlpool.h"
+#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
+ DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
+ DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
+DRI_CONF_OPT_END
+
#if !RADEON_COMMON /* R100 */
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
@@ -80,6 +91,7 @@ DRI_CONF_BEGIN
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
DRI_CONF_MAX_TEXTURE_UNITS(3,2,3)
DRI_CONF_HYPERZ(false)
+ DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
@@ -95,7 +107,7 @@ DRI_CONF_BEGIN
DRI_CONF_NO_RAST(false)
DRI_CONF_SECTION_END
DRI_CONF_END;
-static const GLuint __driNConfigOptions = 14;
+static const GLuint __driNConfigOptions = 15;
#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
@@ -107,6 +119,7 @@ DRI_CONF_BEGIN
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
DRI_CONF_MAX_TEXTURE_UNITS(6,2,6)
DRI_CONF_HYPERZ(false)
+ DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
@@ -126,7 +139,7 @@ DRI_CONF_BEGIN
DRI_CONF_NV_VERTEX_PROGRAM(false)
DRI_CONF_SECTION_END
DRI_CONF_END;
-static const GLuint __driNConfigOptions = 16;
+static const GLuint __driNConfigOptions = 17;
extern const struct dri_extension blend_extensions[];
extern const struct dri_extension ARB_vp_extension[];
@@ -134,7 +147,10 @@ extern const struct dri_extension NV_vp_extension[];
extern const struct dri_extension ATI_fs_extension[];
extern const struct dri_extension point_extensions[];
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
+
+#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
+#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
/* TODO: integrate these into xmlpool.h! */
#define DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(def,min,max) \
@@ -149,11 +165,7 @@ DRI_CONF_OPT_BEGIN_V(texture_coord_units,int,def, # min ":" # max ) \
DRI_CONF_DESC(de,"Anzahl der Texturkoordinateneinheiten") \
DRI_CONF_OPT_END
-#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
-DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
- DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
- DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
-DRI_CONF_OPT_END
+
#define DRI_CONF_DISABLE_S3TC(def) \
DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \
@@ -208,47 +220,42 @@ static const GLuint __driNConfigOptions = 17;
extern const struct dri_extension gl_20_extension[];
-#ifndef RADEON_DEBUG
-int RADEON_DEBUG = 0;
-
-static const struct dri_debug_control debug_control[] = {
- {"fall", DEBUG_FALLBACKS},
- {"tex", DEBUG_TEXTURE},
- {"ioctl", DEBUG_IOCTL},
- {"prim", DEBUG_PRIMS},
- {"vert", DEBUG_VERTS},
- {"state", DEBUG_STATE},
- {"code", DEBUG_CODEGEN},
- {"vfmt", DEBUG_VFMT},
- {"vtxf", DEBUG_VFMT},
- {"verb", DEBUG_VERBOSE},
- {"dri", DEBUG_DRI},
- {"dma", DEBUG_DMA},
- {"san", DEBUG_SANITY},
- {"sync", DEBUG_SYNC},
- {"pix", DEBUG_PIXEL},
- {"mem", DEBUG_MEMORY},
- {"allmsg", ~DEBUG_SYNC}, /* avoid the term "sync" because the parser uses strstr */
- {NULL, 0}
-};
-#endif /* RADEON_DEBUG */
-
#endif /* RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) */
extern const struct dri_extension card_extensions[];
+extern const struct dri_extension mm_extensions[];
static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo );
static int
-radeonGetParam(int fd, int param, void *value)
+radeonGetParam(__DRIscreenPrivate *sPriv, int param, void *value)
{
int ret;
- drm_radeon_getparam_t gp;
-
- gp.param = param;
- gp.value = value;
+ drm_radeon_getparam_t gp = { 0 };
+ struct drm_radeon_info info = { 0 };
+
+ if (sPriv->drm_version.major >= 2) {
+ info.value = (uint64_t)(uintptr_t)value;
+ switch (param) {
+ case RADEON_PARAM_DEVICE_ID:
+ info.request = RADEON_INFO_DEVICE_ID;
+ break;
+ case RADEON_PARAM_NUM_GB_PIPES:
+ info.request = RADEON_INFO_NUM_GB_PIPES;
+ break;
+ case RADEON_PARAM_NUM_Z_PIPES:
+ info.request = RADEON_INFO_NUM_Z_PIPES;
+ break;
+ default:
+ return -EINVAL;
+ }
+ ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
+ } else {
+ gp.param = param;
+ gp.value = value;
- ret = drmCommandWriteRead( fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+ ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+ }
return ret;
}
@@ -283,12 +290,12 @@ radeonFillInModes( __DRIscreenPrivate *psp,
* with a stencil buffer. It will be a sw fallback, but some apps won't
* care about that.
*/
- stencil_bits_array[0] = 0;
+ stencil_bits_array[0] = stencil_bits;
stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
msaa_samples_array[0] = 0;
- depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1;
+ depth_buffer_factor = (stencil_bits == 0) ? 2 : 1;
back_buffer_factor = (have_back_buffer) ? 2 : 1;
if (pixel_bits == 16) {
@@ -335,6 +342,12 @@ static const __DRItexOffsetExtension radeonTexOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
radeonSetTexOffset,
};
+
+static const __DRItexBufferExtension radeonTexBufferExtension = {
+ { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+ radeonSetTexBuffer,
+ radeonSetTexBuffer2,
+};
#endif
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
@@ -349,6 +362,12 @@ static const __DRItexOffsetExtension r200texOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
r200SetTexOffset,
};
+
+static const __DRItexBufferExtension r200TexBufferExtension = {
+ { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+ r200SetTexBuffer,
+ r200SetTexBuffer2,
+};
#endif
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
@@ -356,137 +375,32 @@ static const __DRItexOffsetExtension r300texOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
r300SetTexOffset,
};
-#endif
-/* Create the device specific screen private data struct.
- */
-static radeonScreenPtr
-radeonCreateScreen( __DRIscreenPrivate *sPriv )
-{
- radeonScreenPtr screen;
- RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
- unsigned char *RADEONMMIO;
- int i;
- int ret;
- uint32_t temp = 0;
-
- if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
- fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n");
- return GL_FALSE;
- }
-
- /* Allocate the private area */
- screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
- if ( !screen ) {
- __driUtilMessage("%s: Could not allocate memory for screen structure",
- __FUNCTION__);
- return NULL;
- }
-
-#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
- RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
+static const __DRItexBufferExtension r300TexBufferExtension = {
+ { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+ r300SetTexBuffer,
+ r300SetTexBuffer2,
+};
#endif
- /* parse information in __driConfigOptions */
- driParseOptionInfo (&screen->optionCache,
- __driConfigOptions, __driNConfigOptions);
-
- /* This is first since which regions we map depends on whether or
- * not we are using a PCI card.
- */
- screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
- {
- int ret;
- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
- &screen->gart_buffer_offset);
-
- if (ret) {
- FREE( screen );
- fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
- return NULL;
- }
-
- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE,
- &screen->gart_base);
- if (ret) {
- FREE( screen );
- fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
- return NULL;
- }
-
- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
- &screen->irq);
- if (ret) {
- FREE( screen );
- fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
- return NULL;
- }
- screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
- screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
- screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
- screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
- screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
- screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
- screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
- }
-
- screen->mmio.handle = dri_priv->registerHandle;
- screen->mmio.size = dri_priv->registerSize;
- if ( drmMap( sPriv->fd,
- screen->mmio.handle,
- screen->mmio.size,
- &screen->mmio.map ) ) {
- FREE( screen );
- __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
- return NULL;
- }
-
- RADEONMMIO = screen->mmio.map;
-
- screen->status.handle = dri_priv->statusHandle;
- screen->status.size = dri_priv->statusSize;
- if ( drmMap( sPriv->fd,
- screen->status.handle,
- screen->status.size,
- &screen->status.map ) ) {
- drmUnmap( screen->mmio.map, screen->mmio.size );
- FREE( screen );
- __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
- return NULL;
- }
- screen->scratch = (__volatile__ uint32_t *)
- ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
-
- screen->buffers = drmMapBufs( sPriv->fd );
- if ( !screen->buffers ) {
- drmUnmap( screen->status.map, screen->status.size );
- drmUnmap( screen->mmio.map, screen->mmio.size );
- FREE( screen );
- __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
- return NULL;
- }
-
- if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
- screen->gartTextures.handle = dri_priv->gartTexHandle;
- screen->gartTextures.size = dri_priv->gartTexMapSize;
- if ( drmMap( sPriv->fd,
- screen->gartTextures.handle,
- screen->gartTextures.size,
- (drmAddressPtr)&screen->gartTextures.map ) ) {
- drmUnmapBufs( screen->buffers );
- drmUnmap( screen->status.map, screen->status.size );
- drmUnmap( screen->mmio.map, screen->mmio.size );
- FREE( screen );
- __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
- return NULL;
- }
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+static const __DRItexOffsetExtension r600texOffsetExtension = {
+ { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+ r600SetTexOffset, /* +r6/r7 */
+};
- screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
- }
+static const __DRItexBufferExtension r600TexBufferExtension = {
+ { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+ r600SetTexBuffer, /* +r6/r7 */
+ r600SetTexBuffer2, /* +r6/r7 */
+};
+#endif
+static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
+{
+ screen->device_id = device_id;
screen->chip_flags = 0;
- /* XXX: add more chipsets */
- switch ( dri_priv->deviceID ) {
+ switch ( device_id ) {
case PCI_CHIP_RADEON_LY:
case PCI_CHIP_RADEON_LZ:
case PCI_CHIP_RADEON_QY:
@@ -561,7 +475,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->chip_family = CHIP_FAMILY_RS300;
break;
-
case PCI_CHIP_R300_AD:
case PCI_CHIP_R300_AE:
case PCI_CHIP_R300_AF:
@@ -819,11 +732,325 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
+ case PCI_CHIP_R600_9400:
+ case PCI_CHIP_R600_9401:
+ case PCI_CHIP_R600_9402:
+ case PCI_CHIP_R600_9403:
+ case PCI_CHIP_R600_9405:
+ case PCI_CHIP_R600_940A:
+ case PCI_CHIP_R600_940B:
+ case PCI_CHIP_R600_940F:
+ screen->chip_family = CHIP_FAMILY_R600;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV610_94C0:
+ case PCI_CHIP_RV610_94C1:
+ case PCI_CHIP_RV610_94C3:
+ case PCI_CHIP_RV610_94C4:
+ case PCI_CHIP_RV610_94C5:
+ case PCI_CHIP_RV610_94C6:
+ case PCI_CHIP_RV610_94C7:
+ case PCI_CHIP_RV610_94C8:
+ case PCI_CHIP_RV610_94C9:
+ case PCI_CHIP_RV610_94CB:
+ case PCI_CHIP_RV610_94CC:
+ case PCI_CHIP_RV610_94CD:
+ screen->chip_family = CHIP_FAMILY_RV610;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV630_9580:
+ case PCI_CHIP_RV630_9581:
+ case PCI_CHIP_RV630_9583:
+ case PCI_CHIP_RV630_9586:
+ case PCI_CHIP_RV630_9587:
+ case PCI_CHIP_RV630_9588:
+ case PCI_CHIP_RV630_9589:
+ case PCI_CHIP_RV630_958A:
+ case PCI_CHIP_RV630_958B:
+ case PCI_CHIP_RV630_958C:
+ case PCI_CHIP_RV630_958D:
+ case PCI_CHIP_RV630_958E:
+ case PCI_CHIP_RV630_958F:
+ screen->chip_family = CHIP_FAMILY_RV630;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV670_9500:
+ case PCI_CHIP_RV670_9501:
+ case PCI_CHIP_RV670_9504:
+ case PCI_CHIP_RV670_9505:
+ case PCI_CHIP_RV670_9506:
+ case PCI_CHIP_RV670_9507:
+ case PCI_CHIP_RV670_9508:
+ case PCI_CHIP_RV670_9509:
+ case PCI_CHIP_RV670_950F:
+ case PCI_CHIP_RV670_9511:
+ case PCI_CHIP_RV670_9515:
+ case PCI_CHIP_RV670_9517:
+ case PCI_CHIP_RV670_9519:
+ screen->chip_family = CHIP_FAMILY_RV670;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV620_95C0:
+ case PCI_CHIP_RV620_95C2:
+ case PCI_CHIP_RV620_95C4:
+ case PCI_CHIP_RV620_95C5:
+ case PCI_CHIP_RV620_95C6:
+ case PCI_CHIP_RV620_95C7:
+ case PCI_CHIP_RV620_95C9:
+ case PCI_CHIP_RV620_95CC:
+ case PCI_CHIP_RV620_95CD:
+ case PCI_CHIP_RV620_95CE:
+ case PCI_CHIP_RV620_95CF:
+ screen->chip_family = CHIP_FAMILY_RV620;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV635_9590:
+ case PCI_CHIP_RV635_9591:
+ case PCI_CHIP_RV635_9593:
+ case PCI_CHIP_RV635_9595:
+ case PCI_CHIP_RV635_9596:
+ case PCI_CHIP_RV635_9597:
+ case PCI_CHIP_RV635_9598:
+ case PCI_CHIP_RV635_9599:
+ case PCI_CHIP_RV635_959B:
+ screen->chip_family = CHIP_FAMILY_RV635;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RS780_9610:
+ case PCI_CHIP_RS780_9611:
+ case PCI_CHIP_RS780_9612:
+ case PCI_CHIP_RS780_9613:
+ case PCI_CHIP_RS780_9614:
+ case PCI_CHIP_RS780_9615:
+ case PCI_CHIP_RS780_9616:
+ screen->chip_family = CHIP_FAMILY_RS780;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+ case PCI_CHIP_RS880_9710:
+ case PCI_CHIP_RS880_9711:
+ case PCI_CHIP_RS880_9712:
+ case PCI_CHIP_RS880_9713:
+ case PCI_CHIP_RS880_9714:
+ screen->chip_family = CHIP_FAMILY_RS880;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV770_9440:
+ case PCI_CHIP_RV770_9441:
+ case PCI_CHIP_RV770_9442:
+ case PCI_CHIP_RV770_9443:
+ case PCI_CHIP_RV770_9444:
+ case PCI_CHIP_RV770_9446:
+ case PCI_CHIP_RV770_944A:
+ case PCI_CHIP_RV770_944B:
+ case PCI_CHIP_RV770_944C:
+ case PCI_CHIP_RV770_944E:
+ case PCI_CHIP_RV770_9450:
+ case PCI_CHIP_RV770_9452:
+ case PCI_CHIP_RV770_9456:
+ case PCI_CHIP_RV770_945A:
+ case PCI_CHIP_RV770_945B:
+ case PCI_CHIP_RV790_9460:
+ case PCI_CHIP_RV790_9462:
+ case PCI_CHIP_RV770_946A:
+ case PCI_CHIP_RV770_946B:
+ case PCI_CHIP_RV770_947A:
+ case PCI_CHIP_RV770_947B:
+ screen->chip_family = CHIP_FAMILY_RV770;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV730_9480:
+ case PCI_CHIP_RV730_9487:
+ case PCI_CHIP_RV730_9488:
+ case PCI_CHIP_RV730_9489:
+ case PCI_CHIP_RV730_948F:
+ case PCI_CHIP_RV730_9490:
+ case PCI_CHIP_RV730_9491:
+ case PCI_CHIP_RV730_9495:
+ case PCI_CHIP_RV730_9498:
+ case PCI_CHIP_RV730_949C:
+ case PCI_CHIP_RV730_949E:
+ case PCI_CHIP_RV730_949F:
+ screen->chip_family = CHIP_FAMILY_RV730;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV710_9540:
+ case PCI_CHIP_RV710_9541:
+ case PCI_CHIP_RV710_9542:
+ case PCI_CHIP_RV710_954E:
+ case PCI_CHIP_RV710_954F:
+ case PCI_CHIP_RV710_9552:
+ case PCI_CHIP_RV710_9553:
+ case PCI_CHIP_RV710_9555:
+ case PCI_CHIP_RV710_9557:
+ screen->chip_family = CHIP_FAMILY_RV710;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV740_94A0:
+ case PCI_CHIP_RV740_94A1:
+ case PCI_CHIP_RV740_94A3:
+ case PCI_CHIP_RV740_94B1:
+ case PCI_CHIP_RV740_94B3:
+ case PCI_CHIP_RV740_94B4:
+ case PCI_CHIP_RV740_94B5:
+ case PCI_CHIP_RV740_94B9:
+ screen->chip_family = CHIP_FAMILY_RV740;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
default:
fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
- dri_priv->deviceID);
+ device_id);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/* Create the device specific screen private data struct.
+ */
+static radeonScreenPtr
+radeonCreateScreen( __DRIscreenPrivate *sPriv )
+{
+ radeonScreenPtr screen;
+ RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
+ unsigned char *RADEONMMIO = NULL;
+ int i;
+ int ret;
+ uint32_t temp = 0;
+
+ if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
+ fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n");
+ return GL_FALSE;
+ }
+
+ /* Allocate the private area */
+ screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
+ if ( !screen ) {
+ __driUtilMessage("%s: Could not allocate memory for screen structure",
+ __FUNCTION__);
return NULL;
}
+
+ radeon_init_debug();
+
+ /* parse information in __driConfigOptions */
+ driParseOptionInfo (&screen->optionCache,
+ __driConfigOptions, __driNConfigOptions);
+
+ /* This is first since which regions we map depends on whether or
+ * not we are using a PCI card.
+ */
+ screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
+ {
+ int ret;
+
+ ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BUFFER_OFFSET,
+ &screen->gart_buffer_offset);
+
+ if (ret) {
+ FREE( screen );
+ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
+ return NULL;
+ }
+
+ ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BASE,
+ &screen->gart_base);
+ if (ret) {
+ FREE( screen );
+ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
+ return NULL;
+ }
+
+ ret = radeonGetParam(sPriv, RADEON_PARAM_IRQ_NR,
+ &screen->irq);
+ if (ret) {
+ FREE( screen );
+ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
+ return NULL;
+ }
+ screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
+ screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
+ screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
+ screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
+ screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
+ screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
+ screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
+ screen->drmSupportsOcclusionQueries = (sPriv->drm_version.minor >= 30);
+ }
+
+ ret = radeon_set_screen_flags(screen, dri_priv->deviceID);
+ if (ret == -1)
+ return NULL;
+
+ screen->mmio.handle = dri_priv->registerHandle;
+ screen->mmio.size = dri_priv->registerSize;
+ if ( drmMap( sPriv->fd,
+ screen->mmio.handle,
+ screen->mmio.size,
+ &screen->mmio.map ) ) {
+ FREE( screen );
+ __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
+ return NULL;
+ }
+
+ RADEONMMIO = screen->mmio.map;
+
+ screen->status.handle = dri_priv->statusHandle;
+ screen->status.size = dri_priv->statusSize;
+ if ( drmMap( sPriv->fd,
+ screen->status.handle,
+ screen->status.size,
+ &screen->status.map ) ) {
+ drmUnmap( screen->mmio.map, screen->mmio.size );
+ FREE( screen );
+ __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
+ return NULL;
+ }
+ if (screen->chip_family < CHIP_FAMILY_R600)
+ screen->scratch = (__volatile__ uint32_t *)
+ ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+ else
+ screen->scratch = (__volatile__ uint32_t *)
+ ((GLubyte *)screen->status.map + R600_SCRATCH_REG_OFFSET);
+
+ screen->buffers = drmMapBufs( sPriv->fd );
+ if ( !screen->buffers ) {
+ drmUnmap( screen->status.map, screen->status.size );
+ drmUnmap( screen->mmio.map, screen->mmio.size );
+ FREE( screen );
+ __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
+ return NULL;
+ }
+
+ if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
+ screen->gartTextures.handle = dri_priv->gartTexHandle;
+ screen->gartTextures.size = dri_priv->gartTexMapSize;
+ if ( drmMap( sPriv->fd,
+ screen->gartTextures.handle,
+ screen->gartTextures.size,
+ (drmAddressPtr)&screen->gartTextures.map ) ) {
+ drmUnmapBufs( screen->buffers );
+ drmUnmap( screen->status.map, screen->status.size );
+ drmUnmap( screen->mmio.map, screen->mmio.size );
+ FREE( screen );
+ __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
+ return NULL;
+ }
+
+ screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
+ }
+
if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) &&
sPriv->ddx_version.minor < 2) {
fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n");
@@ -836,35 +1063,57 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
}
if (getenv("R300_NO_TCL"))
- screen->chip_flags &= ~RADEON_CHIPSET_TCL;
+ screen->chip_flags &= ~RADEON_CHIPSET_TCL;
if (screen->chip_family <= CHIP_FAMILY_RS200)
- screen->chip_flags |= RADEON_CLASS_R100;
+ screen->chip_flags |= RADEON_CLASS_R100;
else if (screen->chip_family <= CHIP_FAMILY_RV280)
- screen->chip_flags |= RADEON_CLASS_R200;
+ screen->chip_flags |= RADEON_CLASS_R200;
+ else if (screen->chip_family <= CHIP_FAMILY_RV570)
+ screen->chip_flags |= RADEON_CLASS_R300;
else
- screen->chip_flags |= RADEON_CLASS_R300;
+ screen->chip_flags |= RADEON_CLASS_R600;
screen->cpp = dri_priv->bpp / 8;
screen->AGPMode = dri_priv->AGPMode;
- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
- &temp);
- if (ret) {
- if (screen->chip_family < CHIP_FAMILY_RS600)
- screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
- else {
- FREE( screen );
- fprintf(stderr, "Unable to get fb location need newer drm\n");
- return NULL;
+ ret = radeonGetParam(sPriv, RADEON_PARAM_FB_LOCATION, &temp);
+
+ /* +r6/r7 */
+ if(screen->chip_family >= CHIP_FAMILY_R600)
+ {
+ if (ret)
+ {
+ FREE( screen );
+ fprintf(stderr, "Unable to get fb location need newer drm\n");
+ return NULL;
+ }
+ else
+ {
+ screen->fbLocation = (temp & 0xffff) << 24;
}
- } else {
- screen->fbLocation = (temp & 0xffff) << 16;
+ }
+ else
+ {
+ if (ret)
+ {
+ if (screen->chip_family < CHIP_FAMILY_RS600 && !screen->kernel_mm)
+ screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
+ else
+ {
+ FREE( screen );
+ fprintf(stderr, "Unable to get fb location need newer drm\n");
+ return NULL;
+ }
+ }
+ else
+ {
+ screen->fbLocation = (temp & 0xffff) << 16;
+ }
}
- if (screen->chip_family >= CHIP_FAMILY_R300) {
- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES,
- &temp);
+ if (IS_R300_CLASS(screen)) {
+ ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
if (ret) {
fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
switch (screen->chip_family) {
@@ -902,6 +1151,14 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
break;
}
+ if ( sPriv->drm_version.minor >= 31 ) {
+ ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_Z_PIPES, &temp);
+ if (ret)
+ screen->num_z_pipes = 2;
+ else
+ screen->num_z_pipes = temp;
+ } else
+ screen->num_z_pipes = 2;
}
if ( sPriv->drm_version.minor >= 10 ) {
@@ -971,7 +1228,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
if (IS_R200_CLASS(screen))
- screen->extensions[i++] = &r200AllocateExtension.base;
+ screen->extensions[i++] = &r200AllocateExtension.base;
screen->extensions[i++] = &r200texOffsetExtension.base;
#endif
@@ -980,11 +1237,173 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->extensions[i++] = &r300texOffsetExtension.base;
#endif
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+ screen->extensions[i++] = &r600texOffsetExtension.base;
+#endif
+
screen->extensions[i++] = NULL;
sPriv->extensions = screen->extensions;
screen->driScreen = sPriv;
screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
+ screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
+ screen->sarea_priv_offset);
+
+ screen->bom = radeon_bo_manager_legacy_ctor(screen);
+ if (screen->bom == NULL) {
+ free(screen);
+ return NULL;
+ }
+
+ return screen;
+}
+
+static radeonScreenPtr
+radeonCreateScreen2(__DRIscreenPrivate *sPriv)
+{
+ radeonScreenPtr screen;
+ int i;
+ int ret;
+ uint32_t device_id = 0;
+ uint32_t temp = 0;
+
+ /* Allocate the private area */
+ screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
+ if ( !screen ) {
+ __driUtilMessage("%s: Could not allocate memory for screen structure",
+ __FUNCTION__);
+ fprintf(stderr, "leaving here\n");
+ return NULL;
+ }
+
+ radeon_init_debug();
+
+ /* parse information in __driConfigOptions */
+ driParseOptionInfo (&screen->optionCache,
+ __driConfigOptions, __driNConfigOptions);
+
+ screen->kernel_mm = 1;
+ screen->chip_flags = 0;
+
+ /* if we have kms we can support all of these */
+ screen->drmSupportsCubeMapsR200 = 1;
+ screen->drmSupportsBlendColor = 1;
+ screen->drmSupportsTriPerf = 1;
+ screen->drmSupportsFragShader = 1;
+ screen->drmSupportsPointSprites = 1;
+ screen->drmSupportsCubeMapsR100 = 1;
+ screen->drmSupportsVertexProgram = 1;
+ screen->drmSupportsOcclusionQueries = 1;
+ screen->irq = 1;
+
+ ret = radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, &device_id);
+ if (ret) {
+ FREE( screen );
+ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_DEVICE_ID): %d\n", ret);
+ return NULL;
+ }
+
+ ret = radeon_set_screen_flags(screen, device_id);
+ if (ret == -1)
+ return NULL;
+
+ if (getenv("R300_NO_TCL"))
+ screen->chip_flags &= ~RADEON_CHIPSET_TCL;
+
+ if (screen->chip_family <= CHIP_FAMILY_RS200)
+ screen->chip_flags |= RADEON_CLASS_R100;
+ else if (screen->chip_family <= CHIP_FAMILY_RV280)
+ screen->chip_flags |= RADEON_CLASS_R200;
+ else if (screen->chip_family <= CHIP_FAMILY_RV570)
+ screen->chip_flags |= RADEON_CLASS_R300;
+ else
+ screen->chip_flags |= RADEON_CLASS_R600;
+
+ if (IS_R300_CLASS(screen)) {
+ ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
+ if (ret) {
+ fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
+ switch (screen->chip_family) {
+ case CHIP_FAMILY_R300:
+ case CHIP_FAMILY_R350:
+ screen->num_gb_pipes = 2;
+ break;
+ case CHIP_FAMILY_R420:
+ case CHIP_FAMILY_R520:
+ case CHIP_FAMILY_R580:
+ case CHIP_FAMILY_RV560:
+ case CHIP_FAMILY_RV570:
+ screen->num_gb_pipes = 4;
+ break;
+ case CHIP_FAMILY_RV350:
+ case CHIP_FAMILY_RV515:
+ case CHIP_FAMILY_RV530:
+ case CHIP_FAMILY_RV410:
+ default:
+ screen->num_gb_pipes = 1;
+ break;
+ }
+ } else {
+ screen->num_gb_pipes = temp;
+ }
+
+ /* pipe overrides */
+ switch (device_id) {
+ case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */
+ case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */
+ case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */
+ screen->num_gb_pipes = 1;
+ break;
+ default:
+ break;
+ }
+
+ ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_Z_PIPES, &temp);
+ if (ret)
+ screen->num_z_pipes = 2;
+ else
+ screen->num_z_pipes = temp;
+
+ }
+
+ i = 0;
+ screen->extensions[i++] = &driCopySubBufferExtension.base;
+ screen->extensions[i++] = &driFrameTrackingExtension.base;
+ screen->extensions[i++] = &driReadDrawableExtension;
+
+ if ( screen->irq != 0 ) {
+ screen->extensions[i++] = &driSwapControlExtension.base;
+ screen->extensions[i++] = &driMediaStreamCounterExtension.base;
+ }
+
+#if !RADEON_COMMON
+ screen->extensions[i++] = &radeonTexBufferExtension.base;
+#endif
+
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+ if (IS_R200_CLASS(screen))
+ screen->extensions[i++] = &r200AllocateExtension.base;
+
+ screen->extensions[i++] = &r200TexBufferExtension.base;
+#endif
+
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+ screen->extensions[i++] = &r300TexBufferExtension.base;
+#endif
+
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+ screen->extensions[i++] = &r600TexBufferExtension.base;
+#endif
+
+ screen->extensions[i++] = NULL;
+ sPriv->extensions = screen->extensions;
+
+ screen->driScreen = sPriv;
+ screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
+ if (screen->bom == NULL) {
+ free(screen);
+ return NULL;
+ }
return screen;
}
@@ -993,23 +1412,32 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
static void
radeonDestroyScreen( __DRIscreenPrivate *sPriv )
{
- radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
+ radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
- if (!screen)
- return;
+ if (!screen)
+ return;
- if ( screen->gartTextures.map ) {
- drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
- }
- drmUnmapBufs( screen->buffers );
- drmUnmap( screen->status.map, screen->status.size );
- drmUnmap( screen->mmio.map, screen->mmio.size );
+ if (screen->kernel_mm) {
+#ifdef RADEON_BO_TRACK
+ radeon_tracker_print(&screen->bom->tracker, stderr);
+#endif
+ radeon_bo_manager_gem_dtor(screen->bom);
+ } else {
+ radeon_bo_manager_legacy_dtor(screen->bom);
+
+ if ( screen->gartTextures.map ) {
+ drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
+ }
+ drmUnmapBufs( screen->buffers );
+ drmUnmap( screen->status.map, screen->status.size );
+ drmUnmap( screen->mmio.map, screen->mmio.size );
+ }
- /* free all option information */
- driDestroyOptionInfo (&screen->optionCache);
+ /* free all option information */
+ driDestroyOptionInfo (&screen->optionCache);
- FREE( screen );
- sPriv->private = NULL;
+ FREE( screen );
+ sPriv->private = NULL;
}
@@ -1018,16 +1446,21 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
static GLboolean
radeonInitDriver( __DRIscreenPrivate *sPriv )
{
- sPriv->private = (void *) radeonCreateScreen( sPriv );
- if ( !sPriv->private ) {
- radeonDestroyScreen( sPriv );
- return GL_FALSE;
- }
+ if (sPriv->dri2.enabled) {
+ sPriv->private = (void *) radeonCreateScreen2( sPriv );
+ } else {
+ sPriv->private = (void *) radeonCreateScreen( sPriv );
+ }
+ if ( !sPriv->private ) {
+ radeonDestroyScreen( sPriv );
+ return GL_FALSE;
+ }
- return GL_TRUE;
+ return GL_TRUE;
}
+
/**
* Create the Mesa framebuffer and renderbuffers for a given window/drawable.
*
@@ -1040,132 +1473,112 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
const __GLcontextModes *mesaVis,
GLboolean isPixmap )
{
- radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
+ radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
- if (isPixmap) {
+ const GLboolean swDepth = GL_FALSE;
+ const GLboolean swAlpha = GL_FALSE;
+ const GLboolean swAccum = mesaVis->accumRedBits > 0;
+ const GLboolean swStencil = mesaVis->stencilBits > 0 &&
+ mesaVis->depthBits != 24;
+ GLenum rgbFormat;
+ struct radeon_framebuffer *rfb;
+
+ if (isPixmap)
return GL_FALSE; /* not implemented */
- }
- else {
- const GLboolean swDepth = GL_FALSE;
- const GLboolean swAlpha = GL_FALSE;
- const GLboolean swAccum = mesaVis->accumRedBits > 0;
- const GLboolean swStencil = mesaVis->stencilBits > 0 &&
- mesaVis->depthBits != 24;
- struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
-
- /* front color renderbuffer */
- {
- driRenderbuffer *frontRb
- = driNewRenderbuffer(GL_RGBA,
- driScrnPriv->pFB + screen->frontOffset,
- screen->cpp,
- screen->frontOffset, screen->frontPitch,
- driDrawPriv);
- radeonSetSpanFunctions(frontRb, mesaVis);
- _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
- }
- /* back color renderbuffer */
- if (mesaVis->doubleBufferMode) {
- driRenderbuffer *backRb
- = driNewRenderbuffer(GL_RGBA,
- driScrnPriv->pFB + screen->backOffset,
- screen->cpp,
- screen->backOffset, screen->backPitch,
- driDrawPriv);
- radeonSetSpanFunctions(backRb, mesaVis);
- _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
- }
+ rfb = CALLOC_STRUCT(radeon_framebuffer);
+ if (!rfb)
+ return GL_FALSE;
- /* depth renderbuffer */
- if (mesaVis->depthBits == 16) {
- driRenderbuffer *depthRb
- = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
- driScrnPriv->pFB + screen->depthOffset,
- screen->cpp,
- screen->depthOffset, screen->depthPitch,
- driDrawPriv);
- radeonSetSpanFunctions(depthRb, mesaVis);
- _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
- depthRb->depthHasSurface = screen->depthHasSurface;
- }
- else if (mesaVis->depthBits == 24) {
- driRenderbuffer *depthRb
- = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
- driScrnPriv->pFB + screen->depthOffset,
- screen->cpp,
- screen->depthOffset, screen->depthPitch,
- driDrawPriv);
- radeonSetSpanFunctions(depthRb, mesaVis);
- _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
- depthRb->depthHasSurface = screen->depthHasSurface;
- }
+ _mesa_initialize_framebuffer(&rfb->base, mesaVis);
+
+ if (mesaVis->redBits == 5)
+ rgbFormat = GL_RGB5;
+ else if (mesaVis->alphaBits == 0)
+ rgbFormat = GL_RGB8;
+ else
+ rgbFormat = GL_RGBA8;
+
+ /* front color renderbuffer */
+ rfb->color_rb[0] = radeon_create_renderbuffer(rgbFormat, driDrawPriv);
+ _mesa_add_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT, &rfb->color_rb[0]->base);
+ rfb->color_rb[0]->has_surface = 1;
+
+ /* back color renderbuffer */
+ if (mesaVis->doubleBufferMode) {
+ rfb->color_rb[1] = radeon_create_renderbuffer(rgbFormat, driDrawPriv);
+ _mesa_add_renderbuffer(&rfb->base, BUFFER_BACK_LEFT, &rfb->color_rb[1]->base);
+ rfb->color_rb[1]->has_surface = 1;
+ }
- /* stencil renderbuffer */
- if (mesaVis->stencilBits > 0 && !swStencil) {
- driRenderbuffer *stencilRb
- = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
- driScrnPriv->pFB + screen->depthOffset,
- screen->cpp,
- screen->depthOffset, screen->depthPitch,
- driDrawPriv);
- radeonSetSpanFunctions(stencilRb, mesaVis);
- _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
- stencilRb->depthHasSurface = screen->depthHasSurface;
+ if (mesaVis->depthBits == 24) {
+ if (mesaVis->stencilBits == 8) {
+ struct radeon_renderbuffer *depthStencilRb = radeon_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT, driDrawPriv);
+ _mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depthStencilRb->base);
+ _mesa_add_renderbuffer(&rfb->base, BUFFER_STENCIL, &depthStencilRb->base);
+ depthStencilRb->has_surface = screen->depthHasSurface;
+ } else {
+ /* depth renderbuffer */
+ struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT24, driDrawPriv);
+ _mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base);
+ depth->has_surface = screen->depthHasSurface;
}
+ } else if (mesaVis->depthBits == 16) {
+ /* just 16-bit depth buffer, no hw stencil */
+ struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT16, driDrawPriv);
+ _mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base);
+ depth->has_surface = screen->depthHasSurface;
+ }
- _mesa_add_soft_renderbuffers(fb,
- GL_FALSE, /* color */
- swDepth,
- swStencil,
- swAccum,
- swAlpha,
- GL_FALSE /* aux */);
- driDrawPriv->driverPrivate = (void *) fb;
+ _mesa_add_soft_renderbuffers(&rfb->base,
+ GL_FALSE, /* color */
+ swDepth,
+ swStencil,
+ swAccum,
+ swAlpha,
+ GL_FALSE /* aux */);
+ driDrawPriv->driverPrivate = (void *) rfb;
- return (driDrawPriv->driverPrivate != NULL);
- }
+ return (driDrawPriv->driverPrivate != NULL);
}
-static void
-radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+static void radeon_cleanup_renderbuffers(struct radeon_framebuffer *rfb)
{
- _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
-}
+ struct radeon_renderbuffer *rb;
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-/**
- * Choose the appropriate CreateContext function based on the chipset.
- * Eventually, all drivers will go through this process.
- */
-static GLboolean radeonCreateContext(const __GLcontextModes * glVisual,
- __DRIcontextPrivate * driContextPriv,
- void *sharedContextPriv)
-{
- __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
- radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
-
- if (IS_R300_CLASS(screen))
- return r300CreateContext(glVisual, driContextPriv, sharedContextPriv);
- return GL_FALSE;
+ rb = rfb->color_rb[0];
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
+ }
+ rb = rfb->color_rb[1];
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
+ }
+ rb = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
+ }
}
-/**
- * Choose the appropriate DestroyContext function based on the chipset.
- */
-static void radeonDestroyContext(__DRIcontextPrivate * driContextPriv)
+void
+radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
{
- radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
-
- if (IS_R300_CLASS(radeon->radeonScreen))
- return r300DestroyContext(driContextPriv);
+ struct radeon_framebuffer *rfb;
+ if (!driDrawPriv)
+ return;
+
+ rfb = (void*)driDrawPriv->driverPrivate;
+ if (!rfb)
+ return;
+ radeon_cleanup_renderbuffers(rfb);
+ _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
}
-#endif
-
-
/**
* This is the driver specific part of the createNewScreen entry point.
*
@@ -1191,6 +1604,11 @@ radeonInitScreen(__DRIscreenPrivate *psp)
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
static const __DRIversion drm_expected = { 1, 24, 0 };
+#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+ static const char *driver_name = "R600";
+ static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
+ static const __DRIversion dri_expected = { 4, 0, 0 };
+ static const __DRIversion drm_expected = { 1, 24, 0 };
#endif
RADEONDRIPtr dri_priv = (RADEONDRIPtr) psp->pDevPriv;
@@ -1218,20 +1636,112 @@ radeonInitScreen(__DRIscreenPrivate *psp)
driInitSingleExtension( NULL, NV_vp_extension );
driInitSingleExtension( NULL, ATI_fs_extension );
driInitExtensions( NULL, point_extensions, GL_FALSE );
-#elif defined(RADEON_COMMON_FOR_R300)
+#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
driInitSingleExtension( NULL, gl_20_extension );
#endif
if (!radeonInitDriver(psp))
return NULL;
+ /* for now fill in all modes */
return radeonFillInModes( psp,
dri_priv->bpp,
(dri_priv->bpp == 16) ? 16 : 24,
- (dri_priv->bpp == 16) ? 0 : 8,
- (dri_priv->backOffset != dri_priv->depthOffset) );
+ (dri_priv->bpp == 16) ? 0 : 8, 1);
}
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ * Called when using DRI2.
+ *
+ * \return the __GLcontextModes supported by this driver
+ */
+static const
+__DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp)
+{
+ GLenum fb_format[3];
+ GLenum fb_type[3];
+ /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
+ * support pageflipping at all.
+ */
+ static const GLenum back_buffer_modes[] = {
+ GLX_NONE, GLX_SWAP_UNDEFINED_OML, /*, GLX_SWAP_COPY_OML*/
+ };
+ uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1];
+ int color;
+ __DRIconfig **configs = NULL;
+
+ /* Calling driInitExtensions here, with a NULL context pointer,
+ * does not actually enable the extensions. It just makes sure
+ * that all the dispatch offsets for all the extensions that
+ * *might* be enables are known. This is needed because the
+ * dispatch offsets need to be known when _mesa_context_create
+ * is called, but we can't enable the extensions until we have a
+ * context pointer.
+ *
+ * Hello chicken. Hello egg. How are you two today?
+ */
+ driInitExtensions( NULL, card_extensions, GL_FALSE );
+ driInitExtensions( NULL, mm_extensions, GL_FALSE );
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+ driInitExtensions( NULL, blend_extensions, GL_FALSE );
+ driInitSingleExtension( NULL, ARB_vp_extension );
+ driInitSingleExtension( NULL, NV_vp_extension );
+ driInitSingleExtension( NULL, ATI_fs_extension );
+ driInitExtensions( NULL, point_extensions, GL_FALSE );
+#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
+ driInitSingleExtension( NULL, gl_20_extension );
+#endif
+
+ if (!radeonInitDriver(psp)) {
+ return NULL;
+ }
+ depth_bits[0] = 0;
+ stencil_bits[0] = 0;
+ depth_bits[1] = 16;
+ stencil_bits[1] = 0;
+ depth_bits[2] = 24;
+ stencil_bits[2] = 0;
+ depth_bits[3] = 24;
+ stencil_bits[3] = 8;
+
+ msaa_samples_array[0] = 0;
+
+ fb_format[0] = GL_RGB;
+ fb_type[0] = GL_UNSIGNED_SHORT_5_6_5;
+
+ fb_format[1] = GL_BGR;
+ fb_type[1] = GL_UNSIGNED_INT_8_8_8_8_REV;
+
+ fb_format[2] = GL_BGRA;
+ fb_type[2] = GL_UNSIGNED_INT_8_8_8_8_REV;
+
+ for (color = 0; color < ARRAY_SIZE(fb_format); color++) {
+ __DRIconfig **new_configs;
+
+ new_configs = driCreateConfigs(fb_format[color], fb_type[color],
+ depth_bits,
+ stencil_bits,
+ ARRAY_SIZE(depth_bits),
+ back_buffer_modes,
+ ARRAY_SIZE(back_buffer_modes),
+ msaa_samples_array,
+ ARRAY_SIZE(msaa_samples_array));
+ if (configs == NULL)
+ configs = new_configs;
+ else
+ configs = driConcatConfigs(configs, new_configs);
+ }
+
+ if (configs == NULL) {
+ fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+ __LINE__);
+ return NULL;
+ }
+
+ return (const __DRIconfig **)configs;
+}
/**
* Get information about previous buffer swaps.
@@ -1239,36 +1749,42 @@ radeonInitScreen(__DRIscreenPrivate *psp)
static int
getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
{
-#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
- radeonContextPtr rmesa;
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
- r200ContextPtr rmesa;
-#endif
+ struct radeon_framebuffer *rfb;
- if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
- || (dPriv->driContextPriv->driverPrivate == NULL)
- || (sInfo == NULL) ) {
- return -1;
+ if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
+ || (dPriv->driContextPriv->driverPrivate == NULL)
+ || (sInfo == NULL) ) {
+ return -1;
}
- rmesa = dPriv->driContextPriv->driverPrivate;
- sInfo->swap_count = rmesa->swap_count;
- sInfo->swap_ust = rmesa->swap_ust;
- sInfo->swap_missed_count = rmesa->swap_missed_count;
+ rfb = dPriv->driverPrivate;
+ sInfo->swap_count = rfb->swap_count;
+ sInfo->swap_ust = rfb->swap_ust;
+ sInfo->swap_missed_count = rfb->swap_missed_count;
sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0)
- ? driCalculateSwapUsage( dPriv, 0, rmesa->swap_missed_ust )
+ ? driCalculateSwapUsage( dPriv, 0, rfb->swap_missed_ust )
: 0.0;
return 0;
}
-#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
const struct __DriverAPIRec driDriverAPI = {
.InitScreen = radeonInitScreen,
.DestroyScreen = radeonDestroyScreen,
- .CreateContext = radeonCreateContext,
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+ .CreateContext = r200CreateContext,
+ .DestroyContext = r200DestroyContext,
+#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+ .CreateContext = r600CreateContext,
+ .DestroyContext = radeonDestroyContext,
+#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+ .CreateContext = r300CreateContext,
.DestroyContext = radeonDestroyContext,
+#else
+ .CreateContext = r100CreateContext,
+ .DestroyContext = radeonDestroyContext,
+#endif
.CreateBuffer = radeonCreateBuffer,
.DestroyBuffer = radeonDestroyBuffer,
.SwapBuffers = radeonSwapBuffers,
@@ -1280,23 +1796,7 @@ const struct __DriverAPIRec driDriverAPI = {
.WaitForSBC = NULL,
.SwapBuffersMSC = NULL,
.CopySubBuffer = radeonCopySubBuffer,
+ /* DRI2 */
+ .InitScreen2 = radeonInitScreen2,
};
-#else
-const struct __DriverAPIRec driDriverAPI = {
- .InitScreen = radeonInitScreen,
- .DestroyScreen = radeonDestroyScreen,
- .CreateContext = r200CreateContext,
- .DestroyContext = r200DestroyContext,
- .CreateBuffer = radeonCreateBuffer,
- .DestroyBuffer = radeonDestroyBuffer,
- .SwapBuffers = r200SwapBuffers,
- .MakeCurrent = r200MakeCurrent,
- .UnbindContext = r200UnbindContext,
- .GetSwapInfo = getSwapInfo,
- .GetDrawableMSC = driDrawableGetMSC32,
- .WaitForMSC = driWaitForMSC32,
- .WaitForSBC = NULL,
- .SwapBuffersMSC = NULL,
- .CopySubBuffer = r200CopySubBuffer,
-};
-#endif
+
diff --git a/radeon/radeon_screen.h b/radeon/radeon_screen.h
index b84c70b..15744e8 100644
--- a/radeon/radeon_screen.h
+++ b/radeon/radeon_screen.h
@@ -54,11 +54,12 @@ typedef struct {
drmAddress map; /* Mapping of the DRM region */
} radeonRegionRec, *radeonRegionPtr;
-typedef struct {
+typedef struct radeon_screen {
int chip_family;
int chip_flags;
int cpp;
int card_type;
+ int device_id; /* PCI ID */
int AGPMode;
unsigned int irq; /* IRQ number (0 means none) */
@@ -98,14 +99,19 @@ typedef struct {
GLboolean drmSupportsPointSprites; /* need radeon kernel module >= 1.13 */
GLboolean drmSupportsCubeMapsR100; /* need radeon kernel module >= 1.15 */
GLboolean drmSupportsVertexProgram; /* need radeon kernel module >= 1.25 */
+ GLboolean drmSupportsOcclusionQueries; /* need radeon kernel module >= 1.30 */
GLboolean depthHasSurface;
/* Configuration cache with default values for all contexts */
driOptionCache optionCache;
- const __DRIextension *extensions[8];
+ const __DRIextension *extensions[16];
int num_gb_pipes;
+ int num_z_pipes;
+ int kernel_mm;
+ drm_radeon_sarea_t *sarea; /* Private SAREA data */
+ struct radeon_bo_manager *bom;
} radeonScreenRec, *radeonScreenPtr;
#define IS_R100_CLASS(screen) \
@@ -114,5 +120,8 @@ typedef struct {
((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R200)
#define IS_R300_CLASS(screen) \
((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R300)
+#define IS_R600_CLASS(screen) \
+ ((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R600)
+extern void radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv);
#endif /* __RADEON_SCREEN_H__ */
diff --git a/radeon/radeon_span.c b/radeon/radeon_span.c
index 12051ff..d603f52 100644
--- a/radeon/radeon_span.c
+++ b/radeon/radeon_span.c
@@ -43,46 +43,361 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/glheader.h"
#include "swrast/swrast.h"
-#include "radeon_context.h"
-#include "radeon_ioctl.h"
-#include "radeon_state.h"
+#include "radeon_common.h"
+#include "radeon_lock.h"
#include "radeon_span.h"
-#include "radeon_tex.h"
-
-#include "drirenderbuffer.h"
#define DBG 0
+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
+
+
+/* r200 depth buffer is always tiled - this is the formula
+ according to the docs unless I typo'ed in it
+*/
+#if defined(RADEON_COMMON_FOR_R200)
+static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{
+ GLubyte *ptr = rrb->bo->ptr;
+ GLint offset;
+ if (rrb->has_surface) {
+ offset = x * rrb->cpp + y * rrb->pitch;
+ } else {
+ GLuint b;
+ offset = 0;
+ b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6)));
+ offset += (b >> 1) << 12;
+ offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
+ offset += ((y >> 2) & 0x3) << 9;
+ offset += ((x >> 3) & 0x1) << 8;
+ offset += ((x >> 4) & 0x3) << 6;
+ offset += ((x >> 2) & 0x1) << 5;
+ offset += ((y >> 1) & 0x1) << 4;
+ offset += ((x >> 1) & 0x1) << 3;
+ offset += (y & 0x1) << 2;
+ offset += (x & 0x1) << 1;
+ }
+ return &ptr[offset];
+}
+
+static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{
+ GLubyte *ptr = rrb->bo->ptr;
+ GLint offset;
+ if (rrb->has_surface) {
+ offset = x * rrb->cpp + y * rrb->pitch;
+ } else {
+ GLuint b;
+ offset = 0;
+ b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
+ offset += (b >> 1) << 12;
+ offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
+ offset += ((y >> 2) & 0x3) << 9;
+ offset += ((x >> 2) & 0x1) << 8;
+ offset += ((x >> 3) & 0x3) << 6;
+ offset += ((y >> 1) & 0x1) << 5;
+ offset += ((x >> 1) & 0x1) << 4;
+ offset += (y & 0x1) << 3;
+ offset += (x & 0x1) << 2;
+ }
+ return &ptr[offset];
+}
+#endif
+
+/* r600 tiling
+ * two main types:
+ * - 1D (akin to macro-linear/micro-tiled on older asics)
+ * - 2D (akin to macro-tiled/micro-tiled on older asics)
+ * only 1D tiling is implemented below
+ */
+#if defined(RADEON_COMMON_FOR_R600)
+static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y, GLint is_depth, GLint is_stencil)
+{
+ GLint element_bytes = rrb->cpp;
+ GLint num_samples = 1;
+ GLint tile_width = 8;
+ GLint tile_height = 8;
+ GLint tile_thickness = 1;
+ GLint pitch_elements = rrb->pitch / element_bytes;
+ GLint height = rrb->base.Height;
+ GLint z = 0;
+ GLint sample_number = 0;
+ /* */
+ GLint tile_bytes;
+ GLint tiles_per_row;
+ GLint tiles_per_slice;
+ GLint slice_offset;
+ GLint tile_row_index;
+ GLint tile_column_index;
+ GLint tile_offset;
+ GLint pixel_number = 0;
+ GLint element_offset;
+ GLint offset = 0;
+
+ tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
+ tiles_per_row = pitch_elements / tile_width;
+ tiles_per_slice = tiles_per_row * (height / tile_height);
+ slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
+ tile_row_index = y / tile_height;
+ tile_column_index = x / tile_width;
+ tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;
+
+ if (is_depth) {
+ GLint pixel_offset = 0;
+
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
+ pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
+ pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+ pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ switch (element_bytes) {
+ case 2:
+ pixel_offset = pixel_number * element_bytes * num_samples;
+ break;
+ case 4:
+ /* stencil and depth data are stored separately within a tile.
+ * stencil is stored in a contiguous tile before the depth tile.
+ * stencil element is 1 byte, depth element is 3 bytes.
+ * stencil tile is 64 bytes.
+ */
+ if (is_stencil)
+ pixel_offset = pixel_number * 1 * num_samples;
+ else
+ pixel_offset = (pixel_number * 3 * num_samples) + 64;
+ break;
+ }
+ element_offset = pixel_offset + (sample_number * element_bytes);
+ } else {
+ GLint sample_offset;
+
+ switch (element_bytes) {
+ case 1:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+ pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+ pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ case 2:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+ pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
+ pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ case 4:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
+ pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
+ pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ }
+ sample_offset = sample_number * (tile_bytes / num_samples);
+ element_offset = sample_offset + (pixel_number * element_bytes);
+ }
+ offset = slice_offset + tile_offset + element_offset;
+ return offset;
+}
+
+/* depth buffers */
+static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{
+ GLubyte *ptr = rrb->bo->ptr;
+ GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
+ return &ptr[offset];
+}
+
+static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{
+ GLubyte *ptr = rrb->bo->ptr;
+ GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
+ return &ptr[offset];
+}
+
+static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{
+ GLubyte *ptr = rrb->bo->ptr;
+ uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+ GLint offset;
+
+ if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+ offset = x * rrb->cpp + y * rrb->pitch;
+ } else {
+ offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
+ }
+ return &ptr[offset];
+}
+
+#else
+
+/* radeon tiling on r300-r500 has 4 states,
+ macro-linear/micro-linear
+ macro-linear/micro-tiled
+ macro-tiled /micro-linear
+ macro-tiled /micro-tiled
+ 1 byte surface
+ 2 byte surface - two types - we only provide 8x2 microtiling
+ 4 byte surface
+ 8/16 byte (unused)
+*/
+static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{
+ GLubyte *ptr = rrb->bo->ptr;
+ uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+ GLint offset;
+
+ if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+ offset = x * rrb->cpp + y * rrb->pitch;
+ } else {
+ offset = 0;
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
+ offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
+ offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
+ offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
+ offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
+ offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
+ offset += ((y >> 1) & 0x1) << 6;
+ offset += ((x >> 2) & 0x1) << 5;
+ offset += (y & 1) << 4;
+ offset += (x & 3) << 2;
+ } else {
+ offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
+ offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
+ offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
+ offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
+ offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
+ offset += (y & 1) << 6;
+ offset += (x & 15) << 2;
+ }
+ } else {
+ offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
+ offset += (y & 1) << 4;
+ offset += (x & 3) << 2;
+ }
+ }
+ return &ptr[offset];
+}
+
+static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{
+ GLubyte *ptr = rrb->bo->ptr;
+ uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+ GLint offset;
+
+ if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+ offset = x * rrb->cpp + y * rrb->pitch;
+ } else {
+ offset = 0;
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
+ offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
+ offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
+ offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
+ offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
+ offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
+ offset += ((y >> 1) & 0x1) << 6;
+ offset += ((x >> 3) & 0x1) << 5;
+ offset += (y & 1) << 4;
+ offset += (x & 3) << 2;
+ } else {
+ offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
+ offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
+ offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
+ offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
+ offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
+ offset += (y & 1) << 6;
+ offset += ((x >> 4) & 0x1) << 5;
+ offset += (x & 15) << 2;
+ }
+ } else {
+ offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
+ offset += (y & 0x1) << 4;
+ offset += (x & 0x7) << 1;
+ }
+ }
+ return &ptr[offset];
+}
+
+#endif
+
+#ifndef COMPILE_R300
+#ifndef COMPILE_R600
+static uint32_t
+z24s8_to_s8z24(uint32_t val)
+{
+ return (val << 24) | (val >> 8);
+}
+
+static uint32_t
+s8z24_to_z24s8(uint32_t val)
+{
+ return (val >> 24) | (val << 8);
+}
+#endif
+#endif
+
/*
* Note that all information needed to access pixels in a renderbuffer
* should be obtained through the gl_renderbuffer parameter, not per-context
* information.
*/
#define LOCAL_VARS \
- driRenderbuffer *drb = (driRenderbuffer *) rb; \
- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
- const GLuint bottom = dPriv->h - 1; \
- GLubyte *buf = (GLubyte *) drb->flippedData \
- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \
- GLuint p; \
- (void) p;
+ struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
+ struct radeon_renderbuffer *rrb = (void *) rb; \
+ const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
+ const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
+ unsigned int num_cliprects; \
+ struct drm_clip_rect *cliprects; \
+ int x_off, y_off; \
+ GLuint p; \
+ (void)p; \
+ radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
#define LOCAL_DEPTH_VARS \
- driRenderbuffer *drb = (driRenderbuffer *) rb; \
- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
- const GLuint bottom = dPriv->h - 1; \
- GLuint xo = dPriv->x; \
- GLuint yo = dPriv->y; \
- GLubyte *buf = (GLubyte *) drb->Base.Data;
+ struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
+ struct radeon_renderbuffer *rrb = (void *) rb; \
+ const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
+ const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
+ unsigned int num_cliprects; \
+ struct drm_clip_rect *cliprects; \
+ int x_off, y_off; \
+ radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
-#define Y_FLIP(Y) (bottom - (Y))
+#define Y_FLIP(_y) ((_y) * yScale + yBias)
#define HW_LOCK()
#define HW_UNLOCK()
+/* XXX FBO: this is identical to the macro in spantmp2.h except we get
+ * the cliprect info from the context, not the driDrawable.
+ * Move this into spantmp2.h someday.
+ */
+#define HW_CLIPLOOP() \
+ do { \
+ int _nc = num_cliprects; \
+ while ( _nc-- ) { \
+ int minx = cliprects[_nc].x1 - x_off; \
+ int miny = cliprects[_nc].y1 - y_off; \
+ int maxx = cliprects[_nc].x2 - x_off; \
+ int maxy = cliprects[_nc].y2 - y_off;
+
/* ================================================================
* Color buffer
*/
@@ -94,7 +409,61 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define TAG(x) radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+#if defined(RADEON_COMMON_FOR_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+/* 16 bit, ARGB1555 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
+
+#define TAG(x) radeon##x##_ARGB1555
+#define TAG2(x,y) radeon##x##_ARGB1555##y
+#if defined(RADEON_COMMON_FOR_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+/* 16 bit, RGBA4 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
+
+#define TAG(x) radeon##x##_ARGB4444
+#define TAG2(x,y) radeon##x##_ARGB4444##y
+#if defined(RADEON_COMMON_FOR_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+/* 32 bit, xRGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x) radeon##x##_xRGB8888
+#define TAG2(x,y) radeon##x##_xRGB8888##y
+#if defined(RADEON_COMMON_FOR_R600)
+#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
+#define PUT_VALUE(_x, _y, d) { \
+ GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
+ *_ptr = d; \
+} while (0)
+#else
+#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
+#define PUT_VALUE(_x, _y, d) { \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ *_ptr = d; \
+} while (0)
+#endif
#include "spantmp2.h"
/* 32 bit, ARGB8888 color spanline and pixel functions
@@ -104,7 +473,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define TAG(x) radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+#if defined(RADEON_COMMON_FOR_R600)
+#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
+#define PUT_VALUE(_x, _y, d) { \
+ GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
+ *_ptr = d; \
+} while (0)
+#else
+#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
+#define PUT_VALUE(_x, _y, d) { \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ *_ptr = d; \
+} while (0)
+#endif
#include "spantmp2.h"
/* ================================================================
@@ -121,106 +502,167 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* too...
*/
-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
-{
- GLuint pitch = drb->pitch;
- if (drb->depthHasSurface) {
- return 4 * (x + y * pitch);
- } else {
- GLuint ba, address = 0; /* a[0..1] = 0 */
-
-#ifdef COMPILE_R300
- ba = (y / 8) * (pitch / 8) + (x / 8);
-#else
- ba = (y / 16) * (pitch / 16) + (x / 16);
-#endif
-
- address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */
- address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */
- address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */
- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
-
- address |= (y & 0x8) << 7; /* a[10] = y[3] */
- address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */
- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
-
- return address;
- }
-}
-
-static INLINE GLuint
-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
-{
- GLuint pitch = drb->pitch;
- if (drb->depthHasSurface) {
- return 2 * (x + y * pitch);
- } else {
- GLuint ba, address = 0; /* a[0] = 0 */
-
- ba = (y / 16) * (pitch / 32) + (x / 32);
-
- address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */
- address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */
- address |= (x & 0x8) << 4; /* a[7] = x[3] */
- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
- address |= (y & 0x8) << 7; /* a[10] = y[3] */
- address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */
- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
-
- return address;
- }
-}
-
/* 16-bit depth buffer functions
*/
#define VALUE_TYPE GLushort
+#if defined(RADEON_COMMON_FOR_R200)
+#define WRITE_DEPTH( _x, _y, d ) \
+ *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
+#elif defined(RADEON_COMMON_FOR_R600)
+#define WRITE_DEPTH( _x, _y, d ) \
+ *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
+#else
#define WRITE_DEPTH( _x, _y, d ) \
- *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
+ *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
+#endif
+#if defined(RADEON_COMMON_FOR_R200)
+#define READ_DEPTH( d, _x, _y ) \
+ d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
+#elif defined(RADEON_COMMON_FOR_R600)
#define READ_DEPTH( d, _x, _y ) \
- d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
+ d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
+#else
+#define READ_DEPTH( d, _x, _y ) \
+ d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
+#endif
#define TAG(x) radeon##x##_z16
#include "depthtmp.h"
-/* 24 bit depth, 8 bit stencil depthbuffer functions
+/* 24 bit depth
*
* Careful: It looks like the R300 uses ZZZS byte order while the R200
* uses SZZZ for 24 bit depth, 8 bit stencil mode.
*/
#define VALUE_TYPE GLuint
-#ifdef COMPILE_R300
+#if defined(COMPILE_R300)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
tmp &= 0x000000ff; \
tmp |= ((d << 8) & 0xffffff00); \
- *(GLuint *)(buf + offset) = tmp; \
+ *_ptr = tmp; \
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
+ tmp &= 0xff000000; \
+ tmp |= ((d) & 0x00ffffff); \
+ *_ptr = tmp; \
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
+ tmp &= 0xff000000; \
+ tmp |= ((d) & 0x00ffffff); \
+ *_ptr = tmp; \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
tmp &= 0xff000000; \
tmp |= ((d) & 0x00ffffff); \
- *(GLuint *)(buf + offset) = tmp; \
+ *_ptr = tmp; \
} while (0)
#endif
-#ifdef COMPILE_R300
+#if defined(COMPILE_R300)
#define READ_DEPTH( d, _x, _y ) \
- do { \
- d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
- _y + yo )) & 0xffffff00) >> 8; \
+ do { \
+ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
+ }while(0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define READ_DEPTH( d, _x, _y ) \
+ do { \
+ d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
+ }while(0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define READ_DEPTH( d, _x, _y ) \
+ do { \
+ d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
}while(0)
#else
+#define READ_DEPTH( d, _x, _y ) \
+ d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff;
+#endif
+
+#define TAG(x) radeon##x##_z24
+#include "depthtmp.h"
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions
+ * EXT_depth_stencil
+ *
+ * Careful: It looks like the R300 uses ZZZS byte order while the R200
+ * uses SZZZ for 24 bit depth, 8 bit stencil mode.
+ */
+#define VALUE_TYPE GLuint
+
+#if defined(COMPILE_R300)
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ *_ptr = d; \
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
+ tmp &= 0xff000000; \
+ tmp |= (((d) >> 8) & 0x00ffffff); \
+ *_ptr = tmp; \
+ _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
+ tmp = *_ptr; \
+ tmp &= 0xffffff00; \
+ tmp |= (d) & 0xff; \
+ *_ptr = tmp; \
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = z24s8_to_s8z24(d); \
+ *_ptr = tmp; \
+} while (0)
+#else
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = z24s8_to_s8z24(d); \
+ *_ptr = tmp; \
+} while (0)
+#endif
+
+#if defined(COMPILE_R300)
+#define READ_DEPTH( d, _x, _y ) \
+ do { \
+ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
+ }while(0)
+#elif defined(RADEON_COMMON_FOR_R600)
#define READ_DEPTH( d, _x, _y ) \
- d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
- _y + yo )) & 0x00ffffff;
+ do { \
+ d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
+ d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \
+ }while(0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define READ_DEPTH( d, _x, _y ) \
+ do { \
+ d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \
+ }while(0)
+#else
+#define READ_DEPTH( d, _x, _y ) do { \
+ d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \
+ } while (0)
#endif
#define TAG(x) radeon##x##_z24_s8
@@ -235,35 +677,67 @@ do { \
#ifdef COMPILE_R300
#define WRITE_STENCIL( _x, _y, d ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
+ GLuint tmp = *_ptr; \
tmp &= 0xffffff00; \
tmp |= (d) & 0xff; \
- *(GLuint *)(buf + offset) = tmp; \
+ *_ptr = tmp; \
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define WRITE_STENCIL( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
+ GLuint tmp = *_ptr; \
+ tmp &= 0xffffff00; \
+ tmp |= (d) & 0xff; \
+ *_ptr = tmp; \
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define WRITE_STENCIL( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \
+ GLuint tmp = *_ptr; \
+ tmp &= 0x00ffffff; \
+ tmp |= (((d) & 0xff) << 24); \
+ *_ptr = tmp; \
} while (0)
#else
#define WRITE_STENCIL( _x, _y, d ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
+ GLuint tmp = *_ptr; \
tmp &= 0x00ffffff; \
tmp |= (((d) & 0xff) << 24); \
- *(GLuint *)(buf + offset) = tmp; \
+ *_ptr = tmp; \
} while (0)
#endif
#ifdef COMPILE_R300
#define READ_STENCIL( d, _x, _y ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
+ d = tmp & 0x000000ff; \
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define READ_STENCIL( d, _x, _y ) \
+do { \
+ GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
d = tmp & 0x000000ff; \
} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define READ_STENCIL( d, _x, _y ) \
+do { \
+ GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
+ d = (tmp & 0xff000000) >> 24; \
+} while (0)
#else
#define READ_STENCIL( d, _x, _y ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
d = (tmp & 0xff000000) >> 24; \
} while (0)
#endif
@@ -271,29 +745,110 @@ do { \
#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"
-/* Move locking out to get reasonable span performance (10x better
- * than doing this in HW_LOCK above). WaitForIdle() is the main
- * culprit.
- */
+
+static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
+{
+ struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
+ int r;
+
+ if (rrb == NULL || !rrb->bo)
+ return;
+
+ if (flag) {
+ if (rrb->bo->bom->funcs->bo_wait)
+ radeon_bo_wait(rrb->bo);
+ r = radeon_bo_map(rrb->bo, 1);
+ if (r) {
+ fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
+ __FUNCTION__, r);
+ }
+
+ radeonSetSpanFunctions(rrb);
+ } else {
+ radeon_bo_unmap(rrb->bo);
+ rb->GetRow = NULL;
+ rb->PutRow = NULL;
+ }
+}
+
+static void
+radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
+{
+ GLuint i, j;
+
+ /* color draw buffers */
+ for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
+ map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
+
+ /* check for render to textures */
+ for (i = 0; i < BUFFER_COUNT; i++) {
+ struct gl_renderbuffer_attachment *att =
+ ctx->DrawBuffer->Attachment + i;
+ struct gl_texture_object *tex = att->Texture;
+ if (tex) {
+ /* Render to texture. Note that a mipmapped texture need not
+ * be complete for render to texture, so we must restrict to
+ * mapping only the attached image.
+ */
+ radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
+ ASSERT(att->Renderbuffer);
+
+ if (map)
+ radeon_teximage_map(image, GL_TRUE);
+ else
+ radeon_teximage_unmap(image);
+ }
+ }
+
+ map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
+
+ /* depth buffer (Note wrapper!) */
+ if (ctx->DrawBuffer->_DepthBuffer)
+ map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
+
+ if (ctx->DrawBuffer->_StencilBuffer)
+ map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
+}
static void radeonSpanRenderStart(GLcontext * ctx)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-#ifdef COMPILE_R300
- r300ContextPtr r300 = (r300ContextPtr) rmesa;
- R300_FIREVERTICES(r300);
-#else
- RADEON_FIREVERTICES(rmesa);
-#endif
- LOCK_HARDWARE(rmesa);
- radeonWaitForIdleLocked(rmesa);
+ int i;
+
+ radeon_firevertices(rmesa);
+
+ /* The locking and wait for idle should really only be needed in classic mode.
+ * In a future memory manager based implementation, this should become
+ * unnecessary due to the fact that mapping our buffers, textures, etc.
+ * should implicitly wait for any previous rendering commands that must
+ * be waited on. */
+ if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+ LOCK_HARDWARE(rmesa);
+ radeonWaitForIdleLocked(rmesa);
+ }
+
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled)
+ ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
+ }
+
+ radeon_map_unmap_buffers(ctx, 1);
}
static void radeonSpanRenderFinish(GLcontext * ctx)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ int i;
_swrast_flush(ctx);
- UNLOCK_HARDWARE(rmesa);
+ if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+ UNLOCK_HARDWARE(rmesa);
+ }
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled)
+ ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
+ }
+
+ radeon_map_unmap_buffers(ctx, 0);
}
void radeonInitSpanFuncs(GLcontext * ctx)
@@ -307,20 +862,27 @@ void radeonInitSpanFuncs(GLcontext * ctx)
/**
* Plug in the Get/Put routines for the given driRenderbuffer.
*/
-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
{
- if (drb->Base.InternalFormat == GL_RGBA) {
- if (vis->redBits == 5 && vis->greenBits == 6
- && vis->blueBits == 5) {
- radeonInitPointers_RGB565(&drb->Base);
- } else {
- radeonInitPointers_ARGB8888(&drb->Base);
- }
- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
- radeonInitDepthPointers_z16(&drb->Base);
- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
- radeonInitDepthPointers_z24_s8(&drb->Base);
- } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
- radeonInitStencilPointers_z24_s8(&drb->Base);
+ if (rrb->base._ActualFormat == GL_RGB5) {
+ radeonInitPointers_RGB565(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_RGB8) {
+ radeonInitPointers_xRGB8888(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_RGBA8) {
+ radeonInitPointers_ARGB8888(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_RGBA4) {
+ radeonInitPointers_ARGB4444(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_RGB5_A1) {
+ radeonInitPointers_ARGB1555(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
+ radeonInitDepthPointers_z16(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
+ radeonInitDepthPointers_z24(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
+ radeonInitDepthPointers_z24_s8(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
+ radeonInitStencilPointers_z24_s8(&rrb->base);
+ } else {
+ fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat);
}
}
diff --git a/radeon/radeon_span.h b/radeon/radeon_span.h
index 9abe086..ea6a2e7 100644
--- a/radeon/radeon_span.h
+++ b/radeon/radeon_span.h
@@ -42,9 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#ifndef __RADEON_SPAN_H__
#define __RADEON_SPAN_H__
-#include "drirenderbuffer.h"
-
extern void radeonInitSpanFuncs(GLcontext * ctx);
-extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
#endif
diff --git a/radeon/radeon_state.c b/radeon/radeon_state.c
index b656100..4d0d35e 100644
--- a/radeon/radeon_state.c
+++ b/radeon/radeon_state.c
@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/state.h"
#include "main/context.h"
#include "main/framebuffer.h"
+#include "main/simple_list.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
@@ -47,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "swrast_setup/swrast_setup.h"
#include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_tcl.h"
@@ -62,7 +64,7 @@ static void radeonUpdateSpecular( GLcontext *ctx );
static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
GLubyte refByte;
@@ -106,7 +108,7 @@ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
static void radeonBlendEquationSeparate( GLcontext *ctx,
GLenum modeRGB, GLenum modeA )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
GLboolean fallback = GL_FALSE;
@@ -147,8 +149,8 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
GLenum sfactorRGB, GLenum dfactorRGB,
GLenum sfactorA, GLenum dfactorA )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] &
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] &
~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
GLboolean fallback = GL_FALSE;
@@ -257,7 +259,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
static void radeonDepthFunc( GLcontext *ctx, GLenum func )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
@@ -293,7 +295,7 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
RADEON_STATECHANGE( rmesa, ctx );
if ( ctx->Depth.Mask ) {
@@ -305,16 +307,16 @@ static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
static void radeonClearDepth( GLcontext *ctx, GLclampd d )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
RADEON_DEPTH_FORMAT_MASK);
switch ( format ) {
case RADEON_DEPTH_FORMAT_16BIT_INT_Z:
- rmesa->state.depth.clear = d * 0x0000ffff;
+ rmesa->radeon.state.depth.clear = d * 0x0000ffff;
break;
case RADEON_DEPTH_FORMAT_24BIT_INT_Z:
- rmesa->state.depth.clear = d * 0x00ffffff;
+ rmesa->radeon.state.depth.clear = d * 0x00ffffff;
break;
}
}
@@ -327,7 +329,7 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
union { int i; float f; } c, d;
GLchan col[4];
@@ -391,7 +393,7 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
rmesa->hw.fog.cmd[FOG_D] = d.i;
}
break;
- case GL_FOG_COLOR:
+ case GL_FOG_COLOR:
RADEON_STATECHANGE( rmesa, ctx );
UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~RADEON_FOG_COLOR_MASK;
@@ -406,109 +408,13 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
}
}
-
-/* =============================================================
- * Scissoring
- */
-
-
-static GLboolean intersect_rect( drm_clip_rect_t *out,
- drm_clip_rect_t *a,
- drm_clip_rect_t *b )
-{
- *out = *a;
- if ( b->x1 > out->x1 ) out->x1 = b->x1;
- if ( b->y1 > out->y1 ) out->y1 = b->y1;
- if ( b->x2 < out->x2 ) out->x2 = b->x2;
- if ( b->y2 < out->y2 ) out->y2 = b->y2;
- if ( out->x1 >= out->x2 ) return GL_FALSE;
- if ( out->y1 >= out->y2 ) return GL_FALSE;
- return GL_TRUE;
-}
-
-
-void radeonRecalcScissorRects( radeonContextPtr rmesa )
-{
- drm_clip_rect_t *out;
- int i;
-
- /* Grow cliprect store?
- */
- if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
- while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
- rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */
- rmesa->state.scissor.numAllocedClipRects *= 2;
- }
-
- if (rmesa->state.scissor.pClipRects)
- FREE(rmesa->state.scissor.pClipRects);
-
- rmesa->state.scissor.pClipRects =
- MALLOC( rmesa->state.scissor.numAllocedClipRects *
- sizeof(drm_clip_rect_t) );
-
- if ( rmesa->state.scissor.pClipRects == NULL ) {
- rmesa->state.scissor.numAllocedClipRects = 0;
- return;
- }
- }
-
- out = rmesa->state.scissor.pClipRects;
- rmesa->state.scissor.numClipRects = 0;
-
- for ( i = 0 ; i < rmesa->numClipRects ; i++ ) {
- if ( intersect_rect( out,
- &rmesa->pClipRects[i],
- &rmesa->state.scissor.rect ) ) {
- rmesa->state.scissor.numClipRects++;
- out++;
- }
- }
-}
-
-
-static void radeonUpdateScissor( GLcontext *ctx )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
- if ( rmesa->dri.drawable ) {
- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-
- int x = ctx->Scissor.X;
- int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
- int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
- int h = dPriv->h - ctx->Scissor.Y - 1;
-
- rmesa->state.scissor.rect.x1 = x + dPriv->x;
- rmesa->state.scissor.rect.y1 = y + dPriv->y;
- rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
- rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
-
- radeonRecalcScissorRects( rmesa );
- }
-}
-
-
-static void radeonScissor( GLcontext *ctx,
- GLint x, GLint y, GLsizei w, GLsizei h )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
- if ( ctx->Scissor.Enabled ) {
- RADEON_FIREVERTICES( rmesa ); /* don't pipeline cliprect changes */
- radeonUpdateScissor( ctx );
- }
-
-}
-
-
/* =============================================================
* Culling
*/
static void radeonCullFace( GLcontext *ctx, GLenum unused )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
@@ -545,7 +451,7 @@ static void radeonCullFace( GLcontext *ctx, GLenum unused )
static void radeonFrontFace( GLcontext *ctx, GLenum mode )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
RADEON_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
@@ -553,6 +459,10 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_CULL_FRONT_IS_CCW;
+ /* Winding is inverted when rendering to FBO */
+ if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+ mode = (mode == GL_CW) ? GL_CCW : GL_CW;
+
switch ( mode ) {
case GL_CW:
rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CW;
@@ -570,7 +480,7 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
*/
static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
RADEON_STATECHANGE( rmesa, lin );
RADEON_STATECHANGE( rmesa, set );
@@ -587,10 +497,10 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
RADEON_STATECHANGE( rmesa, lin );
- rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] =
+ rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] =
((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern));
}
@@ -602,12 +512,19 @@ static void radeonColorMask( GLcontext *ctx,
GLboolean r, GLboolean g,
GLboolean b, GLboolean a )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- GLuint mask = radeonPackColor( rmesa->radeonScreen->cpp,
- ctx->Color.ColorMask[RCOMP],
- ctx->Color.ColorMask[GCOMP],
- ctx->Color.ColorMask[BCOMP],
- ctx->Color.ColorMask[ACOMP] );
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb;
+ GLuint mask;
+
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ if (!rrb)
+ return;
+
+ mask = radeonPackColor( rrb->cpp,
+ ctx->Color.ColorMask[RCOMP],
+ ctx->Color.ColorMask[GCOMP],
+ ctx->Color.ColorMask[BCOMP],
+ ctx->Color.ColorMask[ACOMP] );
if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) {
RADEON_STATECHANGE( rmesa, msk );
@@ -623,8 +540,9 @@ static void radeonColorMask( GLcontext *ctx,
static void radeonPolygonOffset( GLcontext *ctx,
GLfloat factor, GLfloat units )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- float_ui32_type constant = { units * rmesa->state.depth.scale };
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+ float_ui32_type constant = { units * depthScale };
float_ui32_type factoru = { factor };
RADEON_STATECHANGE( rmesa, zbs );
@@ -632,41 +550,16 @@ static void radeonPolygonOffset( GLcontext *ctx,
rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32;
}
-static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- GLuint i;
- drm_radeon_stipple_t stipple;
-
- /* Must flip pattern upside down.
- */
- for ( i = 0 ; i < 32 ; i++ ) {
- rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i];
- }
-
- /* TODO: push this into cmd mechanism
- */
- RADEON_FIREVERTICES( rmesa );
- LOCK_HARDWARE( rmesa );
-
- /* FIXME: Use window x,y offsets into stipple RAM.
- */
- stipple.mask = rmesa->state.stipple.mask;
- drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE,
- &stipple, sizeof(drm_radeon_stipple_t) );
- UNLOCK_HARDWARE( rmesa );
-}
-
static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
/* Can't generally do unfilled via tcl, but some good special
- * cases work.
+ * cases work.
*/
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag);
- if (rmesa->TclFallback) {
+ if (rmesa->radeon.TclFallback) {
radeonChooseRenderState( ctx );
radeonChooseVertexState( ctx );
}
@@ -686,7 +579,7 @@ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
*/
static void radeonUpdateSpecular( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
GLuint flag = 0;
@@ -711,7 +604,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
p |= RADEON_SPECULAR_ENABLE;
- rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &=
+ rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &=
~RADEON_DIFFUSE_SPECULAR_COMBINE;
}
else if (ctx->Light.Enabled) {
@@ -741,7 +634,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
RADEON_TCL_COMPUTE_SPECULAR) != 0;
}
}
-
+
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_FOGCOORDSPEC, flag);
if (NEED_SECONDARY_COLOR(ctx)) {
@@ -757,7 +650,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
/* Update vertex/render formats
*/
- if (rmesa->TclFallback) {
+ if (rmesa->radeon.TclFallback) {
radeonChooseRenderState( ctx );
radeonChooseVertexState( ctx );
}
@@ -769,12 +662,12 @@ static void radeonUpdateSpecular( GLcontext *ctx )
*/
-/* Update on colormaterial, material emmissive/ambient,
+/* Update on colormaterial, material emmissive/ambient,
* lightmodel.globalambient
*/
static void update_global_ambient( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
float *fcmd = (float *)RADEON_DB_STATE( glt );
/* Need to do more if both emmissive & ambient are PREMULT:
@@ -782,23 +675,23 @@ static void update_global_ambient( GLcontext *ctx )
*/
if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &
((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
- (3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0)
+ (3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0)
{
- COPY_3V( &fcmd[GLT_RED],
+ COPY_3V( &fcmd[GLT_RED],
ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]);
ACC_SCALE_3V( &fcmd[GLT_RED],
ctx->Light.Model.Ambient,
ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]);
- }
+ }
else
{
COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient );
}
-
+
RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
}
-/* Update on change to
+/* Update on change to
* - light[p].colors
* - light[p].enabled
*/
@@ -809,13 +702,13 @@ static void update_light_colors( GLcontext *ctx, GLuint p )
/* fprintf(stderr, "%s\n", __FUNCTION__); */
if (l->Enabled) {
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
- COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );
+ COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );
COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse );
COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular );
-
+
RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
}
}
@@ -829,7 +722,7 @@ static void check_twoside_fallback( GLcontext *ctx )
if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
if (ctx->Light.ColorMaterialEnabled &&
- (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) !=
+ (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) !=
((ctx->Light.ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1))
fallback = GL_TRUE;
else {
@@ -837,7 +730,7 @@ static void check_twoside_fallback( GLcontext *ctx )
if (memcmp( ctx->Light.Material.Attrib[i],
ctx->Light.Material.Attrib[i+1],
sizeof(GLfloat)*4) != 0) {
- fallback = GL_TRUE;
+ fallback = GL_TRUE;
break;
}
}
@@ -849,14 +742,14 @@ static void check_twoside_fallback( GLcontext *ctx )
static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
(3 << RADEON_AMBIENT_SOURCE_SHIFT) |
(3 << RADEON_DIFFUSE_SOURCE_SHIFT) |
- (3 << RADEON_SPECULAR_SOURCE_SHIFT));
-
+ (3 << RADEON_SPECULAR_SOURCE_SHIFT));
+
if (ctx->Light.ColorMaterialEnabled) {
GLuint mask = ctx->Light.ColorMaterialBitmask;
@@ -877,7 +770,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT <<
RADEON_AMBIENT_SOURCE_SHIFT);
}
-
+
if (mask & MAT_BIT_FRONT_DIFFUSE) {
light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
RADEON_DIFFUSE_SOURCE_SHIFT);
@@ -886,7 +779,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT <<
RADEON_DIFFUSE_SOURCE_SHIFT);
}
-
+
if (mask & MAT_BIT_FRONT_SPECULAR) {
light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
RADEON_SPECULAR_SOURCE_SHIFT);
@@ -904,27 +797,27 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
(RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT) |
(RADEON_LM_SOURCE_STATE_MULT << RADEON_SPECULAR_SOURCE_SHIFT);
}
-
+
if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) {
RADEON_STATECHANGE( rmesa, tcl );
- rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl1;
+ rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl1;
}
}
void radeonUpdateMaterial( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
GLuint mask = ~0;
-
+
if (ctx->Light.ColorMaterialEnabled)
mask &= ~ctx->Light.ColorMaterialBitmask;
- if (RADEON_DEBUG & DEBUG_STATE)
+ if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s\n", __FUNCTION__);
-
+
if (mask & MAT_BIT_FRONT_EMISSION) {
fcmd[MTL_EMMISSIVE_RED] = mat[MAT_ATTRIB_FRONT_EMISSION][0];
fcmd[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_FRONT_EMISSION][1];
@@ -974,11 +867,11 @@ void radeonUpdateMaterial( GLcontext *ctx )
*
* which are calculated in light.c and are correct for the current
* lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
- * and _MESA_NEW_NEED_EYE_COORDS.
+ * and _MESA_NEW_NEED_EYE_COORDS.
*/
static void update_light( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
/* Have to check these, or have an automatic shortcircuit mechanism
* to remove noop statechanges. (Or just do a better job on the
@@ -991,12 +884,12 @@ static void update_light( GLcontext *ctx )
tmp &= ~RADEON_LIGHT_IN_MODELSPACE;
else
tmp |= RADEON_LIGHT_IN_MODELSPACE;
-
+
/* Leave this test disabled: (unexplained q3 lockup) (even with
new packets)
*/
- if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL])
+ if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL])
{
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp;
@@ -1020,10 +913,10 @@ static void update_light( GLcontext *ctx )
if (ctx->Light.Light[p].Enabled) {
struct gl_light *l = &ctx->Light.Light[p];
GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] );
-
+
if (l->EyePosition[3] == 0.0) {
- COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm );
- COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm );
+ COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm );
+ COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm );
fcmd[LIT_POSITION_W] = 0;
fcmd[LIT_DIRECTION_W] = 0;
} else {
@@ -1043,30 +936,30 @@ static void update_light( GLcontext *ctx )
static void radeonLightfv( GLcontext *ctx, GLenum light,
GLenum pname, const GLfloat *params )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLint p = light - GL_LIGHT0;
struct gl_light *l = &ctx->Light.Light[p];
GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
-
+
switch (pname) {
- case GL_AMBIENT:
+ case GL_AMBIENT:
case GL_DIFFUSE:
case GL_SPECULAR:
update_light_colors( ctx, p );
break;
- case GL_SPOT_DIRECTION:
- /* picked up in update_light */
+ case GL_SPOT_DIRECTION:
+ /* picked up in update_light */
break;
case GL_POSITION: {
- /* positions picked up in update_light, but can do flag here */
+ /* positions picked up in update_light, but can do flag here */
GLuint flag;
GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
/* FIXME: Set RANGE_ATTEN only when needed */
- if (p&1)
+ if (p&1)
flag = RADEON_LIGHT_1_IS_LOCAL;
else
flag = RADEON_LIGHT_0_IS_LOCAL;
@@ -1158,16 +1051,16 @@ static void radeonLightfv( GLcontext *ctx, GLenum light,
}
}
-
+
static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
const GLfloat *param )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
switch (pname) {
- case GL_LIGHT_MODEL_AMBIENT:
+ case GL_LIGHT_MODEL_AMBIENT:
update_global_ambient( ctx );
break;
@@ -1188,7 +1081,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
check_twoside_fallback( ctx );
- if (rmesa->TclFallback) {
+ if (rmesa->radeon.TclFallback) {
radeonChooseRenderState( ctx );
radeonChooseVertexState( ctx );
}
@@ -1205,7 +1098,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
static void radeonShadeModel( GLcontext *ctx, GLenum mode )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
s &= ~(RADEON_DIFFUSE_SHADE_MASK |
@@ -1244,7 +1137,7 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
{
GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
RADEON_STATECHANGE( rmesa, ucp[p] );
@@ -1256,7 +1149,7 @@ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
static void radeonUpdateClipPlanes( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint p;
for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
@@ -1281,7 +1174,7 @@ static void
radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
GLint ref, GLuint mask )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) |
((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT));
@@ -1325,7 +1218,7 @@ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
static void
radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
RADEON_STATECHANGE( rmesa, msk );
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
@@ -1336,20 +1229,20 @@ radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
GLenum zfail, GLenum zpass )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
/* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP,
and DEC_WRAP (and INVERT) will do INVERT. No way to get correct INC_WRAP and DEC,
but DEC_WRAP can be fixed by using DEC and INC_WRAP at least use INC. */
-
+
GLuint tempRADEON_STENCIL_FAIL_DEC_WRAP;
GLuint tempRADEON_STENCIL_FAIL_INC_WRAP;
GLuint tempRADEON_STENCIL_ZFAIL_DEC_WRAP;
GLuint tempRADEON_STENCIL_ZFAIL_INC_WRAP;
GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP;
GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP;
-
- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
+
+ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC;
tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC;
tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC;
@@ -1365,7 +1258,7 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
tempRADEON_STENCIL_ZPASS_DEC_WRAP = RADEON_STENCIL_ZPASS_DEC_WRAP;
tempRADEON_STENCIL_ZPASS_INC_WRAP = RADEON_STENCIL_ZPASS_INC_WRAP;
}
-
+
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(RADEON_STENCIL_FAIL_MASK |
RADEON_STENCIL_ZFAIL_MASK |
@@ -1455,9 +1348,9 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
static void radeonClearStencil( GLcontext *ctx, GLint s )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
- rmesa->state.stencil.clear =
+ rmesa->radeon.state.stencil.clear =
((GLuint) (ctx->Stencil.Clear & 0xff) |
(0xff << RADEON_STENCIL_MASK_SHIFT) |
((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT));
@@ -1481,20 +1374,30 @@ static void radeonClearStencil( GLcontext *ctx, GLint s )
*/
void radeonUpdateWindow( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
- GLfloat xoffset = (GLfloat)dPriv->x;
- GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
+ GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+ GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
const GLfloat *v = ctx->Viewport._WindowMap.m;
+ const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0);
+ const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+ GLfloat y_scale, y_bias;
+
+ if (render_to_fbo) {
+ y_scale = 1.0;
+ y_bias = 0;
+ } else {
+ y_scale = -1.0;
+ y_bias = yoffset;
+ }
float_ui32_type sx = { v[MAT_SX] };
float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
- float_ui32_type sy = { - v[MAT_SY] };
- float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
- float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
- float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
+ float_ui32_type sy = { v[MAT_SY] * y_scale };
+ float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y };
+ float_ui32_type sz = { v[MAT_SZ] * depthScale };
+ float_ui32_type tz = { v[MAT_TZ] * depthScale };
- RADEON_FIREVERTICES( rmesa );
RADEON_STATECHANGE( rmesa, vpt );
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32;
@@ -1514,6 +1417,8 @@ static void radeonViewport( GLcontext *ctx, GLint x, GLint y,
* values, or keep the originals hanging around.
*/
radeonUpdateWindow( ctx );
+
+ radeon_viewport(ctx, x, y, width, height);
}
static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
@@ -1524,8 +1429,8 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
void radeonUpdateViewportOffset( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
GLfloat xoffset = (GLfloat)dPriv->x;
GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
const GLfloat *v = ctx->Viewport._WindowMap.m;
@@ -1555,8 +1460,8 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
RADEON_STIPPLE_Y_OFFSET_MASK);
/* add magic offsets, then invert */
- stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
- sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
+ stx = 31 - ((dPriv->x - 1) & RADEON_STIPPLE_COORD_MASK);
+ sty = 31 - ((dPriv->y + dPriv->h - 1)
& RADEON_STIPPLE_COORD_MASK);
m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
@@ -1580,20 +1485,26 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLubyte c[4];
+ struct radeon_renderbuffer *rrb;
+
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ if (!rrb)
+ return;
+
CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
- rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp,
+ rmesa->radeon.state.color.clear = radeonPackColor( rrb->cpp,
c[0], c[1], c[2], c[3] );
}
static void radeonRenderMode( GLcontext *ctx, GLenum mode )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
}
@@ -1619,7 +1530,7 @@ static GLuint radeon_rop_tab[] = {
static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint rop = (GLuint)opcode - GL_CLEAR;
ASSERT( rop < 16 );
@@ -1628,108 +1539,16 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = radeon_rop_tab[rop];
}
-
-/**
- * Set up the cliprects for either front or back-buffer drawing.
- */
-void radeonSetCliprects( radeonContextPtr rmesa )
-{
- __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
- __DRIdrawablePrivate *const readable = rmesa->dri.readable;
- GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
- GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
-
- if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
- /* Can't ignore 2d windows if we are page flipping.
- */
- if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
- rmesa->numClipRects = drawable->numClipRects;
- rmesa->pClipRects = drawable->pClipRects;
- }
- else {
- rmesa->numClipRects = drawable->numBackClipRects;
- rmesa->pClipRects = drawable->pBackClipRects;
- }
- }
- else {
- /* front buffer (or none, or multiple buffers */
- rmesa->numClipRects = drawable->numClipRects;
- rmesa->pClipRects = drawable->pClipRects;
- }
-
- if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
- _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
- drawable->w, drawable->h);
- draw_fb->Initialized = GL_TRUE;
- }
-
- if (drawable != readable) {
- if ((read_fb->Width != readable->w) || (read_fb->Height != readable->h)) {
- _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
- readable->w, readable->h);
- read_fb->Initialized = GL_TRUE;
- }
- }
-
- if (rmesa->state.scissor.enabled)
- radeonRecalcScissorRects( rmesa );
-
- rmesa->lastStamp = drawable->lastStamp;
-}
-
-
-/**
- * Called via glDrawBuffer.
- */
-static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
- if (RADEON_DEBUG & DEBUG_DRI)
- fprintf(stderr, "%s %s\n", __FUNCTION__,
- _mesa_lookup_enum_by_nr( mode ));
-
- RADEON_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */
-
- if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
- /* 0 (GL_NONE) buffers or multiple color drawing buffers */
- FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
- return;
- }
-
- switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
- case BUFFER_FRONT_LEFT:
- case BUFFER_BACK_LEFT:
- FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE );
- break;
- default:
- FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
- return;
- }
-
- radeonSetCliprects( rmesa );
-
- /* We'll set the drawing engine's offset/pitch parameters later
- * when we update other state.
- */
-}
-
-static void radeonReadBuffer( GLcontext *ctx, GLenum mode )
-{
- /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
-}
-
-
/* =============================================================
* State enable/disable
*/
static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint p, flag;
- if ( RADEON_DEBUG & DEBUG_STATE )
+ if ( RADEON_DEBUG & RADEON_STATE )
fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
_mesa_lookup_enum_by_nr( cap ),
state ? "GL_TRUE" : "GL_FALSE" );
@@ -1787,7 +1606,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
case GL_CLIP_PLANE2:
case GL_CLIP_PLANE3:
case GL_CLIP_PLANE4:
- case GL_CLIP_PLANE5:
+ case GL_CLIP_PLANE5:
p = cap-GL_CLIP_PLANE0;
RADEON_STATECHANGE( rmesa, tcl );
if (state) {
@@ -1821,10 +1640,10 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
RADEON_STATECHANGE(rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
}
break;
@@ -1852,13 +1671,13 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
case GL_LIGHT7:
RADEON_STATECHANGE(rmesa, tcl);
p = cap - GL_LIGHT0;
- if (p&1)
+ if (p&1)
flag = (RADEON_LIGHT_1_ENABLE |
- RADEON_LIGHT_1_ENABLE_AMBIENT |
+ RADEON_LIGHT_1_ENABLE_AMBIENT |
RADEON_LIGHT_1_ENABLE_SPECULAR);
else
flag = (RADEON_LIGHT_0_ENABLE |
- RADEON_LIGHT_0_ENABLE_AMBIENT |
+ RADEON_LIGHT_0_ENABLE_AMBIENT |
RADEON_LIGHT_0_ENABLE_SPECULAR);
if (state)
@@ -1866,7 +1685,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
else
rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
- /*
+ /*
*/
update_light_colors( ctx, p );
break;
@@ -1904,7 +1723,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
}
break;
-
+
case GL_NORMALIZE:
RADEON_STATECHANGE( rmesa, tcl );
if ( state ) {
@@ -1971,21 +1790,30 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
}
case GL_SCISSOR_TEST:
- RADEON_FIREVERTICES( rmesa );
- rmesa->state.scissor.enabled = state;
+ radeon_firevertices(&rmesa->radeon);
+ rmesa->radeon.state.scissor.enabled = state;
radeonUpdateScissor( ctx );
break;
case GL_STENCIL_TEST:
- if ( rmesa->state.stencil.hwBuffer ) {
- RADEON_STATECHANGE( rmesa, ctx );
- if ( state ) {
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_STENCIL_ENABLE;
+ {
+ GLboolean hw_stencil = GL_FALSE;
+ if (ctx->DrawBuffer) {
+ struct radeon_renderbuffer *rrbStencil
+ = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+ hw_stencil = (rrbStencil && rrbStencil->bo);
+ }
+
+ if (hw_stencil) {
+ RADEON_STATECHANGE( rmesa, ctx );
+ if ( state ) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_STENCIL_ENABLE;
+ } else {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
+ }
} else {
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
+ FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );
}
- } else {
- FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );
}
break;
@@ -1995,7 +1823,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
case GL_TEXTURE_GEN_T:
/* Picked up in radeonUpdateTextureState.
*/
- rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
+ rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
break;
case GL_COLOR_SUM_EXT:
@@ -2010,11 +1838,11 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
static void radeonLightingSpaceChange( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLboolean tmp;
RADEON_STATECHANGE( rmesa, tcl );
- if (RADEON_DEBUG & DEBUG_STATE)
+ if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]);
@@ -2029,7 +1857,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
}
- if (RADEON_DEBUG & DEBUG_STATE)
+ if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]);
}
@@ -2039,7 +1867,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
*/
-void radeonUploadTexMatrix( radeonContextPtr rmesa,
+void radeonUploadTexMatrix( r100ContextPtr rmesa,
int unit, GLboolean swapcols )
{
/* Here's how this works: on r100, only 3 tex coords can be submitted, so the
@@ -2065,7 +1893,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
int idx = TEXMAT_0 + unit;
float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0;
int i;
- struct gl_texture_unit tUnit = rmesa->glCtx->Texture.Unit[unit];
+ struct gl_texture_unit tUnit = rmesa->radeon.glCtx->Texture.Unit[unit];
GLfloat *src = rmesa->tmpmat[unit].m;
rmesa->TexMatColSwap &= ~(1 << unit);
@@ -2119,7 +1947,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
}
-static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
+static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx )
{
float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
int i;
@@ -2135,7 +1963,7 @@ static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
}
-static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
+static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx )
{
float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
memcpy(dest, src, 16*sizeof(float));
@@ -2145,7 +1973,7 @@ static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
static void update_texturematrix( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+ r100ContextPtr rmesa = R100_CONTEXT( ctx );
GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];
GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
int unit;
@@ -2209,64 +2037,74 @@ static void update_texturematrix( GLcontext *ctx )
}
}
-
-/**
- * Tell the card where to render (offset, pitch).
- * Effected by glDrawBuffer, etc
- */
-void
-radeonUpdateDrawBuffer(GLcontext *ctx)
+static GLboolean r100ValidateBuffers(GLcontext *ctx)
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- driRenderbuffer *drb;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb;
+ int i, ret;
- if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
- /* draw to front */
- drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
- }
- else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
- /* draw to back */
- drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+ radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ /* color buffer */
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
+ 0, RADEON_GEM_DOMAIN_VRAM);
}
- else {
- /* drawing to multiple buffers, or none */
- return;
+
+ /* depth buffer */
+ rrb = radeon_get_depthbuffer(&rmesa->radeon);
+ /* color buffer */
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
+ 0, RADEON_GEM_DOMAIN_VRAM);
}
- assert(drb);
- assert(drb->flippedPitch);
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+ radeonTexObj *t;
- RADEON_STATECHANGE( rmesa, ctx );
+ if (!ctx->Texture.Unit[i]._ReallyEnabled)
+ continue;
- /* Note: we used the (possibly) page-flipped values */
- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
- = ((drb->flippedOffset + rmesa->radeonScreen->fbLocation)
- & RADEON_COLOROFFSET_MASK);
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
- if (rmesa->sarea->tiling_enabled) {
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
+ t = rmesa->state.texture.unit[i].texobj;
+ if (t->image_override && t->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+ else if (t->mt->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->mt->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
-}
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+ return GL_TRUE;
+}
-void radeonValidateState( GLcontext *ctx )
+GLboolean radeonValidateState( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- GLuint new_state = rmesa->NewGLState;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint new_state = rmesa->radeon.NewGLState;
- if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
- radeonUpdateDrawBuffer(ctx);
+ if (new_state & _NEW_BUFFERS) {
+ _mesa_update_framebuffer(ctx);
+ /* this updates the DrawBuffer's Width/Height if it's a FBO */
+ _mesa_update_draw_buffer_bounds(ctx);
+ RADEON_STATECHANGE(rmesa, ctx);
}
if (new_state & _NEW_TEXTURE) {
radeonUpdateTextureState( ctx );
- new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
+ new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
}
+ /* we need to do a space check here */
+ if (!r100ValidateBuffers(ctx))
+ return GL_FALSE;
+
/* Need an event driven matrix update?
*/
- if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
+ if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, MODEL_PROJ );
/* Need these for lighting (shouldn't upload otherwise)
@@ -2290,12 +2128,14 @@ void radeonValidateState( GLcontext *ctx )
/* emit all active clip planes if projection matrix changes.
*/
if (new_state & (_NEW_PROJECTION)) {
- if (ctx->Transform.ClipPlanesEnabled)
+ if (ctx->Transform.ClipPlanesEnabled)
radeonUpdateClipPlanes( ctx );
}
- rmesa->NewGLState = 0;
+ rmesa->radeon.NewGLState = 0;
+
+ return GL_TRUE;
}
@@ -2306,7 +2146,7 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
_vbo_InvalidateState( ctx, new_state );
_tnl_InvalidateState( ctx, new_state );
_ae_invalidate_state( ctx, new_state );
- RADEON_CONTEXT(ctx)->NewGLState |= new_state;
+ R100_CONTEXT(ctx)->radeon.NewGLState |= new_state;
}
@@ -2317,8 +2157,8 @@ static GLboolean check_material( GLcontext *ctx )
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLint i;
- for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT;
- i < _TNL_ATTRIB_MAT_BACK_INDEXES;
+ for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT;
+ i < _TNL_ATTRIB_MAT_BACK_INDEXES;
i++)
if (tnl->vb.AttribPtr[i] &&
tnl->vb.AttribPtr[i]->stride)
@@ -2326,20 +2166,21 @@ static GLboolean check_material( GLcontext *ctx )
return GL_FALSE;
}
-
+
static void radeonWrapRunPipeline( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLboolean has_material;
if (0)
- fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
+ fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
/* Validate state:
*/
- if (rmesa->NewGLState)
- radeonValidateState( ctx );
+ if (rmesa->radeon.NewGLState)
+ if (!radeonValidateState( ctx ))
+ FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
has_material = (ctx->Light.Enabled && check_material( ctx ));
@@ -2348,7 +2189,7 @@ static void radeonWrapRunPipeline( GLcontext *ctx )
}
/* Run the pipeline.
- */
+ */
_tnl_run_pipeline( ctx );
if (has_material) {
@@ -2356,12 +2197,28 @@ static void radeonWrapRunPipeline( GLcontext *ctx )
}
}
+static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ GLint i;
+
+ radeon_firevertices(&r100->radeon);
+
+ RADEON_STATECHANGE(r100, stp);
+
+ /* Must flip pattern upside down.
+ */
+ for ( i = 31 ; i >= 0; i--) {
+ r100->hw.stp.cmd[3 + i] = ((GLuint *) mask)[i];
+ }
+}
+
/* Initialize the driver's state functions.
* Many of the ctx->Driver functions might have been initialized to
* software defaults in the earlier _mesa_init_driver_functions() call.
*/
-void radeonInitStateFuncs( GLcontext *ctx )
+void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2 )
{
ctx->Driver.UpdateState = radeonInvalidateState;
ctx->Driver.LightingSpaceChange = radeonLightingSpaceChange;
@@ -2394,7 +2251,10 @@ void radeonInitStateFuncs( GLcontext *ctx )
ctx->Driver.LogicOpcode = radeonLogicOpCode;
ctx->Driver.PolygonMode = radeonPolygonMode;
ctx->Driver.PolygonOffset = radeonPolygonOffset;
- ctx->Driver.PolygonStipple = radeonPolygonStipple;
+ if (dri2)
+ ctx->Driver.PolygonStipple = radeonPolygonStipple;
+ else
+ ctx->Driver.PolygonStipple = radeonPolygonStipplePreKMS;
ctx->Driver.RenderMode = radeonRenderMode;
ctx->Driver.Scissor = radeonScissor;
ctx->Driver.ShadeModel = radeonShadeModel;
diff --git a/radeon/radeon_state.h b/radeon/radeon_state.h
index 2171879..c780cff 100644
--- a/radeon/radeon_state.h
+++ b/radeon/radeon_state.h
@@ -39,30 +39,25 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_context.h"
-extern void radeonInitState( radeonContextPtr rmesa );
-extern void radeonInitStateFuncs( GLcontext *ctx );
+extern void radeonInitState( r100ContextPtr rmesa );
+extern void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2);
extern void radeonUpdateMaterial( GLcontext *ctx );
-extern void radeonSetCliprects( radeonContextPtr rmesa );
-extern void radeonRecalcScissorRects( radeonContextPtr rmesa );
extern void radeonUpdateViewportOffset( GLcontext *ctx );
extern void radeonUpdateWindow( GLcontext *ctx );
extern void radeonUpdateDrawBuffer( GLcontext *ctx );
-extern void radeonUploadTexMatrix( radeonContextPtr rmesa,
+extern void radeonUploadTexMatrix( r100ContextPtr rmesa,
int unit, GLboolean swapcols );
-extern void radeonValidateState( GLcontext *ctx );
-
-extern void radeonPrintDirty( radeonContextPtr rmesa,
- const char *msg );
+extern GLboolean radeonValidateState( GLcontext *ctx );
extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
#define FALLBACK( rmesa, bit, mode ) do { \
if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
__FUNCTION__, bit, mode ); \
- radeonFallback( rmesa->glCtx, bit, mode ); \
+ radeonFallback( rmesa->radeon.glCtx, bit, mode ); \
} while (0)
diff --git a/radeon/radeon_state_init.c b/radeon/radeon_state_init.c
index 57dc380..f3ad0dd 100644
--- a/radeon/radeon_state_init.c
+++ b/radeon/radeon_state_init.c
@@ -38,39 +38,141 @@
#include "swrast_setup/swrast_setup.h"
#include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_tcl.h"
#include "radeon_tex.h"
#include "radeon_swtcl.h"
+#include "radeon_queryobj.h"
+
+#include "../r200/r200_reg.h"
#include "xmlpool.h"
+/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
+ * 1.3 cmdbuffers allow all previous state to be updated as well as
+ * the tcl scalar and vector areas.
+ */
+static struct {
+ int start;
+ int len;
+ const char *name;
+} packet[RADEON_MAX_STATE_PACKETS] = {
+ {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
+ {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
+ {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
+ {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
+ {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
+ {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
+ {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
+ {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
+ {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
+ {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
+ {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
+ {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
+ {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
+ {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
+ {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
+ {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
+ {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
+ {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
+ {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
+ {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
+ {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
+ "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
+ {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
+ {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
+ {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
+ {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
+ {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
+ {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
+ {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
+ {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
+ {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
+ {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
+ {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
+ {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
+ {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
+ {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
+ {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
+ {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
+ {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
+ {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
+ {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
+ {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
+ {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
+ {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
+ {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
+ {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
+ {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
+ {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
+ {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
+ {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
+ {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
+ "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
+ {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
+ {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
+ {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
+ {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
+ {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
+ {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
+ {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
+ {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
+ {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
+ {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
+ {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
+ "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
+ {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */
+ {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
+ {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
+ {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
+ {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
+ {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
+ {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
+ {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
+ {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
+ {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
+ {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
+ {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
+ {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
+ {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
+ {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
+ {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
+ {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
+ {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
+ {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
+ {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
+ {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
+ {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
+ {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
+ {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
+ {R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */
+ {R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
+ {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
+ {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
+ {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
+ {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
+ {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
+ {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
+ {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
+ {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
+};
+
/* =============================================================
* State initialization
*/
-
-void radeonPrintDirty( radeonContextPtr rmesa, const char *msg )
+static int cmdpkt( r100ContextPtr rmesa, int id )
{
- struct radeon_state_atom *l;
-
- fprintf(stderr, msg);
- fprintf(stderr, ": ");
+ drm_radeon_cmd_header_t h;
- foreach(l, &rmesa->hw.atomlist) {
- if (l->dirty || rmesa->hw.all_dirty)
- fprintf(stderr, "%s, ", l->name);
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ return CP_PACKET0(packet[id].start, packet[id].len - 1);
+ } else {
+ h.i = 0;
+ h.packet.cmd_type = RADEON_CMD_PACKET;
+ h.packet.packet_id = id;
}
-
- fprintf(stderr, "\n");
-}
-
-static int cmdpkt( int id )
-{
- drm_radeon_cmd_header_t h;
- h.i = 0;
- h.packet.cmd_type = RADEON_CMD_PACKET;
- h.packet.packet_id = id;
return h.i;
}
@@ -96,131 +198,523 @@ static int cmdscl( int offset, int stride, int count )
return h.i;
}
-#define CHECK( NM, FLAG ) \
-static GLboolean check_##NM( GLcontext *ctx ) \
-{ \
- return FLAG; \
+#define CHECK( NM, FLAG, ADD ) \
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
+{ \
+ return FLAG ? atom->cmd_size + (ADD) : 0; \
}
-#define TCL_CHECK( NM, FLAG ) \
-static GLboolean check_##NM( GLcontext *ctx ) \
+#define TCL_CHECK( NM, FLAG, ADD ) \
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
{ \
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \
- return !rmesa->TclFallback && (FLAG); \
+ r100ContextPtr rmesa = R100_CONTEXT(ctx); \
+ return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
-CHECK( always, GL_TRUE )
-CHECK( never, GL_FALSE )
-CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled )
-CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+CHECK( always, GL_TRUE, 0 )
+CHECK( always_add2, GL_TRUE, 2 )
+CHECK( always_add4, GL_TRUE, 4 )
+CHECK( never, GL_FALSE, 0 )
+CHECK( tex0_mm, ctx->Texture.Unit[0]._ReallyEnabled, 3 )
+CHECK( tex1_mm, ctx->Texture.Unit[1]._ReallyEnabled, 3 )
/* need this for the cubic_map on disabled unit 2 bug, maybe r100 only? */
-CHECK( tex2, ctx->Texture._EnabledUnits )
-CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT))
-CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT))
-CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT))
-CHECK( fog, ctx->Fog.Enabled )
-TCL_CHECK( tcl, GL_TRUE )
-TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled )
-TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled )
-TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled )
-TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
-TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled )
-TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled )
-TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled )
-TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled )
-TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled )
-TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled )
-TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled )
-TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled )
-TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled )
-TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1) )
-TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2) )
-TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4) )
-TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8) )
-TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10) )
-TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20) )
-TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled )
-
-CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT))
-CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT))
-CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT))
+CHECK( tex2_mm, ctx->Texture._EnabledUnits, 3 )
+CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled, 2 )
+CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled, 2 )
+CHECK( tex2, ctx->Texture._EnabledUnits, 2 )
+CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube0_mm, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( cube1_mm, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( cube2_mm, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( fog, ctx->Fog.Enabled, 0 )
+CHECK( fog_add4, ctx->Fog.Enabled, 4 )
+TCL_CHECK( tcl, GL_TRUE, 0 )
+TCL_CHECK( tcl_add4, GL_TRUE, 4 )
+TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex0_add4, ctx->Texture.Unit[0]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_tex1_add4, ctx->Texture.Unit[1]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_tex2_add4, ctx->Texture.Unit[2]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_eyespace_or_lighting_add4, ctx->_NeedEyeCoords || ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 0 )
+TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 0 )
+TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 0 )
+TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 0 )
+TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 0 )
+TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 0 )
+TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 0 )
+TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 0 )
+TCL_CHECK( tcl_lit0_add6, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 6 )
+TCL_CHECK( tcl_lit1_add6, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 6 )
+TCL_CHECK( tcl_lit2_add6, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 6 )
+TCL_CHECK( tcl_lit3_add6, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 6 )
+TCL_CHECK( tcl_lit4_add6, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 6 )
+TCL_CHECK( tcl_lit5_add6, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 6 )
+TCL_CHECK( tcl_lit6_add6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 6 )
+TCL_CHECK( tcl_lit7_add6, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 6 )
+TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1), 0 )
+TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2), 0 )
+TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4), 0 )
+TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8), 0 )
+TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10), 0 )
+TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20), 0 )
+TCL_CHECK( tcl_ucp0_add4, (ctx->Transform.ClipPlanesEnabled & 0x1), 4 )
+TCL_CHECK( tcl_ucp1_add4, (ctx->Transform.ClipPlanesEnabled & 0x2), 4 )
+TCL_CHECK( tcl_ucp2_add4, (ctx->Transform.ClipPlanesEnabled & 0x4), 4 )
+TCL_CHECK( tcl_ucp3_add4, (ctx->Transform.ClipPlanesEnabled & 0x8), 4 )
+TCL_CHECK( tcl_ucp4_add4, (ctx->Transform.ClipPlanesEnabled & 0x10), 4 )
+TCL_CHECK( tcl_ucp5_add4, (ctx->Transform.ClipPlanesEnabled & 0x20), 4 )
+TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 0 )
+TCL_CHECK( tcl_eyespace_or_fog_add4, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 4 )
+
+CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
+CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
+CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
+
+#define OUT_VEC(hdr, data) do { \
+ drm_radeon_cmd_header_t h; \
+ h.i = hdr; \
+ OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \
+ OUT_BATCH(0); \
+ OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \
+ OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
+ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \
+ OUT_BATCH_TABLE((data), h.vectors.count); \
+ } while(0)
+
+#define OUT_SCL(hdr, data) do { \
+ drm_radeon_cmd_header_t h; \
+ h.i = hdr; \
+ OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \
+ OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
+ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \
+ OUT_BATCH_TABLE((data), h.scalars.count); \
+ } while(0)
+
+static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ BATCH_LOCALS(&r100->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_SCL(atom->cmd[0], atom->cmd+1);
+ END_BATCH();
+}
+static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ BATCH_LOCALS(&r100->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
-/* Initialize the context's hardware state.
- */
-void radeonInitState( radeonContextPtr rmesa )
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_VEC(atom->cmd[0], atom->cmd+1);
+ END_BATCH();
+}
+
+
+static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
- GLcontext *ctx = rmesa->glCtx;
- GLuint color_fmt, depth_fmt, i;
- GLint drawPitch, drawOffset;
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ BATCH_LOCALS(&r100->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
+ OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
+ END_BATCH();
+}
- switch ( rmesa->radeonScreen->cpp ) {
- case 2:
- color_fmt = RADEON_COLOR_FORMAT_RGB565;
- break;
- case 4:
- color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
- break;
- default:
- fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
- exit( -1 );
+static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ BATCH_LOCALS(&r100->radeon);
+ struct radeon_renderbuffer *rrb;
+ uint32_t cbpitch;
+ uint32_t zbpitch, depth_fmt;
+ uint32_t dwords = atom->check(ctx, atom);
+
+ /* output the first 7 bytes of context */
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(atom->cmd, 5);
+
+ rrb = radeon_get_depthbuffer(&r100->radeon);
+ if (!rrb) {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else {
+ zbpitch = (rrb->pitch / rrb->cpp);
+ if (r100->using_hyperz)
+ zbpitch |= RADEON_DEPTH_HYPERZ;
+
+ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH(zbpitch);
+ if (rrb->cpp == 4)
+ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+ else
+ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
+ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
+ }
+
+ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
+ OUT_BATCH(atom->cmd[CTX_CMD_1]);
+ OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
+
+ rrb = radeon_get_colorbuffer(&r100->radeon);
+ if (!rrb || !rrb->bo) {
+ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+ OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
+ } else {
+ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
+ if (rrb->cpp == 4)
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
+ else
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
+
+ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
}
- rmesa->state.color.clear = 0x00000000;
+ OUT_BATCH(atom->cmd[CTX_CMD_2]);
+
+ if (!rrb || !rrb->bo) {
+ OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
+ } else {
+ cbpitch = (rrb->pitch / rrb->cpp);
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ cbpitch |= RADEON_COLOR_TILE_ENABLE;
+ OUT_BATCH(cbpitch);
+ }
+
+ END_BATCH();
+}
+
+static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb, *drb;
+ uint32_t dwords;
+
+ rrb = radeon_get_colorbuffer(&r100->radeon);
+ if (!rrb || !rrb->bo) {
+ return 0;
+ }
+
+ drb = radeon_get_depthbuffer(&r100->radeon);
+
+ dwords = 10;
+ if (drb)
+ dwords += 6;
+ if (rrb)
+ dwords += 8;
+
+ return dwords;
+}
+
+static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ BATCH_LOCALS(&r100->radeon);
+ struct radeon_renderbuffer *rrb, *drb;
+ uint32_t cbpitch = 0;
+ uint32_t zbpitch = 0;
+ uint32_t dwords = atom->check(ctx, atom);
+ uint32_t depth_fmt;
+
+ rrb = radeon_get_colorbuffer(&r100->radeon);
+ if (!rrb || !rrb->bo) {
+ fprintf(stderr, "no rrb\n");
+ return;
+ }
+
+ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
+ if (rrb->cpp == 4)
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
+ else switch (rrb->base._ActualFormat) {
+ case GL_RGB5:
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
+ break;
+ case GL_RGBA4:
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
+ break;
+ case GL_RGB5_A1:
+ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
+ break;
+ }
+
+ cbpitch = (rrb->pitch / rrb->cpp);
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ cbpitch |= R200_COLOR_TILE_ENABLE;
+
+ drb = radeon_get_depthbuffer(&r100->radeon);
+ if (drb) {
+ zbpitch = (drb->pitch / drb->cpp);
+ if (drb->cpp == 4)
+ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+ else
+ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
+ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
+
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+
+ /* In the CS case we need to split this up */
+ OUT_BATCH(CP_PACKET0(packet[0].start, 3));
+ OUT_BATCH_TABLE((atom->cmd + 1), 4);
+
+ if (drb) {
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
+ OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
+ OUT_BATCH(zbpitch);
+ }
+
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
+ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
+ OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
+ OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
+ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+
+ if (rrb) {
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
+ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
+ OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ }
+
+ // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
+ // OUT_BATCH_TABLE((atom->cmd + 14), 4);
+ // }
+
+ END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
+ OUT_BATCH(0);
+ OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
+ if (rrb) {
+ OUT_BATCH(((rrb->base.Width - 1) << RADEON_RE_WIDTH_SHIFT) |
+ ((rrb->base.Height - 1) << RADEON_RE_HEIGHT_SHIFT));
+ } else {
+ OUT_BATCH(0);
+ }
+ END_BATCH();
+}
+
+static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ BATCH_LOCALS(&r100->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+ int i = atom->idx, j;
+ radeonTexObj *t = r100->state.texture.unit[i].texobj;
+ radeon_mipmap_level *lvl;
+
+ if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT))
+ return;
+
+ if (!t)
+ return;
+
+ if (!t->mt)
+ return;
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(atom->cmd, 3);
+ lvl = &t->mt->levels[0];
+ for (j = 0; j < 5; j++) {
+ OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
+ RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ }
+ END_BATCH();
+}
+
+static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ BATCH_LOCALS(&r100->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+ int i = atom->idx, j;
+ radeonTexObj *t = r100->state.texture.unit[i].texobj;
+ radeon_mipmap_level *lvl;
+ uint32_t base_reg;
+
+ if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT))
+ return;
+
+ if (!t)
+ return;
+
+ if (!t->mt)
+ return;
+
+ switch(i) {
+ case 1: base_reg = RADEON_PP_CUBIC_OFFSET_T1_0; break;
+ case 2: base_reg = RADEON_PP_CUBIC_OFFSET_T2_0; break;
+ default:
+ case 0: base_reg = RADEON_PP_CUBIC_OFFSET_T0_0; break;
+ };
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(atom->cmd, 2);
+ lvl = &t->mt->levels[0];
+ for (j = 0; j < 5; j++) {
+ OUT_BATCH(CP_PACKET0(base_reg + (4 * j), 0));
+ OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ }
+ END_BATCH();
+}
+
+static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ BATCH_LOCALS(&r100->radeon);
+ uint32_t dwords = atom->cmd_size;
+ int i = atom->idx;
+ radeonTexObj *t = r100->state.texture.unit[i].texobj;
+ radeon_mipmap_level *lvl;
+
+ if (t && t->mt && !t->image_override)
+ dwords += 2;
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+
+ OUT_BATCH_TABLE(atom->cmd, 3);
+ if (t && t->mt && !t->image_override) {
+ if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
+ lvl = &t->mt->levels[0];
+ OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ } else {
+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ }
+ } else if (!t) {
+ /* workaround for old CS mechanism */
+ OUT_BATCH(r100->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
+ // OUT_BATCH(r100->radeon.radeonScreen);
+ } else {
+ OUT_BATCH(t->override_offset);
+ }
+
+ OUT_BATCH_TABLE((atom->cmd+4), 5);
+ END_BATCH();
+}
+
+static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ BATCH_LOCALS(&r100->radeon);
+ uint32_t dwords = atom->cmd_size;
+ int i = atom->idx;
+ radeonTexObj *t = r100->state.texture.unit[i].texobj;
+ radeon_mipmap_level *lvl;
+ int hastexture = 1;
+
+ if (!t)
+ hastexture = 0;
+ else {
+ if (!t->mt && !t->bo)
+ hastexture = 0;
+ }
+ dwords += 1;
+ if (hastexture)
+ dwords += 2;
+ else
+ dwords -= 2;
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+
+ OUT_BATCH(CP_PACKET0(RADEON_PP_TXFILTER_0 + (24 * i), 1));
+ OUT_BATCH_TABLE((atom->cmd + 1), 2);
+
+ if (hastexture) {
+ OUT_BATCH(CP_PACKET0(RADEON_PP_TXOFFSET_0 + (24 * i), 0));
+ if (t->mt && !t->image_override) {
+ if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
+ lvl = &t->mt->levels[0];
+ OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ } else {
+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ }
+ } else {
+ if (t->bo)
+ OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ }
+ }
+
+ OUT_BATCH(CP_PACKET0(RADEON_PP_TXCBLEND_0 + (i * 24), 1));
+ OUT_BATCH_TABLE((atom->cmd+4), 2);
+ OUT_BATCH(CP_PACKET0(RADEON_PP_BORDER_COLOR_0 + (i * 4), 0));
+ OUT_BATCH((atom->cmd[TEX_PP_BORDER_COLOR]));
+ END_BATCH();
+}
+
+/* Initialize the context's hardware state.
+ */
+void radeonInitState( r100ContextPtr rmesa )
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+ GLuint i;
+
+ rmesa->radeon.state.color.clear = 0x00000000;
switch ( ctx->Visual.depthBits ) {
case 16:
- rmesa->state.depth.clear = 0x0000ffff;
- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
- depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
- rmesa->state.stencil.clear = 0x00000000;
+ rmesa->radeon.state.depth.clear = 0x0000ffff;
+ rmesa->radeon.state.stencil.clear = 0x00000000;
break;
case 24:
- rmesa->state.depth.clear = 0x00ffffff;
- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
- depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
- rmesa->state.stencil.clear = 0xffff0000;
+ rmesa->radeon.state.depth.clear = 0x00ffffff;
+ rmesa->radeon.state.stencil.clear = 0xffff0000;
break;
default:
- fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
- ctx->Visual.depthBits );
- exit( -1 );
+ break;
}
- /* Only have hw stencil when depth buffer is 24 bits deep */
- rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
- ctx->Visual.depthBits == 24 );
+ rmesa->radeon.Fallback = 0;
- rmesa->Fallback = 0;
- if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
- drawOffset = rmesa->radeonScreen->backOffset;
- drawPitch = rmesa->radeonScreen->backPitch;
- } else {
- drawOffset = rmesa->radeonScreen->frontOffset;
- drawPitch = rmesa->radeonScreen->frontPitch;
- }
+ rmesa->radeon.hw.max_state_size = 0;
- rmesa->hw.max_state_size = 0;
-
-#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \
+#define ALLOC_STATE_IDX( ATOM, CHK, SZ, NM, FLAG, IDX ) \
do { \
rmesa->hw.ATOM.cmd_size = SZ; \
- rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \
- rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \
- rmesa->hw.ATOM.name = NM; \
+ rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
+ rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
+ rmesa->hw.ATOM.name = NM; \
rmesa->hw.ATOM.is_tcl = FLAG; \
rmesa->hw.ATOM.check = check_##CHK; \
- rmesa->hw.ATOM.dirty = GL_TRUE; \
- rmesa->hw.max_state_size += SZ * sizeof(int); \
+ rmesa->hw.ATOM.dirty = GL_TRUE; \
+ rmesa->hw.ATOM.idx = IDX; \
+ rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \
} while (0)
-
-
+
+#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \
+ ALLOC_STATE_IDX(ATOM, CHK, SZ, NM, FLAG, 0)
+
/* Allocate state buffers:
*/
- ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
+ ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE, "CTX/context", 0 );
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ rmesa->hw.ctx.emit = ctx_emit_cs;
+ rmesa->hw.ctx.check = check_always_ctx;
+ } else
+ rmesa->hw.ctx.emit = ctx_emit;
ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
@@ -229,82 +723,133 @@ void radeonInitState( radeonContextPtr rmesa )
ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 );
ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 );
- ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 );
- ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
- ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
- ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
- ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
- ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 );
- if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE( grd, always_add2, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+ ALLOC_STATE( fog, fog_add4, FOG_STATE_SIZE, "FOG/fog", 1 );
+ ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 1 );
+ ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+ ALLOC_STATE_IDX( tex[0], tex0_mm, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
+ ALLOC_STATE_IDX( tex[1], tex1_mm, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
+ ALLOC_STATE_IDX( tex[2], tex2_mm, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
+ ALLOC_STATE( mat[0], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+ ALLOC_STATE( mat[1], tcl_eyespace_or_fog_add4, MAT_STATE_SIZE, "MAT/modelview", 1 );
+ ALLOC_STATE( mat[2], tcl_eyespace_or_lighting_add4, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+ ALLOC_STATE( mat[3], tcl_tex0_add4, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+ ALLOC_STATE( mat[4], tcl_tex1_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+ ALLOC_STATE( mat[5], tcl_tex2_add4, MAT_STATE_SIZE, "MAT/texmat2", 1 );
+ ALLOC_STATE( lit[0], tcl_lit0_add6, LIT_STATE_SIZE, "LIT/light-0", 1 );
+ ALLOC_STATE( lit[1], tcl_lit1_add6, LIT_STATE_SIZE, "LIT/light-1", 1 );
+ ALLOC_STATE( lit[2], tcl_lit2_add6, LIT_STATE_SIZE, "LIT/light-2", 1 );
+ ALLOC_STATE( lit[3], tcl_lit3_add6, LIT_STATE_SIZE, "LIT/light-3", 1 );
+ ALLOC_STATE( lit[4], tcl_lit4_add6, LIT_STATE_SIZE, "LIT/light-4", 1 );
+ ALLOC_STATE( lit[5], tcl_lit5_add6, LIT_STATE_SIZE, "LIT/light-5", 1 );
+ ALLOC_STATE( lit[6], tcl_lit6_add6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+ ALLOC_STATE( lit[7], tcl_lit7_add6, LIT_STATE_SIZE, "LIT/light-7", 1 );
+ ALLOC_STATE( ucp[0], tcl_ucp0_add4, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+ ALLOC_STATE( ucp[1], tcl_ucp1_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+ ALLOC_STATE( ucp[2], tcl_ucp2_add4, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+ ALLOC_STATE( ucp[3], tcl_ucp3_add4, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+ ALLOC_STATE( ucp[4], tcl_ucp4_add4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+ ALLOC_STATE( ucp[5], tcl_ucp5_add4, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+ } else {
+ ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+ ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
+ ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
+ ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+ ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
+ ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
+ ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
+ ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+ ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
+ ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+ ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+ ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+ ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 );
+ ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 );
+ ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 );
+ ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 );
+ ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 );
+ ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 );
+ ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
+ ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+ ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
+ ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+ ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+ ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+ ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+ ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+ ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+ }
+
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 );
+ }
+
+ for (i = 0; i < 3; i++) {
+ if (rmesa->radeon.radeonScreen->kernel_mm)
+ rmesa->hw.tex[i].emit = tex_emit_cs;
+ else
+ rmesa->hw.tex[i].emit = tex_emit;
+ }
+ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
{
- ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
- ALLOC_STATE( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
- ALLOC_STATE( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE_IDX( cube[0], cube0_mm, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+ ALLOC_STATE_IDX( cube[1], cube1_mm, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+ ALLOC_STATE_IDX( cube[2], cube2_mm, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+ for (i = 0; i < 3; i++)
+ rmesa->hw.cube[i].emit = cube_emit_cs;
+ } else {
+ ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+ ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+ ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+ for (i = 0; i < 3; i++)
+ rmesa->hw.cube[i].emit = cube_emit;
+ }
}
else
{
- ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
- ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
- ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
- }
- ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
- ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
- ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
- ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
- ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
- ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 );
- ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
- ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
- ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
- ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
- ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
- ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
- ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 );
- ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 );
- ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 );
- ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 );
- ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 );
- ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
- ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
- ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
- ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
- ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
- ALLOC_STATE( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0 );
+ ALLOC_STATE_IDX( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+ ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+ ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+ }
+ ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 );
+ ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 );
+ ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 );
radeonSetUpAtomList( rmesa );
/* Fill in the packet headers:
*/
- rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
- rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
- rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
- rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
- rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
- rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
- rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
- rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
- rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS);
- rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
- rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0);
- rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
- rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
- rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
- rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2);
- rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2);
- rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0);
- rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
- rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1);
- rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
- rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_2);
- rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
- rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
- rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
+ rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
+ rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
+ rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
+ rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
+ rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
+ rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
+ rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
+ rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
+ rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS);
+ rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
+ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0);
+ rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0);
+ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1);
+ rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1);
+ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2);
+ rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2);
+ rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0);
+ rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
+ rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_1);
+ rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
+ rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2);
+ rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
+ rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
+ rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
rmesa->hw.mtl.cmd[MTL_CMD_0] =
- cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
- rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0);
- rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1);
- rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2);
+ cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
+ rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0);
+ rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1);
+ rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2);
rmesa->hw.grd.cmd[GRD_CMD_0] =
cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
rmesa->hw.fog.cmd[FOG_CMD_0] =
@@ -331,6 +876,26 @@ void radeonInitState( radeonContextPtr rmesa )
cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 );
}
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
+ rmesa->hw.stp.cmd[STP_DATA_0] = 0;
+ rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
+
+ rmesa->hw.grd.emit = scl_emit;
+ rmesa->hw.fog.emit = vec_emit;
+ rmesa->hw.glt.emit = vec_emit;
+ rmesa->hw.eye.emit = vec_emit;
+
+ for (i = 0; i < 6; i++)
+ rmesa->hw.mat[i].emit = vec_emit;
+
+ for (i = 0; i < 8; i++)
+ rmesa->hw.lit[i].emit = lit_emit;
+
+ for (i = 0; i < 6; i++)
+ rmesa->hw.ucp[i].emit = vec_emit;
+ }
+
rmesa->last_ReallyEnabled = -1;
/* Initial Harware state:
@@ -352,19 +917,7 @@ void radeonInitState( radeonContextPtr rmesa )
RADEON_SRC_BLEND_GL_ONE |
RADEON_DST_BLEND_GL_ZERO );
- rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
- rmesa->radeonScreen->depthOffset + rmesa->radeonScreen->fbLocation;
-
- rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] =
- ((rmesa->radeonScreen->depthPitch &
- RADEON_DEPTHPITCH_MASK) |
- RADEON_DEPTH_ENDIAN_NO_SWAP);
-
- if (rmesa->using_hyperz)
- rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= RADEON_DEPTH_HYPERZ;
-
- rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
- RADEON_Z_TEST_LESS |
+ rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (RADEON_Z_TEST_LESS |
RADEON_STENCIL_TEST_ALWAYS |
RADEON_STENCIL_FAIL_KEEP |
RADEON_STENCIL_ZPASS_KEEP |
@@ -374,7 +927,7 @@ void radeonInitState( radeonContextPtr rmesa )
if (rmesa->using_hyperz) {
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE |
RADEON_Z_DECOMPRESSION_ENABLE;
- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
/* works for q3, but slight rendering errors with glxgears ? */
/* rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
/* need this otherwise get lots of lockups with q3 ??? */
@@ -386,10 +939,9 @@ void radeonInitState( radeonContextPtr rmesa )
RADEON_ANTI_ALIAS_NONE);
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE |
- color_fmt |
RADEON_ZBLOCK16);
- switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
+ switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
case DRI_CONF_DITHER_XERRORDIFFRESET:
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT;
break;
@@ -397,30 +949,17 @@ void radeonInitState( radeonContextPtr rmesa )
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE;
break;
}
- if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
+ if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
DRI_CONF_ROUND_ROUND )
- rmesa->state.color.roundEnable = RADEON_ROUND_ENABLE;
+ rmesa->radeon.state.color.roundEnable = RADEON_ROUND_ENABLE;
else
- rmesa->state.color.roundEnable = 0;
- if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
+ rmesa->radeon.state.color.roundEnable = 0;
+ if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
DRI_CONF_COLOR_REDUCTION_DITHER )
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
else
- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
-
- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
- rmesa->radeonScreen->fbLocation)
- & RADEON_COLOROFFSET_MASK);
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
- RADEON_COLORPITCH_MASK) |
- RADEON_COLOR_ENDIAN_NO_SWAP);
-
-
- /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
- if (rmesa->sarea->tiling_enabled) {
- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
- }
rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW |
RADEON_BFACE_SOLID |
@@ -444,7 +983,7 @@ void radeonInitState( radeonContextPtr rmesa )
RADEON_VC_NO_SWAP;
#endif
- if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+ if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS;
}
@@ -491,8 +1030,8 @@ void radeonInitState( radeonContextPtr rmesa )
(2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
/* Initialize the texture offset to the start of the card texture heap */
- rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ // rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
+ // rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] =
@@ -513,15 +1052,15 @@ void radeonInitState( radeonContextPtr rmesa )
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] =
- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] =
- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] =
- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] =
- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] =
- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
}
/* Can only add ST1 at the time of doing some multitex but can keep
@@ -612,6 +1151,14 @@ void radeonInitState( radeonContextPtr rmesa )
rmesa->hw.eye.cmd[EYE_Y] = 0;
rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
-
- rmesa->hw.all_dirty = GL_TRUE;
+
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ radeon_init_query_stateobj(&rmesa->radeon, R100_QUERYOBJ_CMDSIZE);
+ rmesa->radeon.query.queryobj.cmd[R100_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0);
+ rmesa->radeon.query.queryobj.cmd[R100_QUERYOBJ_DATA_0] = 0;
+ }
+
+ rmesa->radeon.hw.all_dirty = GL_TRUE;
+
+ rcommonInitCmdBuf(&rmesa->radeon);
}
diff --git a/radeon/radeon_swtcl.c b/radeon/radeon_swtcl.c
index ebea1fe..e61f59e 100644
--- a/radeon/radeon_swtcl.c
+++ b/radeon/radeon_swtcl.c
@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/enums.h"
#include "main/imports.h"
#include "main/macros.h"
+#include "main/simple_list.h"
#include "swrast_setup/swrast_setup.h"
#include "math/m_translate.h"
@@ -50,10 +51,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_state.h"
#include "radeon_swtcl.h"
#include "radeon_tcl.h"
+#include "radeon_debug.h"
-static void flush_last_swtcl_prim( radeonContextPtr rmesa );
-
/* R100: xyzw, c0, c1/fog, stq[0..2] = 4+1+1+3*3 = 15 right? */
/* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
#define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat)) /* for mesa _tnl stage */
@@ -64,18 +64,18 @@ static void flush_last_swtcl_prim( radeonContextPtr rmesa );
#define EMIT_ATTR( ATTR, STYLE, F0 ) \
do { \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \
- rmesa->swtcl.vertex_attr_count++; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
+ rmesa->radeon.swtcl.vertex_attr_count++; \
fmt_0 |= F0; \
} while (0)
#define EMIT_PAD( N ) \
do { \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \
- rmesa->swtcl.vertex_attr_count++; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
+ rmesa->radeon.swtcl.vertex_attr_count++; \
} while (0)
static GLuint radeon_cp_vc_frmts[3][2] =
@@ -87,7 +87,7 @@ static GLuint radeon_cp_vc_frmts[3][2] =
static void radeonSetVertexFormat( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+ r100ContextPtr rmesa = R100_CONTEXT( ctx );
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
DECLARE_RENDERINPUTS(index_bitset);
@@ -106,7 +106,7 @@ static void radeonSetVertexFormat( GLcontext *ctx )
}
assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
- rmesa->swtcl.vertex_attr_count = 0;
+ rmesa->radeon.swtcl.vertex_attr_count = 0;
/* EMIT_ATTR's must be in order as they tell t_vertex.c how to
* build up a hardware vertex.
@@ -204,33 +204,52 @@ static void radeonSetVertexFormat( GLcontext *ctx )
}
}
- if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
+ if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
fmt_0 != rmesa->swtcl.vertex_format) {
RADEON_NEWPRIM(rmesa);
rmesa->swtcl.vertex_format = fmt_0;
- rmesa->swtcl.vertex_size =
+ rmesa->radeon.swtcl.vertex_size =
_tnl_install_attrs( ctx,
- rmesa->swtcl.vertex_attrs,
- rmesa->swtcl.vertex_attr_count,
+ rmesa->radeon.swtcl.vertex_attrs,
+ rmesa->radeon.swtcl.vertex_attr_count,
NULL, 0 );
- rmesa->swtcl.vertex_size /= 4;
- RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf( stderr, "%s: vertex_size= %d floats\n",
- __FUNCTION__, rmesa->swtcl.vertex_size);
+ rmesa->radeon.swtcl.vertex_size /= 4;
+ RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE,
+ "%s: vertex_size= %d floats\n", __FUNCTION__, rmesa->radeon.swtcl.vertex_size);
}
}
+static void radeon_predict_emit_size( r100ContextPtr rmesa )
+{
+
+ if (!rmesa->radeon.swtcl.emit_prediction) {
+ const int state_size = radeonCountStateEmitSize( &rmesa->radeon );
+ const int scissor_size = 8;
+ const int prims_size = 8;
+ const int vertex_size = 7;
+
+ if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
+ state_size +
+ (scissor_size + prims_size + vertex_size),
+ __FUNCTION__))
+ rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize( &rmesa->radeon );
+ else
+ rmesa->radeon.swtcl.emit_prediction = state_size;
+ rmesa->radeon.swtcl.emit_prediction += scissor_size + prims_size + vertex_size
+ + rmesa->radeon.cmdbuf.cs->cdw;
+ }
+}
static void radeonRenderStart( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+ r100ContextPtr rmesa = R100_CONTEXT( ctx );
- radeonSetVertexFormat( ctx );
-
- if (rmesa->dma.flush != 0 &&
- rmesa->dma.flush != flush_last_swtcl_prim)
- rmesa->dma.flush( rmesa );
+ radeonSetVertexFormat( ctx );
+
+ if (rmesa->radeon.dma.flush != 0 &&
+ rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
+ rmesa->radeon.dma.flush( ctx );
}
@@ -241,7 +260,7 @@ static void radeonRenderStart( GLcontext *ctx )
*/
void radeonChooseVertexState( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+ r100ContextPtr rmesa = R100_CONTEXT( ctx );
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
@@ -254,7 +273,7 @@ void radeonChooseVertexState( GLcontext *ctx )
* rasterization fallback. As this function will be called again when we
* leave a rasterization fallback, we can just skip it for now.
*/
- if (rmesa->Fallback != 0)
+ if (rmesa->radeon.Fallback != 0)
return;
/* HW perspective divide is a win, but tiny vertex formats are a
@@ -281,80 +300,33 @@ void radeonChooseVertexState( GLcontext *ctx )
}
}
-
-/* Flush vertices in the current dma region.
- */
-static void flush_last_swtcl_prim( radeonContextPtr rmesa )
+void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
{
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s\n", __FUNCTION__);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
- rmesa->dma.flush = NULL;
- if (rmesa->dma.current.buf) {
- struct radeon_dma_region *current = &rmesa->dma.current;
- GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset +
- current->buf->buf->idx * RADEON_BUFFER_SIZE +
- current->start);
- assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+ radeonEmitState(&rmesa->radeon);
+ radeonEmitVertexAOS( rmesa,
+ rmesa->radeon.swtcl.vertex_size,
+ first_elem(&rmesa->radeon.dma.reserved)->bo,
+ current_offset);
- assert (current->start +
- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
- current->ptr);
+
+ radeonEmitVbufPrim( rmesa,
+ rmesa->swtcl.vertex_format,
+ rmesa->radeon.swtcl.hw_primitive,
+ rmesa->radeon.swtcl.numverts);
+ if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n",
+ rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
- if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
- radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
- rmesa->hw.max_state_size + VBUF_BUFSZ );
- radeonEmitVertexAOS( rmesa,
- rmesa->swtcl.vertex_size,
- current_offset);
+ rmesa->radeon.swtcl.emit_prediction = 0;
- radeonEmitVbufPrim( rmesa,
- rmesa->swtcl.vertex_format,
- rmesa->swtcl.hw_primitive,
- rmesa->swtcl.numverts);
- }
-
- rmesa->swtcl.numverts = 0;
- current->start = current->ptr;
- }
}
-
-/* Alloc space in the current dma region.
- */
-static INLINE void *
-radeonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
-{
- GLuint bytes = vsize * nverts;
-
- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
- radeonRefillCurrentDmaRegion( rmesa );
-
- if (!rmesa->dma.flush) {
- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
- rmesa->dma.flush = flush_last_swtcl_prim;
- }
-
- assert( vsize == rmesa->swtcl.vertex_size * 4 );
- assert( rmesa->dma.flush == flush_last_swtcl_prim );
- assert (rmesa->dma.current.start +
- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
- rmesa->dma.current.ptr);
-
-
- {
- GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
- rmesa->dma.current.ptr += bytes;
- rmesa->swtcl.numverts += nverts;
- return head;
- }
-
-}
-
-
/*
* Render unclipped vertex buffers by emitting vertices directly to
* dma buffers. Use strip/fan hardware primitives where possible.
@@ -387,22 +359,31 @@ static const GLuint hw_prim[GL_POLYGON+1] = {
};
static INLINE void
-radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
+radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim )
{
RADEON_NEWPRIM( rmesa );
- rmesa->swtcl.hw_primitive = hw_prim[prim];
- assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
+ rmesa->radeon.swtcl.hw_primitive = hw_prim[prim];
+ // assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start);
+}
+
+static void* radeon_alloc_verts( r100ContextPtr rmesa , GLuint nr, GLuint size )
+{
+ void *rv;
+ do {
+ radeon_predict_emit_size( rmesa );
+ rv = rcommonAllocDmaLowVerts( &rmesa->radeon, nr, size );
+ } while (!rv);
+ return rv;
}
-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
#define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
#define FLUSH() RADEON_NEWPRIM( rmesa )
-#define GET_CURRENT_VB_MAX_VERTS() \
- (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
+#define GET_CURRENT_VB_MAX_VERTS() 10\
+// (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4))
#define GET_SUBSEQUENT_VB_MAX_VERTS() \
- ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
-#define ALLOC_VERTS( nr ) \
- radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
+ ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4))
+#define ALLOC_VERTS( nr ) radeon_alloc_verts( rmesa, nr, rmesa->radeon.swtcl.vertex_size * 4 )
#define EMIT_VERTS( ctx, j, nr, buf ) \
_tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
@@ -418,16 +399,13 @@ radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
static GLboolean radeon_run_render( GLcontext *ctx,
struct tnl_pipeline_stage *stage )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
tnl_render_func *tab = TAG(render_tab_verts);
GLuint i;
- if (rmesa->swtcl.indexed_verts.buf)
- RELEASE_ELT_VERTS();
-
- if (rmesa->swtcl.RenderIndex != 0 ||
+ if (rmesa->radeon.swtcl.RenderIndex != 0 ||
!radeon_dma_validate_render( ctx, VB ))
return GL_TRUE;
@@ -442,8 +420,8 @@ static GLboolean radeon_run_render( GLcontext *ctx,
if (!length)
continue;
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "radeon_render.c: prim %s %d..%d\n",
+ radeon_print(RADEON_SWRENDER, RADEON_NORMAL,
+ "radeon_render.c: prim %s %d..%d\n",
_mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK),
start, start+length);
@@ -496,13 +474,13 @@ static void radeonResetLineStipple( GLcontext *ctx );
#undef LOCAL_VARS
#undef ALLOC_VERTS
-#define CTX_ARG radeonContextPtr rmesa
-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
-#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
+#define CTX_ARG r100ContextPtr rmesa
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) radeon_alloc_verts( rmesa, n, (size) * 4 )
#undef LOCAL_VARS
#define LOCAL_VARS \
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \
- const char *radeonverts = (char *)rmesa->swtcl.verts;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx); \
+ const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;
#define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
#define VERTEX radeonVertex
#undef TAG
@@ -560,7 +538,7 @@ static struct {
#define VERT_Y(_v) _v->v.y
#define VERT_Z(_v) _v->v.z
#define AREA_IS_CCW( a ) (a < 0)
-#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int)))
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int)))
#define VERT_SET_RGBA( v, c ) \
do { \
@@ -606,8 +584,8 @@ do { \
#undef INIT
#define LOCAL_VARS(n) \
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \
- GLuint color[n], spec[n]; \
+ r100ContextPtr rmesa = R100_CONTEXT(ctx); \
+ GLuint color[n] = {0}, spec[n] = {0}; \
GLuint coloroffset = rmesa->swtcl.coloroffset; \
GLuint specoffset = rmesa->swtcl.specoffset; \
(void) color; (void) spec; (void) coloroffset; (void) specoffset;
@@ -617,7 +595,7 @@ do { \
***********************************************************************/
#define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
#undef TAG
#define TAG(x) x
#include "tnl_dd/t_dd_unfilled.h"
@@ -673,9 +651,9 @@ static void init_rast_tab( void )
} while (0)
#undef LOCAL_VARS
#define LOCAL_VARS \
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \
- const GLuint vertsize = rmesa->swtcl.vertex_size; \
- const char *radeonverts = (char *)rmesa->swtcl.verts; \
+ r100ContextPtr rmesa = R100_CONTEXT(ctx); \
+ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
+ const char *radeonverts = (char *)rmesa->radeon.swtcl.verts; \
const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
const GLboolean stipple = ctx->Line.StippleFlag; \
(void) elt; (void) stipple;
@@ -700,17 +678,17 @@ static void init_rast_tab( void )
void radeonChooseRenderState( GLcontext *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint index = 0;
GLuint flags = ctx->_TriangleCaps;
- if (!rmesa->TclFallback || rmesa->Fallback)
+ if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback)
return;
if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
if (flags & DD_TRI_UNFILLED) index |= RADEON_UNFILLED_BIT;
- if (index != rmesa->swtcl.RenderIndex) {
+ if (index != rmesa->radeon.swtcl.RenderIndex) {
tnl->Driver.Render.Points = rast_tab[index].points;
tnl->Driver.Render.Line = rast_tab[index].line;
tnl->Driver.Render.ClippedLine = rast_tab[index].line;
@@ -727,7 +705,7 @@ void radeonChooseRenderState( GLcontext *ctx )
tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
}
- rmesa->swtcl.RenderIndex = index;
+ rmesa->radeon.swtcl.RenderIndex = index;
}
}
@@ -739,18 +717,18 @@ void radeonChooseRenderState( GLcontext *ctx )
static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
- if (rmesa->swtcl.hw_primitive != hwprim) {
+ if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
RADEON_NEWPRIM( rmesa );
- rmesa->swtcl.hw_primitive = hwprim;
+ rmesa->radeon.swtcl.hw_primitive = hwprim;
}
}
static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- rmesa->swtcl.render_primitive = prim;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ rmesa->radeon.swtcl.render_primitive = prim;
if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED))
radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
}
@@ -761,7 +739,7 @@ static void radeonRenderFinish( GLcontext *ctx )
static void radeonResetLineStipple( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
RADEON_STATECHANGE( rmesa, lin );
}
@@ -795,25 +773,25 @@ static const char *getFallbackString(GLuint bit)
void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
- GLuint oldfallback = rmesa->Fallback;
+ GLuint oldfallback = rmesa->radeon.Fallback;
if (mode) {
- rmesa->Fallback |= bit;
+ rmesa->radeon.Fallback |= bit;
if (oldfallback == 0) {
- RADEON_FIREVERTICES( rmesa );
+ radeon_firevertices(&rmesa->radeon);
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
_swsetup_Wakeup( ctx );
- rmesa->swtcl.RenderIndex = ~0;
- if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ if (RADEON_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
bit, getFallbackString(bit));
}
}
}
else {
- rmesa->Fallback &= ~bit;
+ rmesa->radeon.Fallback &= ~bit;
if (oldfallback == bit) {
_swrast_flush( ctx );
tnl->Driver.Render.Start = radeonRenderStart;
@@ -826,18 +804,18 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
- if (rmesa->TclFallback) {
- /* These are already done if rmesa->TclFallback goes to
+ if (rmesa->radeon.TclFallback) {
+ /* These are already done if rmesa->radeon.TclFallback goes to
* zero above. But not if it doesn't (RADEON_NO_TCL for
* example?)
*/
_tnl_invalidate_vertex_state( ctx, ~0 );
_tnl_invalidate_vertices( ctx, ~0 );
- RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
+ RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
radeonChooseVertexState( ctx );
radeonChooseRenderState( ctx );
}
- if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+ if (RADEON_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "Radeon end rasterization fallback: 0x%x %s\n",
bit, getFallbackString(bit));
}
@@ -853,13 +831,14 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
void radeonInitSwtcl( GLcontext *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
static int firsttime = 1;
if (firsttime) {
init_rast_tab();
firsttime = 0;
}
+ rmesa->radeon.swtcl.emit_prediction = 0;
tnl->Driver.Render.Start = radeonRenderStart;
tnl->Driver.Render.Finish = radeonRenderFinish;
@@ -872,18 +851,9 @@ void radeonInitSwtcl( GLcontext *ctx )
_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
RADEON_MAX_TNL_VERTEX_SIZE);
- rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
- rmesa->swtcl.RenderIndex = ~0;
- rmesa->swtcl.render_primitive = GL_TRIANGLES;
- rmesa->swtcl.hw_primitive = 0;
+ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+ rmesa->radeon.swtcl.hw_primitive = 0;
}
-
-void radeonDestroySwtcl( GLcontext *ctx )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
- if (rmesa->swtcl.indexed_verts.buf)
- radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
- __FUNCTION__ );
-}
diff --git a/radeon/radeon_swtcl.h b/radeon/radeon_swtcl.h
index e485052..da89158 100644
--- a/radeon/radeon_swtcl.h
+++ b/radeon/radeon_swtcl.h
@@ -40,7 +40,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_context.h"
extern void radeonInitSwtcl( GLcontext *ctx );
-extern void radeonDestroySwtcl( GLcontext *ctx );
extern void radeonChooseRenderState( GLcontext *ctx );
extern void radeonChooseVertexState( GLcontext *ctx );
@@ -63,5 +62,5 @@ extern void radeon_translate_vertex( GLcontext *ctx,
extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
-
+extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
#endif
diff --git a/radeon/radeon_tcl.c b/radeon/radeon_tcl.c
index 779e9ae..b334ea0 100644
--- a/radeon/radeon_tcl.c
+++ b/radeon/radeon_tcl.c
@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
+#include "radeon_common.h"
#include "radeon_context.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_tcl.h"
#include "radeon_swtcl.h"
#include "radeon_maos.h"
+#include "radeon_common_context.h"
@@ -104,7 +106,7 @@ static GLboolean discrete_prim[0x10] = {
};
-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
#define ELT_TYPE GLushort
#define ELT_INIT(prim, hw_prim) \
@@ -125,7 +127,7 @@ static GLboolean discrete_prim[0x10] = {
#define RESET_STIPPLE() do { \
RADEON_STATECHANGE( rmesa, lin ); \
- radeonEmitState( rmesa ); \
+ radeonEmitState(&rmesa->radeon); \
} while (0)
#define AUTO_STIPPLE( mode ) do { \
@@ -136,31 +138,26 @@ static GLboolean discrete_prim[0x10] = {
else \
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
~RADEON_LINE_PATTERN_AUTO_RESET; \
- radeonEmitState( rmesa ); \
+ radeonEmitState(&rmesa->radeon); \
} while (0)
#define ALLOC_ELTS(nr) radeonAllocElts( rmesa, nr )
-static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr )
+static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
{
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa );
+ if (rmesa->radeon.dma.flush)
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
- rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
+ radeonEmitAOS( rmesa,
+ rmesa->radeon.tcl.aos_count, 0 );
- radeonEmitAOS( rmesa,
- rmesa->tcl.aos_components,
- rmesa->tcl.nr_aos_components, 0 );
-
- return radeonAllocEltsOpenEnded( rmesa,
- rmesa->tcl.vertex_format,
- rmesa->tcl.hw_primitive, nr );
+ return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
+ rmesa->tcl.hw_primitive, nr );
}
-#define CLOSE_ELTS() RADEON_NEWPRIM( rmesa )
+#define CLOSE_ELTS() if (0) RADEON_NEWPRIM( rmesa )
@@ -174,15 +171,11 @@ static void radeonEmitPrim( GLcontext *ctx,
GLuint start,
GLuint count)
{
- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+ r100ContextPtr rmesa = R100_CONTEXT( ctx );
radeonTclPrimitive( ctx, prim, hwprim );
- radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
- rmesa->hw.max_state_size + VBUF_BUFSZ );
-
radeonEmitAOS( rmesa,
- rmesa->tcl.aos_components,
- rmesa->tcl.nr_aos_components,
+ rmesa->radeon.tcl.aos_count,
start );
/* Why couldn't this packet have taken an offset param?
@@ -197,6 +190,8 @@ static void radeonEmitPrim( GLcontext *ctx,
radeonEmitPrim( ctx, prim, hwprim, start, count ); \
(void) rmesa; } while (0)
+#define MAX_CONVERSION_SIZE 40
+
/* Try & join small primitives
*/
#if 0
@@ -254,7 +249,7 @@ void radeonTclPrimitive( GLcontext *ctx,
GLenum prim,
int hw_prim )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint se_cntl;
GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
@@ -361,6 +356,73 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
}
}
+/**
+ * Predict total emit size for next rendering operation so there is no flush in middle of rendering
+ * Prediction has to aim towards the best possible value that is worse than worst case scenario
+ */
+static GLuint radeonEnsureEmitSize( GLcontext * ctx , GLuint inputs )
+{
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint space_required;
+ GLuint state_size;
+ GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */
+ int i;
+ /* list of flags that are allocating aos object */
+ const GLuint flags_to_check[] = {
+ VERT_BIT_NORMAL,
+ VERT_BIT_COLOR0,
+ VERT_BIT_COLOR1,
+ VERT_BIT_FOG
+ };
+ /* predict number of aos to emit */
+ for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i)
+ {
+ if (inputs & flags_to_check[i])
+ ++nr_aos;
+ }
+ for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
+ {
+ if (inputs & VERT_BIT_TEX(i))
+ ++nr_aos;
+ }
+
+ {
+ /* count the prediction for state size */
+ space_required = 0;
+ state_size = radeonCountStateEmitSize( &rmesa->radeon );
+ /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
+ if (!rmesa->hw.tcl.dirty)
+ state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl );
+ /* predict size for elements */
+ for (i = 0; i < VB->PrimitiveCount; ++i)
+ {
+ if (!VB->Primitive[i].count)
+ continue;
+ /* If primitive.count is less than MAX_CONVERSION_SIZE
+ rendering code may decide convert to elts.
+ In that case we have to make pessimistic prediction.
+ and use larger of 2 paths. */
+ const GLuint elts = ELTS_BUFSZ(nr_aos);
+ const GLuint index = INDEX_BUFSZ;
+ const GLuint vbuf = VBUF_BUFSZ;
+ if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+ || vbuf > index + elts)
+ space_required += vbuf;
+ else
+ space_required += index + elts;
+ space_required += AOS_BUFSZ(nr_aos);
+ }
+ space_required += SCISSOR_BUFSZ;
+ }
+ /* flush the buffer in case we need more than is left. */
+ if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__))
+ return space_required + radeonCountStateEmitSize( &rmesa->radeon );
+ else
+ return space_required + state_size;
+}
+
/**********************************************************************/
/* Render pipeline stage */
/**********************************************************************/
@@ -371,7 +433,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
static GLboolean radeon_run_tcl_render( GLcontext *ctx,
struct tnl_pipeline_stage *stage )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
@@ -379,7 +441,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
/* TODO: separate this from the swtnl pipeline
*/
- if (rmesa->TclFallback)
+ if (rmesa->radeon.TclFallback)
return GL_TRUE; /* fallback to software t&l */
if (VB->Count == 0)
@@ -411,6 +473,8 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
}
radeonReleaseArrays( ctx, ~0 );
+ GLuint emit_end = radeonEnsureEmitSize( ctx, inputs )
+ + rmesa->radeon.cmdbuf.cs->cdw;
radeonEmitArrays( ctx, inputs );
rmesa->tcl.Elts = VB->Elts;
@@ -430,6 +494,10 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
radeonEmitPrimitive( ctx, start, start+length, prim );
}
+ if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
+
return GL_FALSE; /* finished the pipe */
}
@@ -461,7 +529,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage =
static void transition_to_swtnl( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint se_cntl;
@@ -490,7 +558,7 @@ static void transition_to_swtnl( GLcontext *ctx )
static void transition_to_hwtnl( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
@@ -509,17 +577,17 @@ static void transition_to_hwtnl( GLcontext *ctx )
tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
- if ( rmesa->dma.flush )
- rmesa->dma.flush( rmesa );
+ if ( rmesa->radeon.dma.flush )
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- rmesa->dma.flush = NULL;
+ rmesa->radeon.dma.flush = NULL;
rmesa->swtcl.vertex_format = 0;
- if (rmesa->swtcl.indexed_verts.buf)
- radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
- __FUNCTION__ );
+ // if (rmesa->swtcl.indexed_verts.buf)
+ // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
+ // __FUNCTION__ );
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon end tcl fallback\n");
}
@@ -550,22 +618,22 @@ static char *getFallbackString(GLuint bit)
void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- GLuint oldfallback = rmesa->TclFallback;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint oldfallback = rmesa->radeon.TclFallback;
if (mode) {
- rmesa->TclFallback |= bit;
+ rmesa->radeon.TclFallback |= bit;
if (oldfallback == 0) {
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon begin tcl fallback %s\n",
getFallbackString( bit ));
transition_to_swtnl( ctx );
}
}
else {
- rmesa->TclFallback &= ~bit;
+ rmesa->radeon.TclFallback &= ~bit;
if (oldfallback == bit) {
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon end tcl fallback %s\n",
getFallbackString( bit ));
transition_to_hwtnl( ctx );
diff --git a/radeon/radeon_tex.c b/radeon/radeon_tex.c
index f2b6deb..99865ff 100644
--- a/radeon/radeon_tex.c
+++ b/radeon/radeon_tex.c
@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/texobj.h"
#include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_swtcl.h"
@@ -170,10 +171,13 @@ static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
{
GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK);
+ /* Force revalidation to account for switches from/to mipmapping. */
+ t->validated = GL_FALSE;
+
t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK);
/* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */
- if ( t->base.tObj->Target == GL_TEXTURE_CUBE_MAP ) {
+ if ( t->base.Target == GL_TEXTURE_CUBE_MAP ) {
switch ( minf ) {
case GL_NEAREST:
case GL_NEAREST_MIPMAP_NEAREST:
@@ -249,437 +253,17 @@ static void radeonSetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] )
t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
}
-
-/**
- * Allocate space for and load the mesa images into the texture memory block.
- * This will happen before drawing with a new texture, or drawing with a
- * texture after it was swapped out or teximaged again.
- */
-
-static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj )
-{
- radeonTexObjPtr t;
-
- t = CALLOC_STRUCT( radeon_tex_obj );
- texObj->DriverData = t;
- if ( t != NULL ) {
- if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, (void *)t );
- }
-
- /* Initialize non-image-dependent parts of the state:
- */
- t->base.tObj = texObj;
- t->border_fallback = GL_FALSE;
-
- t->pp_txfilter = RADEON_BORDER_MODE_OGL;
- t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
- RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
-
- make_empty_list( & t->base );
-
- radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
- radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
- radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
- radeonSetTexBorderColor( t, texObj->BorderColor );
- }
-
- return t;
-}
-
-
-static const struct gl_texture_format *
-radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
- GLenum format, GLenum type )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- const GLboolean do32bpt =
- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
- const GLboolean force16bpt =
- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
- (void) format;
-
- switch ( internalFormat ) {
- case 4:
- case GL_RGBA:
- case GL_COMPRESSED_RGBA:
- switch ( type ) {
- case GL_UNSIGNED_INT_10_10_10_2:
- case GL_UNSIGNED_INT_2_10_10_10_REV:
- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
- case GL_UNSIGNED_SHORT_4_4_4_4:
- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
- return _dri_texformat_argb4444;
- case GL_UNSIGNED_SHORT_5_5_5_1:
- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
- return _dri_texformat_argb1555;
- default:
- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb4444;
- }
-
- case 3:
- case GL_RGB:
- case GL_COMPRESSED_RGB:
- switch ( type ) {
- case GL_UNSIGNED_SHORT_4_4_4_4:
- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
- return _dri_texformat_argb4444;
- case GL_UNSIGNED_SHORT_5_5_5_1:
- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
- return _dri_texformat_argb1555;
- case GL_UNSIGNED_SHORT_5_6_5:
- case GL_UNSIGNED_SHORT_5_6_5_REV:
- return _dri_texformat_rgb565;
- default:
- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
- }
-
- case GL_RGBA8:
- case GL_RGB10_A2:
- case GL_RGBA12:
- case GL_RGBA16:
- return !force16bpt ?
- _dri_texformat_argb8888 : _dri_texformat_argb4444;
-
- case GL_RGBA4:
- case GL_RGBA2:
- return _dri_texformat_argb4444;
-
- case GL_RGB5_A1:
- return _dri_texformat_argb1555;
-
- case GL_RGB8:
- case GL_RGB10:
- case GL_RGB12:
- case GL_RGB16:
- return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
-
- case GL_RGB5:
- case GL_RGB4:
- case GL_R3_G3_B2:
- return _dri_texformat_rgb565;
-
- case GL_ALPHA:
- case GL_ALPHA4:
- case GL_ALPHA8:
- case GL_ALPHA12:
- case GL_ALPHA16:
- case GL_COMPRESSED_ALPHA:
- return _dri_texformat_a8;
-
- case 1:
- case GL_LUMINANCE:
- case GL_LUMINANCE4:
- case GL_LUMINANCE8:
- case GL_LUMINANCE12:
- case GL_LUMINANCE16:
- case GL_COMPRESSED_LUMINANCE:
- return _dri_texformat_l8;
-
- case 2:
- case GL_LUMINANCE_ALPHA:
- case GL_LUMINANCE4_ALPHA4:
- case GL_LUMINANCE6_ALPHA2:
- case GL_LUMINANCE8_ALPHA8:
- case GL_LUMINANCE12_ALPHA4:
- case GL_LUMINANCE12_ALPHA12:
- case GL_LUMINANCE16_ALPHA16:
- case GL_COMPRESSED_LUMINANCE_ALPHA:
- return _dri_texformat_al88;
-
- case GL_INTENSITY:
- case GL_INTENSITY4:
- case GL_INTENSITY8:
- case GL_INTENSITY12:
- case GL_INTENSITY16:
- case GL_COMPRESSED_INTENSITY:
- return _dri_texformat_i8;
-
- case GL_YCBCR_MESA:
- if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
- type == GL_UNSIGNED_BYTE)
- return &_mesa_texformat_ycbcr;
- else
- return &_mesa_texformat_ycbcr_rev;
-
- case GL_RGB_S3TC:
- case GL_RGB4_S3TC:
- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
- return &_mesa_texformat_rgb_dxt1;
-
- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
- return &_mesa_texformat_rgba_dxt1;
-
- case GL_RGBA_S3TC:
- case GL_RGBA4_S3TC:
- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
- return &_mesa_texformat_rgba_dxt3;
-
- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
- return &_mesa_texformat_rgba_dxt5;
-
- default:
- _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__);
- return NULL;
- }
-
- return NULL; /* never get here */
-}
-
-
-static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint border,
- GLenum format, GLenum type, const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
- if ( t ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) radeonAllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
- return;
- }
- }
-
- /* Note, this will call ChooseTextureFormat */
- _mesa_store_teximage1d(ctx, target, level, internalFormat,
- width, border, format, type, pixels,
- &ctx->Unpack, texObj, texImage);
-
- t->dirty_images[0] |= (1 << level);
-}
-
-
-static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
- GLint xoffset,
- GLsizei width,
- GLenum format, GLenum type,
- const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
- assert( t ); /* this _should_ be true */
- if ( t ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) radeonAllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
- return;
- }
- }
-
- _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
- format, type, pixels, packing, texObj,
- texImage);
-
- t->dirty_images[0] |= (1 << level);
-}
-
-
-static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint border,
- GLenum format, GLenum type, const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- if ( t != NULL ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) radeonAllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
- return;
- }
- }
-
- /* Note, this will call ChooseTextureFormat */
- _mesa_store_teximage2d(ctx, target, level, internalFormat,
- width, height, border, format, type, pixels,
- &ctx->Unpack, texObj, texImage);
-
- t->dirty_images[face] |= (1 << level);
-}
-
-
-static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
- GLint xoffset, GLint yoffset,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- assert( t ); /* this _should_ be true */
- if ( t ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) radeonAllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
- return;
- }
- }
-
- _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
- height, format, type, pixels, packing, texObj,
- texImage);
-
- t->dirty_images[face] |= (1 << level);
-}
-
-static void radeonCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint border,
- GLsizei imageSize, const GLvoid *data,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- if ( t != NULL ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) radeonAllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
- return;
- }
- }
-
- /* Note, this will call ChooseTextureFormat */
- _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
- height, border, imageSize, data, texObj, texImage);
-
- t->dirty_images[face] |= (1 << level);
-}
-
-
-static void radeonCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
- GLint xoffset, GLint yoffset,
- GLsizei width, GLsizei height,
- GLenum format,
- GLsizei imageSize, const GLvoid *data,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
-{
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
- GLuint face;
-
-
- /* which cube face or ordinary 2D image */
- switch (target) {
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- ASSERT(face < 6);
- break;
- default:
- face = 0;
- }
-
- assert( t ); /* this _should_ be true */
- if ( t ) {
- driSwapOutTextureObject( t );
- }
- else {
- t = (driTextureObject *) radeonAllocTexObj( texObj );
- if (!t) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
- return;
- }
- }
-
- _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
- height, format, imageSize, data, texObj, texImage);
-
- t->dirty_images[face] |= (1 << level);
-}
-
#define SCALED_FLOAT_TO_BYTE( x, scale ) \
(((GLuint)((255.0F / scale) * (x))) / 2)
static void radeonTexEnv( GLcontext *ctx, GLenum target,
GLenum pname, const GLfloat *param )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint unit = ctx->Texture.CurrentUnit;
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- if ( RADEON_DEBUG & DEBUG_STATE ) {
+ if ( RADEON_DEBUG & RADEON_STATE ) {
fprintf( stderr, "%s( %s )\n",
__FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
}
@@ -706,7 +290,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
* functions, one mapping [-1.0,0.0] to [-128,0] and one mapping
* [0.0,4.0] to [0,127].
*/
- min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
+ min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
0.0 : -1.0;
bias = CLAMP( *param, min, 4.0 );
if ( bias == 0 ) {
@@ -739,12 +323,10 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
struct gl_texture_object *texObj,
GLenum pname, const GLfloat *params )
{
- radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
+ radeonTexObj* t = radeon_tex_obj(texObj);
- if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
- fprintf( stderr, "%s( %s )\n", __FUNCTION__,
+ radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __FUNCTION__,
_mesa_lookup_enum_by_nr( pname ) );
- }
switch ( pname ) {
case GL_TEXTURE_MIN_FILTER:
@@ -767,57 +349,50 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
case GL_TEXTURE_MAX_LEVEL:
case GL_TEXTURE_MIN_LOD:
case GL_TEXTURE_MAX_LOD:
+
/* This isn't the most efficient solution but there doesn't appear to
* be a nice alternative. Since there's no LOD clamping,
* we just have to rely on loading the right subset of mipmap levels
* to simulate a clamped LOD.
*/
- driSwapOutTextureObject( (driTextureObject *) t );
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = 0;
+ t->validated = GL_FALSE;
+ }
break;
default:
return;
}
-
- /* Mark this texobj as dirty (one bit per tex unit)
- */
- t->dirty_state = TEX_ALL;
-}
-
-
-static void radeonBindTexture( GLcontext *ctx, GLenum target,
- struct gl_texture_object *texObj )
-{
- if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
- fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
- ctx->Texture.CurrentUnit );
- }
-
- assert( (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D &&
- target != GL_TEXTURE_RECTANGLE_NV && target != GL_TEXTURE_CUBE_MAP) ||
- (texObj->DriverData != NULL) );
}
-
static void radeonDeleteTexture( GLcontext *ctx,
struct gl_texture_object *texObj )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ radeonTexObj* t = radeon_tex_obj(texObj);
+ int i;
- if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
- fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
+ radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+ "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
_mesa_lookup_enum_by_nr( texObj->Target ) );
- }
-
- if ( t != NULL ) {
- if ( rmesa ) {
- RADEON_FIREVERTICES( rmesa );
- }
- driDestroyTextureObject( t );
+ if ( rmesa ) {
+ radeon_firevertices(&rmesa->radeon);
+ for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
+ if ( t == rmesa->state.texture.unit[i].texobj ) {
+ rmesa->state.texture.unit[i].texobj = NULL;
+ rmesa->hw.tex[i].dirty = GL_FALSE;
+ rmesa->hw.cube[i].dirty = GL_FALSE;
+ }
+ }
}
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = 0;
+ }
/* Free mipmap images and the texture object itself */
_mesa_delete_texture_object(ctx, texObj);
}
@@ -837,7 +412,7 @@ static void radeonTexGen( GLcontext *ctx,
GLenum pname,
const GLfloat *params )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint unit = ctx->Texture.CurrentUnit;
rmesa->recheck_texgen[unit] = GL_TRUE;
}
@@ -851,29 +426,40 @@ static void radeonTexGen( GLcontext *ctx,
static struct gl_texture_object *
radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- struct gl_texture_object *obj;
- obj = _mesa_new_texture_object(ctx, name, target);
- if (!obj)
- return NULL;
- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
- radeonAllocTexObj( obj );
- return obj;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
+
+ _mesa_initialize_texture_object(&t->base, name, target);
+ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+
+ t->border_fallback = GL_FALSE;
+
+ t->pp_txfilter = RADEON_BORDER_MODE_OGL;
+ t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+ RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
+
+ radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT );
+ radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
+ radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter );
+ radeonSetTexBorderColor( t, t->base.BorderColor );
+ return &t->base;
}
+
void radeonInitTextureFuncs( struct dd_function_table *functions )
{
- functions->ChooseTextureFormat = radeonChooseTextureFormat;
+ functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
functions->TexImage1D = radeonTexImage1D;
functions->TexImage2D = radeonTexImage2D;
functions->TexSubImage1D = radeonTexSubImage1D;
functions->TexSubImage2D = radeonTexSubImage2D;
+ functions->GetTexImage = radeonGetTexImage;
+ functions->GetCompressedTexImage = radeonGetCompressedTexImage;
functions->NewTextureObject = radeonNewTextureObject;
- functions->BindTexture = radeonBindTexture;
+ // functions->BindTexture = radeonBindTexture;
functions->DeleteTexture = radeonDeleteTexture;
- functions->IsTextureResident = driIsTextureResident;
functions->TexEnv = radeonTexEnv;
functions->TexParameter = radeonTexParameter;
@@ -882,5 +468,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
functions->CompressedTexImage2D = radeonCompressedTexImage2D;
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+ functions->GenerateMipmap = radeonGenerateMipmap;
+
+ functions->NewTextureImage = radeonNewTextureImage;
+ functions->FreeTexImageData = radeonFreeTexImageData;
+ functions->MapTexture = radeonMapTexture;
+ functions->UnmapTexture = radeonUnmapTexture;
+
driInitTextureFormats();
}
diff --git a/radeon/radeon_tex.h b/radeon/radeon_tex.h
index 8000880..a4aaddc 100644
--- a/radeon/radeon_tex.h
+++ b/radeon/radeon_tex.h
@@ -41,12 +41,16 @@ extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
unsigned long long offset, GLint depth,
GLuint pitch);
+extern void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv);
+extern void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
+ __DRIdrawable *dPriv);
+
extern void radeonUpdateTextureState( GLcontext *ctx );
-extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t,
+extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t,
GLuint face );
-extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t );
+extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t );
extern void radeonInitTextureFuncs( struct dd_function_table *functions );
diff --git a/radeon/radeon_texmem.c b/radeon/radeon_texmem.c
deleted file mode 100644
index 5f7bbe6..0000000
--- a/radeon/radeon_texmem.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/**************************************************************************
-
-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
- VA Linux Systems Inc., Fremont, California.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation on the rights to use, copy, modify, merge, publish,
-distribute, sub license, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Kevin E. Martin <martin@valinux.com>
- * Gareth Hughes <gareth@valinux.com>
- *
- */
-#include <errno.h>
-
-#include "main/glheader.h"
-#include "main/imports.h"
-#include "main/context.h"
-#include "main/macros.h"
-
-#include "radeon_context.h"
-#include "radeon_ioctl.h"
-#include "radeon_tex.h"
-
-#include <unistd.h> /* for usleep() */
-
-
-/**
- * Destroy any device-dependent state associated with the texture. This may
- * include NULLing out hardware state that points to the texture.
- */
-void
-radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
-{
- if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj );
- }
-
- if ( rmesa != NULL ) {
- unsigned i;
-
-
- for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
- if ( t == rmesa->state.texture.unit[i].texobj ) {
- rmesa->state.texture.unit[i].texobj = NULL;
- }
- }
- }
-}
-
-
-/* ------------------------------------------------------------
- * Texture image conversions
- */
-
-
-static void radeonUploadRectSubImage( radeonContextPtr rmesa,
- radeonTexObjPtr t,
- struct gl_texture_image *texImage,
- GLint x, GLint y,
- GLint width, GLint height )
-{
- const struct gl_texture_format *texFormat = texImage->TexFormat;
- int blit_format, dstPitch, done;
-
- switch ( texFormat->TexelBytes ) {
- case 1:
- blit_format = RADEON_GMC_DST_8BPP_CI;
- break;
- case 2:
- blit_format = RADEON_GMC_DST_16BPP;
- break;
- case 4:
- blit_format = RADEON_GMC_DST_32BPP;
- break;
- default:
- fprintf( stderr, "radeonUploadRectSubImage: unknown blit_format (texelbytes=%d)\n",
- texFormat->TexelBytes);
- return;
- }
-
- t->image[0][0].data = texImage->Data;
-
- /* Currently don't need to cope with small pitches.
- */
- width = texImage->Width;
- height = texImage->Height;
- dstPitch = t->pp_txpitch + 32;
-
- { /* FIXME: prefer GART-texturing if possible */
- /* Data not in GART memory, or bad pitch.
- */
- for (done = 0; done < height ; ) {
- struct radeon_dma_region region;
- int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
- int src_pitch;
- char *tex;
-
- src_pitch = texImage->RowStride * texFormat->TexelBytes;
-
- tex = (char *)texImage->Data + done * src_pitch;
-
- memset(&region, 0, sizeof(region));
- radeonAllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
-
- /* Copy texdata to dma:
- */
- if (0)
- fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
- __FUNCTION__, src_pitch, dstPitch);
-
- if (src_pitch == dstPitch) {
- memcpy( region.address + region.start, tex, lines * src_pitch );
- }
- else {
- char *buf = region.address + region.start;
- int i;
- for (i = 0 ; i < lines ; i++) {
- memcpy( buf, tex, src_pitch );
- buf += dstPitch;
- tex += src_pitch;
- }
- }
-
- radeonEmitWait( rmesa, RADEON_WAIT_3D );
-
-
-
- /* Blit to framebuffer
- */
- radeonEmitBlit( rmesa,
- blit_format,
- dstPitch, GET_START( &region ),
- dstPitch, t->bufAddr,
- 0, 0,
- 0, done,
- width, lines );
-
- radeonEmitWait( rmesa, RADEON_WAIT_2D );
-
- radeonReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
- done += lines;
- }
- }
-}
-
-
-/**
- * Upload the texture image associated with texture \a t at the specified
- * level at the address relative to \a start.
- */
-static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t,
- GLint hwlevel,
- GLint x, GLint y, GLint width, GLint height,
- GLuint face )
-{
- struct gl_texture_image *texImage = NULL;
- GLuint offset;
- GLint imageWidth, imageHeight;
- GLint ret;
- drm_radeon_texture_t tex;
- drm_radeon_tex_image_t tmp;
- const int level = hwlevel + t->base.firstLevel;
-
- if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
- fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
- __FUNCTION__, (void *)t, (void *)t->base.tObj, level, width, height, face );
- }
-
- ASSERT(face < 6);
-
- /* Ensure we have a valid texture to upload */
- if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
- return;
- }
-
- texImage = t->base.tObj->Image[face][level];
-
- if ( !texImage ) {
- if ( RADEON_DEBUG & DEBUG_TEXTURE )
- fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
- return;
- }
- if ( !texImage->Data ) {
- if ( RADEON_DEBUG & DEBUG_TEXTURE )
- fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
- return;
- }
-
-
- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- assert(level == 0);
- assert(hwlevel == 0);
- if ( RADEON_DEBUG & DEBUG_TEXTURE )
- fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
- radeonUploadRectSubImage( rmesa, t, texImage, x, y, width, height );
- return;
- }
-
- imageWidth = texImage->Width;
- imageHeight = texImage->Height;
-
- offset = t->bufAddr + t->base.totalSize * face / 6;
-
- if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
- GLint imageX = 0;
- GLint imageY = 0;
- GLint blitX = t->image[face][hwlevel].x;
- GLint blitY = t->image[face][hwlevel].y;
- GLint blitWidth = t->image[face][hwlevel].width;
- GLint blitHeight = t->image[face][hwlevel].height;
- fprintf( stderr, " upload image: %d,%d at %d,%d\n",
- imageWidth, imageHeight, imageX, imageY );
- fprintf( stderr, " upload blit: %d,%d at %d,%d\n",
- blitWidth, blitHeight, blitX, blitY );
- fprintf( stderr, " blit ofs: 0x%07x level: %d/%d\n",
- (GLuint)offset, hwlevel, level );
- }
-
- t->image[face][hwlevel].data = texImage->Data;
-
- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
- * NOTE: we're always use a 1KB-wide blit and I8 texture format.
- * We used to use 1, 2 and 4-byte texels and used to use the texture
- * width to dictate the blit width - but that won't work for compressed
- * textures. (Brian)
- * NOTE: can't do that with texture tiling. (sroland)
- */
- tex.offset = offset;
- tex.image = &tmp;
- /* copy (x,y,width,height,data) */
- memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
-
- if (texImage->TexFormat->TexelBytes) {
- /* use multi-byte upload scheme */
- tex.height = imageHeight;
- tex.width = imageWidth;
- tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK;
- tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
- tex.offset += tmp.x & ~1023;
- tmp.x = tmp.x % 1024;
- if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
- /* need something like "tiled coordinates" ? */
- tmp.y = tmp.x / (tex.pitch * 128) * 2;
- tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
- tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
- }
- else {
- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
- }
- if ((t->tile_bits & RADEON_TXO_MACRO_TILE) &&
- (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) {
- /* radeon switches off macro tiling for small textures/mipmaps it seems */
- tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
- }
- }
- else {
- /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
- needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
- so the kernel module reads the right amount of data. */
- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
- tex.pitch = (BLIT_WIDTH_BYTES / 64);
- tex.height = (imageHeight + 3) / 4;
- tex.width = (imageWidth + 3) / 4;
- switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) {
- case RADEON_TXFORMAT_DXT1:
- tex.width *= 8;
- break;
- case RADEON_TXFORMAT_DXT23:
- case RADEON_TXFORMAT_DXT45:
- tex.width *= 16;
- break;
- }
- }
-
- LOCK_HARDWARE( rmesa );
- do {
- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
- &tex, sizeof(drm_radeon_texture_t) );
- } while ( ret == -EAGAIN );
-
- UNLOCK_HARDWARE( rmesa );
-
- if ( ret ) {
- fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
- fprintf( stderr, " offset=0x%08x\n",
- offset );
- fprintf( stderr, " image width=%d height=%d\n",
- imageWidth, imageHeight );
- fprintf( stderr, " blit width=%d height=%d data=%p\n",
- t->image[face][hwlevel].width, t->image[face][hwlevel].height,
- t->image[face][hwlevel].data );
- exit( 1 );
- }
-}
-
-
-/**
- * Upload the texture images associated with texture \a t. This might
- * require the allocation of texture memory.
- *
- * \param rmesa Context pointer
- * \param t Texture to be uploaded
- * \param face Cube map face to be uploaded. Zero for non-cube maps.
- */
-
-int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint face )
-{
- int numLevels;
-
- if ( !t || t->base.totalSize == 0 || t->image_override )
- return 0;
-
- if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
- fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
- (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
- t->base.firstLevel, t->base.lastLevel );
- }
-
- numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
- if (RADEON_DEBUG & DEBUG_SYNC) {
- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
- radeonFinish( rmesa->glCtx );
- }
-
- LOCK_HARDWARE( rmesa );
-
- if ( t->base.memBlock == NULL ) {
- int heap;
-
- heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
- (driTextureObject *) t );
- if ( heap == -1 ) {
- UNLOCK_HARDWARE( rmesa );
- return -1;
- }
-
- /* Set the base offset of the texture image */
- t->bufAddr = rmesa->radeonScreen->texOffset[heap]
- + t->base.memBlock->ofs;
- t->pp_txoffset = t->bufAddr;
-
- if (!(t->base.tObj->Image[0][0]->IsClientData)) {
- /* hope it's safe to add that here... */
- t->pp_txoffset |= t->tile_bits;
- }
-
- /* Mark this texobj as dirty on all units:
- */
- t->dirty_state = TEX_ALL;
- }
-
-
- /* Let the world know we've used this memory recently.
- */
- driUpdateTextureLRU( (driTextureObject *) t );
- UNLOCK_HARDWARE( rmesa );
-
-
- /* Upload any images that are new */
- if (t->base.dirty_images[face]) {
- int i;
- for ( i = 0 ; i < numLevels ; i++ ) {
- if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
- uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
- t->image[face][i].height, face );
- }
- }
- t->base.dirty_images[face] = 0;
- }
-
- if (RADEON_DEBUG & DEBUG_SYNC) {
- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
- radeonFinish( rmesa->glCtx );
- }
-
- return 0;
-}
diff --git a/radeon/radeon_texstate.c b/radeon/radeon_texstate.c
index b165205..9d252aa 100644
--- a/radeon/radeon_texstate.c
+++ b/radeon/radeon_texstate.c
@@ -39,10 +39,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/context.h"
#include "main/macros.h"
#include "main/texformat.h"
+#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
#include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_swtcl.h"
@@ -75,10 +77,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
&& (tx_table[f].format != 0xffffffff) )
-static const struct {
+struct tx_table {
GLuint format, filter;
-}
-tx_table[] =
+};
+
+static const struct tx_table tx_table[] =
{
_ALPHA(RGBA8888),
_ALPHA_REV(RGBA8888),
@@ -111,252 +114,6 @@ tx_table[] =
#undef _ALPHA
#undef _INVALID
-/**
- * This function computes the number of bytes of storage needed for
- * the given texture object (all mipmap levels, all cube faces).
- * The \c image[face][level].x/y/width/height parameters for upload/blitting
- * are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here
- * too.
- *
- * \param rmesa Context pointer
- * \param tObj GL texture object whose images are to be posted to
- * hardware state.
- */
-static void radeonSetTexImages( radeonContextPtr rmesa,
- struct gl_texture_object *tObj )
-{
- radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
- const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
- GLint curOffset, blitWidth;
- GLint i, texelBytes;
- GLint numLevels;
- GLint log2Width, log2Height, log2Depth;
-
- /* Set the hardware texture format
- */
- if ( !t->image_override ) {
- t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
- RADEON_TXFORMAT_ALPHA_IN_MAP);
- t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
-
- if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
- t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
- t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
- }
- else {
- _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
- return;
- }
- }
-
- texelBytes = baseImage->TexFormat->TexelBytes;
-
- /* Compute which mipmap levels we really want to send to the hardware.
- */
-
- if (tObj->Target != GL_TEXTURE_CUBE_MAP)
- driCalculateTextureFirstLastLevel( (driTextureObject *) t );
- else {
- /* r100 can't handle mipmaps for cube/3d textures, so don't waste
- memory for them */
- t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel;
- }
- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
-
- numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
-
- /* Calculate mipmap offsets and dimensions for blitting (uploading)
- * The idea is that we lay out the mipmap levels within a block of
- * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
- */
- curOffset = 0;
- blitWidth = BLIT_WIDTH_BYTES;
- t->tile_bits = 0;
-
- /* figure out if this texture is suitable for tiling. */
- if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
- if (rmesa->texmicrotile && (baseImage->Height > 1)) {
- /* allow 32 (bytes) x 1 mip (which will use two times the space
- the non-tiled version would use) max if base texture is large enough */
- if ((numLevels == 1) ||
- (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
- (baseImage->Width * texelBytes > 64)) ||
- ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
- /* R100 has two microtile bits (only the txoffset reg, not the blitter)
- weird: X2 + OPT: 32bit correct, 16bit completely hosed
- X2: 32bit correct, 16bit correct
- OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
- t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
- }
- }
- if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
- /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
- in the case if height is smaller than 16 (not 100% sure), as does the r200,
- so need to disable macro tiling in that case */
- if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
- t->tile_bits |= RADEON_TXO_MACRO_TILE;
- }
- }
- }
-
- for (i = 0; i < numLevels; i++) {
- const struct gl_texture_image *texImage;
- GLuint size;
-
- texImage = tObj->Image[0][i + t->base.firstLevel];
- if ( !texImage )
- break;
-
- /* find image size in bytes */
- if (texImage->IsCompressed) {
- /* need to calculate the size AFTER padding even though the texture is
- submitted without padding.
- Only handle pot textures currently - don't know if npot is even possible,
- size calculation would certainly need (trivial) adjustments.
- Align (and later pad) to 32byte, not sure what that 64byte blit width is
- good for? */
- if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) {
- /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
- if ((texImage->Width + 3) < 8) /* width one block */
- size = texImage->CompressedSize * 4;
- else if ((texImage->Width + 3) < 16)
- size = texImage->CompressedSize * 2;
- else size = texImage->CompressedSize;
- }
- else /* DXT3/5, 16 bytes per block */
- if ((texImage->Width + 3) < 8)
- size = texImage->CompressedSize * 2;
- else size = texImage->CompressedSize;
- }
- else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
- }
- else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
- /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
- though the actual offset may be different (if texture is less than
- 32 bytes width) to the untiled case */
- int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
- size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
- }
- else {
- int w = (texImage->Width * texelBytes + 31) & ~31;
- size = w * texImage->Height * texImage->Depth;
- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
- }
- assert(size > 0);
-
- /* Align to 32-byte offset. It is faster to do this unconditionally
- * (no branch penalty).
- */
-
- curOffset = (curOffset + 0x1f) & ~0x1f;
-
- if (texelBytes) {
- t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
- t->image[0][i].y = 0;
- t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
- t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
- }
- else {
- t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
- t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
- t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
- t->image[0][i].height = size / t->image[0][i].width;
- }
-
-#if 0
- /* for debugging only and only applicable to non-rectangle targets */
- assert(size % t->image[0][i].width == 0);
- assert(t->image[0][i].x == 0
- || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
-#endif
-
- if (0)
- fprintf(stderr,
- "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
- i, texImage->Width, texImage->Height,
- t->image[0][i].x, t->image[0][i].y,
- t->image[0][i].width, t->image[0][i].height, size, curOffset);
-
- curOffset += size;
-
- }
-
- /* Align the total size of texture memory block.
- */
- t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
-
- /* Setup remaining cube face blits, if needed */
- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
- const GLuint faceSize = t->base.totalSize;
- GLuint face;
- /* reuse face 0 x/y/width/height - just update the offset when uploading */
- for (face = 1; face < 6; face++) {
- for (i = 0; i < numLevels; i++) {
- t->image[face][i].x = t->image[0][i].x;
- t->image[face][i].y = t->image[0][i].y;
- t->image[face][i].width = t->image[0][i].width;
- t->image[face][i].height = t->image[0][i].height;
- }
- }
- t->base.totalSize = 6 * faceSize; /* total texmem needed */
- }
-
- /* Hardware state:
- */
- t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
- t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT;
-
- t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
- RADEON_TXFORMAT_HEIGHT_MASK |
- RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
- RADEON_TXFORMAT_F5_WIDTH_MASK |
- RADEON_TXFORMAT_F5_HEIGHT_MASK);
- t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
- (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
-
- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
- assert(log2Width == log2Height);
- t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
- (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
- (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
- t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
- (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
- (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
- (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
- (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
- (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
- (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
- (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
- }
-
- t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
- ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
-
- /* Only need to round to nearest 32 for textures, but the blitter
- * requires 64-byte aligned pitches, and we may/may not need the
- * blitter. NPOT only!
- */
- if ( !t->image_override ) {
- if (baseImage->IsCompressed)
- t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
- else
- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
- t->pp_txpitch -= 32;
- }
-
- t->dirty_state = TEX_ALL;
-
- /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */
-}
-
-
-
/* ================================================================
* Texture combine functions
*/
@@ -503,7 +260,7 @@ do { \
static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
GLuint color_combine, alpha_combine;
const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
@@ -520,7 +277,7 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
assert( (texUnit->_ReallyEnabled == 0)
|| (texUnit->_Current != NULL) );
- if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+ if ( RADEON_DEBUG & RADEON_TEXTURE ) {
fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
}
@@ -846,22 +603,21 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
unsigned long long offset, GLint depth, GLuint pitch)
{
- radeonContextPtr rmesa = pDRICtx->driverPrivate;
+ r100ContextPtr rmesa = pDRICtx->driverPrivate;
struct gl_texture_object *tObj =
- _mesa_lookup_texture(rmesa->glCtx, texname);
- radeonTexObjPtr t;
+ _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+ radeonTexObjPtr t = radeon_tex_obj(tObj);
if (tObj == NULL)
return;
- t = (radeonTexObjPtr) tObj->DriverData;
-
t->image_override = GL_TRUE;
if (!offset)
return;
-
- t->pp_txoffset = offset;
+
+ t->bo = NULL;
+ t->override_offset = offset;
t->pp_txpitch = pitch - 32;
switch (depth) {
@@ -881,6 +637,125 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
}
}
+void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
+ __DRIdrawable *dPriv)
+{
+ struct gl_texture_unit *texUnit;
+ struct gl_texture_object *texObj;
+ struct gl_texture_image *texImage;
+ struct radeon_renderbuffer *rb;
+ radeon_texture_image *rImage;
+ radeonContextPtr radeon;
+ r100ContextPtr rmesa;
+ struct radeon_framebuffer *rfb;
+ radeonTexObjPtr t;
+ uint32_t pitch_val;
+ uint32_t internalFormat, type, format;
+
+ type = GL_BGRA;
+ format = GL_UNSIGNED_BYTE;
+ internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4);
+
+ radeon = pDRICtx->driverPrivate;
+ rmesa = pDRICtx->driverPrivate;
+
+ rfb = dPriv->driverPrivate;
+ texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+ texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+ texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+
+ rImage = get_radeon_texture_image(texImage);
+ t = radeon_tex_obj(texObj);
+ if (t == NULL) {
+ return;
+ }
+
+ radeon_update_renderbuffers(pDRICtx, dPriv);
+ /* back & depth buffer are useless free them right away */
+ rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
+ }
+ rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+ if (rb && rb->bo) {
+ radeon_bo_unref(rb->bo);
+ rb->bo = NULL;
+ }
+ rb = rfb->color_rb[0];
+ if (rb->bo == NULL) {
+ /* Failed to BO for the buffer */
+ return;
+ }
+
+ _mesa_lock_texture(radeon->glCtx, texObj);
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+ if (rImage->bo) {
+ radeon_bo_unref(rImage->bo);
+ rImage->bo = NULL;
+ }
+ if (t->mt) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = NULL;
+ }
+ if (rImage->mt) {
+ radeon_miptree_unreference(rImage->mt);
+ rImage->mt = NULL;
+ }
+ _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+ rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+ texImage->RowStride = rb->pitch / rb->cpp;
+ texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
+ internalFormat,
+ type, format, 0);
+ rImage->bo = rb->bo;
+ radeon_bo_ref(rImage->bo);
+ t->bo = rb->bo;
+ radeon_bo_ref(t->bo);
+ t->tile_bits = 0;
+ t->image_override = GL_TRUE;
+ t->override_offset = 0;
+ t->pp_txpitch &= (1 << 13) -1;
+ pitch_val = rb->pitch;
+ switch (rb->cpp) {
+ case 4:
+ if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT)
+ t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
+ else
+ t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format;
+ t->pp_txfilter |= tx_table[MESA_FORMAT_ARGB8888].filter;
+ break;
+ case 3:
+ default:
+ t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
+ t->pp_txfilter |= tx_table[MESA_FORMAT_RGB888].filter;
+ break;
+ case 2:
+ t->pp_txformat = tx_table[MESA_FORMAT_RGB565].format;
+ t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter;
+ break;
+ }
+ t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
+ | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
+ t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
+ t->pp_txpitch = pitch_val;
+ t->pp_txpitch -= 32;
+
+ t->validated = GL_TRUE;
+ _mesa_unlock_texture(radeon->glCtx, texObj);
+ return;
+}
+
+
+void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{
+ radeonSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
+}
+
+
#define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK | \
RADEON_MIN_FILTER_MASK | \
RADEON_MAG_FILTER_MASK | \
@@ -901,12 +776,53 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
RADEON_TXFORMAT_NON_POWER2)
-static void import_tex_obj_state( radeonContextPtr rmesa,
+static void disable_tex_obj_state( r100ContextPtr rmesa,
+ int unit )
+{
+ RADEON_STATECHANGE( rmesa, tex[unit] );
+
+ RADEON_STATECHANGE( rmesa, tcl );
+ rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
+ RADEON_Q_BIT(unit));
+
+ if (rmesa->radeon.TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
+ TCL_FALLBACK( rmesa->radeon.glCtx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+ rmesa->recheck_texgen[unit] = GL_TRUE;
+ }
+
+ if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
+ /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
+ cubic_map bit on unit 2 when the unit is disabled, otherwise every
+ 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
+ units, better be safe than sorry though).*/
+ RADEON_STATECHANGE( rmesa, tex[unit] );
+ rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
+ }
+
+ {
+ GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+ GLuint tmp = rmesa->TexGenEnabled;
+
+ rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+ rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+ rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+ rmesa->TexGenNeedNormals[unit] = 0;
+ rmesa->TexGenEnabled |=
+ (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+
+ if (tmp != rmesa->TexGenEnabled) {
+ rmesa->recheck_texgen[unit] = GL_TRUE;
+ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+ }
+ }
+}
+
+static void import_tex_obj_state( r100ContextPtr rmesa,
int unit,
radeonTexObjPtr texobj )
{
/* do not use RADEON_DB_STATE to avoid stale texture caches */
- int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+ uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
RADEON_STATECHANGE( rmesa, tex[unit] );
@@ -915,10 +831,9 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
- cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
- if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+ if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
@@ -928,22 +843,12 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
else {
se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit);
- if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
- int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
- GLuint bytesPerFace = texobj->base.totalSize / 6;
- ASSERT(texobj->base.totalSize % 6 == 0);
+ if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
+ uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
RADEON_STATECHANGE( rmesa, cube[unit] );
cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
- /* dont know if this setup conforms to OpenGL..
- * at least it matches the behavior of mesa software renderer
- */
- cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */
- cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */
- cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */
- cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */
- cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */
- cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */
+ /* state filled out in the cube_emit */
}
}
@@ -952,13 +857,11 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
}
- texobj->dirty_state &= ~(1<<unit);
+ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
-
-
-static void set_texgen_matrix( radeonContextPtr rmesa,
+static void set_texgen_matrix( r100ContextPtr rmesa,
GLuint unit,
const GLfloat *s_plane,
const GLfloat *t_plane,
@@ -986,14 +889,14 @@ static void set_texgen_matrix( radeonContextPtr rmesa,
rmesa->TexGenMatrix[unit].m[15] = q_plane[3];
rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
/* Returns GL_FALSE if fallback required.
*/
static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
GLuint tmp = rmesa->TexGenEnabled;
@@ -1030,7 +933,7 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
(texUnit->GenS.Mode != texUnit->GenQ.Mode)) ) {
/* Mixed modes, fallback:
*/
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback mixed texgen\n");
return GL_FALSE;
}
@@ -1038,7 +941,7 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
}
else {
/* some texgen mode not including both S and T bits */
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback mixed texgen/nontexgen\n");
return GL_FALSE;
}
@@ -1088,289 +991,195 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
default:
/* Unsupported mode, fallback:
*/
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback GL_SPHERE_MAP\n");
return GL_FALSE;
}
if (tmp != rmesa->TexGenEnabled) {
- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
return GL_TRUE;
}
-
-static void disable_tex( GLcontext *ctx, int unit )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
- if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
- /* Texture unit disabled */
- if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
- /* The old texture is no longer bound to this texture unit.
- * Mark it as such.
- */
-
- rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
- rmesa->state.texture.unit[unit].texobj = NULL;
- }
-
- RADEON_STATECHANGE( rmesa, ctx );
- rmesa->hw.ctx.cmd[CTX_PP_CNTL] &=
- ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
-
- RADEON_STATECHANGE( rmesa, tcl );
- rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
- RADEON_Q_BIT(unit));
-
- if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
- TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
- rmesa->recheck_texgen[unit] = GL_TRUE;
- }
-
- if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
- /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
- cubic_map bit on unit 2 when the unit is disabled, otherwise every
- 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
- units, better be safe than sorry though).*/
- RADEON_STATECHANGE( rmesa, tex[unit] );
- rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
- }
-
- {
- GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
- GLuint tmp = rmesa->TexGenEnabled;
-
- rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
- rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
- rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
- rmesa->TexGenNeedNormals[unit] = 0;
- rmesa->TexGenEnabled |=
- (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
-
- if (tmp != rmesa->TexGenEnabled) {
- rmesa->recheck_texgen[unit] = GL_TRUE;
- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
- }
- }
- }
-}
-
-static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
-
- /* Need to load the 2d images associated with this unit.
- */
- if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
- t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
- t->base.dirty_images[0] = ~0;
- }
-
- ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
-
- if ( t->base.dirty_images[0] ) {
- RADEON_FIREVERTICES( rmesa );
- radeonSetTexImages( rmesa, tObj );
- radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
- if ( !t->base.memBlock && !t->image_override )
- return GL_FALSE;
- }
-
- return GL_TRUE;
-}
-
-static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
+/**
+ * Compute the cached hardware register values for the given texture object.
+ *
+ * \param rmesa Context pointer
+ * \param t the r300 texture object
+ */
+static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
- GLuint face;
+ const struct gl_texture_image *firstImage;
+ GLint log2Width, log2Height, log2Depth, texelBytes;
- /* Need to load the 2d images associated with this unit.
- */
- if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
- t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
- for (face = 0; face < 6; face++)
- t->base.dirty_images[face] = ~0;
+ if ( t->bo ) {
+ return GL_TRUE;
}
- ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+ firstImage = t->base.Image[0][t->mt->firstLevel];
- if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
- t->base.dirty_images[2] || t->base.dirty_images[3] ||
- t->base.dirty_images[4] || t->base.dirty_images[5] ) {
- /* flush */
- RADEON_FIREVERTICES( rmesa );
- /* layout memory space, once for all faces */
- radeonSetTexImages( rmesa, tObj );
+ if (firstImage->Border > 0) {
+ fprintf(stderr, "%s: border\n", __FUNCTION__);
+ return GL_FALSE;
}
- /* upload (per face) */
- for (face = 0; face < 6; face++) {
- if (t->base.dirty_images[face]) {
- radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face );
+ log2Width = firstImage->WidthLog2;
+ log2Height = firstImage->HeightLog2;
+ log2Depth = firstImage->DepthLog2;
+ texelBytes = firstImage->TexFormat->TexelBytes;
+
+ if (!t->image_override) {
+ if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
+ const struct tx_table *table = tx_table;
+
+ t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
+ RADEON_TXFORMAT_ALPHA_IN_MAP);
+ t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
+
+ t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
+ t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
+ } else {
+ _mesa_problem(NULL, "unexpected texture format in %s",
+ __FUNCTION__);
+ return GL_FALSE;
}
}
-
- if ( !t->base.memBlock ) {
- /* texmem alloc failed, use s/w fallback */
- return GL_FALSE;
+
+ t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
+ t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << RADEON_MAX_MIP_LEVEL_SHIFT;
+
+ t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
+ RADEON_TXFORMAT_HEIGHT_MASK |
+ RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
+ RADEON_TXFORMAT_F5_WIDTH_MASK |
+ RADEON_TXFORMAT_F5_HEIGHT_MASK);
+ t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
+ (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
+
+ t->tile_bits = 0;
+
+ if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
+ ASSERT(log2Width == log2Height);
+ t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
+ (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
+ /* don't think we need this bit, if it exists at all - fglrx does not set it */
+ (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
+ t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
+ (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
+ (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
+ (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
+ (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
+ (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
+ (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
+ (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
}
- return GL_TRUE;
-}
-
-static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+ t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT)
+ | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
- if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) {
- t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
- t->base.dirty_images[0] = ~0;
+ if ( !t->image_override ) {
+ if (firstImage->IsCompressed)
+ t->pp_txpitch = (firstImage->Width + 63) & ~(63);
+ else
+ t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
+ t->pp_txpitch -= 32;
}
- ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
-
- if ( t->base.dirty_images[0] ) {
- RADEON_FIREVERTICES( rmesa );
- radeonSetTexImages( rmesa, tObj );
- radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
- if ( !t->base.memBlock &&
- !t->image_override /* && !rmesa->prefer_gart_client_texturing FIXME */ ) {
- fprintf(stderr, "%s: upload failed\n", __FUNCTION__);
- return GL_FALSE;
- }
+ if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+ t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
}
return GL_TRUE;
}
-
-static GLboolean update_tex_common( GLcontext *ctx, int unit )
+static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *tObj = texUnit->_Current;
- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
- GLenum format;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ radeonTexObj *t = radeon_tex_obj(texObj);
+ int ret;
- /* Fallback if there's a texture border */
- if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
- fprintf(stderr, "%s: border\n", __FUNCTION__);
+ if (!radeon_validate_texture_miptree(ctx, texObj))
return GL_FALSE;
- }
+
+ ret = setup_hardware_state(rmesa, t, unit);
+ if (ret == GL_FALSE)
+ return GL_FALSE;
+
/* yuv conversion only works in first unit */
if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB))
return GL_FALSE;
- /* Update state if this is a different texture object to last
- * time.
- */
- if ( rmesa->state.texture.unit[unit].texobj != t ) {
- if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
- /* The old texture is no longer bound to this texture unit.
- * Mark it as such.
- */
-
- rmesa->state.texture.unit[unit].texobj->base.bound &=
- ~(1UL << unit);
- }
-
- rmesa->state.texture.unit[unit].texobj = t;
- t->base.bound |= (1UL << unit);
- t->dirty_state |= 1<<unit;
- driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
- }
+ RADEON_STATECHANGE( rmesa, ctx );
+ rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=
+ (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+ RADEON_STATECHANGE( rmesa, tcl );
+ rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
- /* Newly enabled?
- */
- if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
- RADEON_STATECHANGE( rmesa, ctx );
- rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=
- (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+ rmesa->recheck_texgen[unit] = GL_TRUE;
- RADEON_STATECHANGE( rmesa, tcl );
-
- rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
-
- rmesa->recheck_texgen[unit] = GL_TRUE;
- }
-
- if (t->dirty_state & (1<<unit)) {
- import_tex_obj_state( rmesa, unit, t );
- /* may need to update texture matrix (for texrect adjustments) */
- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
- }
+ import_tex_obj_state( rmesa, unit, t );
if (rmesa->recheck_texgen[unit]) {
GLboolean fallback = !radeon_validate_texgen( ctx, unit );
TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
rmesa->recheck_texgen[unit] = 0;
- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
- format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
- if ( rmesa->state.texture.unit[unit].format != format ||
- rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
- rmesa->state.texture.unit[unit].format = format;
- rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
- if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
- return GL_FALSE;
- }
+ if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
+ return GL_FALSE;
}
-
FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
+
+ t->validated = GL_TRUE;
return !t->border_fallback;
}
-
-
static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
{
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
- if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
- return (enable_tex_rect( ctx, unit ) &&
- update_tex_common( ctx, unit ));
- }
- else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
- return (enable_tex_2d( ctx, unit ) &&
- update_tex_common( ctx, unit ));
- }
- else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
- return (enable_tex_cube( ctx, unit ) &&
- update_tex_common( ctx, unit ));
+ if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_3D_BIT) {
+ rmesa->state.texture.unit[unit].texobj = NULL;
+ return GL_FALSE;
}
- else if ( texUnit->_ReallyEnabled ) {
- return GL_FALSE;
+
+ if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
+ /* disable the unit */
+ disable_tex_obj_state(rmesa, unit);
+ rmesa->state.texture.unit[unit].texobj = NULL;
+ return GL_TRUE;
}
- else {
- disable_tex( ctx, unit );
- return GL_TRUE;
+
+ if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
+ _mesa_warning(ctx,
+ "failed to validate texture for unit %d.\n",
+ unit);
+ rmesa->state.texture.unit[unit].texobj = NULL;
+ return GL_FALSE;
}
+ rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
+ return GL_TRUE;
}
void radeonUpdateTextureState( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLboolean ok;
+ /* set the ctx all textures off */
+ RADEON_STATECHANGE( rmesa, ctx );
+ rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((RADEON_TEX_ENABLE_MASK) | (RADEON_TEX_BLEND_ENABLE_MASK));
+
ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
radeonUpdateTextureUnit( ctx, 1 ) &&
radeonUpdateTextureUnit( ctx, 2 ));
FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
- if (rmesa->TclFallback)
+ if (rmesa->radeon.TclFallback)
radeonChooseVertexState( ctx );
}
diff --git a/radeon/radeon_texture.c b/radeon/radeon_texture.c
new file mode 100644
index 0000000..fad3d1c
--- /dev/null
+++ b/radeon/radeon_texture.c
@@ -0,0 +1,1052 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ * Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+ *
+ * The Weather Channel (TM) funded Tungsten Graphics to develop the
+ * initial release of the Radeon 8500 driver under the XFree86 license.
+ * This notice must be preserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/convolve.h"
+#include "main/mipmap.h"
+#include "main/texcompress.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/texgetimage.h"
+
+#include "xmlpool.h" /* for symbolic values of enum-type options */
+
+#include "radeon_common.h"
+
+#include "radeon_mipmap_tree.h"
+
+
+static void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
+ GLuint numrows, GLuint rowsize)
+{
+ assert(rowsize <= dststride);
+ assert(rowsize <= srcstride);
+
+ if (rowsize == srcstride && rowsize == dststride) {
+ memcpy(dst, src, numrows*rowsize);
+ } else {
+ GLuint i;
+ for(i = 0; i < numrows; ++i) {
+ memcpy(dst, src, rowsize);
+ dst += dststride;
+ src += srcstride;
+ }
+ }
+}
+
+/* textures */
+/**
+ * Allocate an empty texture image object.
+ */
+struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx)
+{
+ return CALLOC(sizeof(radeon_texture_image));
+}
+
+/**
+ * Free memory associated with this texture image.
+ */
+void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage)
+{
+ radeon_texture_image* image = get_radeon_texture_image(timage);
+
+ if (image->mt) {
+ radeon_miptree_unreference(image->mt);
+ image->mt = 0;
+ assert(!image->base.Data);
+ } else {
+ _mesa_free_texture_image_data(ctx, timage);
+ }
+ if (image->bo) {
+ radeon_bo_unref(image->bo);
+ image->bo = NULL;
+ }
+ if (timage->Data) {
+ _mesa_free_texmemory(timage->Data);
+ timage->Data = NULL;
+ }
+}
+
+/* Set Data pointer and additional data for mapped texture image */
+static void teximage_set_map_data(radeon_texture_image *image)
+{
+ radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
+
+ image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset;
+ image->base.RowStride = lvl->rowstride / image->mt->bpp;
+}
+
+
+/**
+ * Map a single texture image for glTexImage and friends.
+ */
+void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable)
+{
+ if (image->mt) {
+ assert(!image->base.Data);
+
+ radeon_bo_map(image->mt->bo, write_enable);
+ teximage_set_map_data(image);
+ }
+}
+
+
+void radeon_teximage_unmap(radeon_texture_image *image)
+{
+ if (image->mt) {
+ assert(image->base.Data);
+
+ image->base.Data = 0;
+ radeon_bo_unmap(image->mt->bo);
+ }
+}
+
+static void map_override(GLcontext *ctx, radeonTexObj *t)
+{
+ radeon_texture_image *img = get_radeon_texture_image(t->base.Image[0][0]);
+
+ radeon_bo_map(t->bo, GL_FALSE);
+
+ img->base.Data = t->bo->ptr;
+ _mesa_set_fetch_functions(&img->base, 2);
+}
+
+static void unmap_override(GLcontext *ctx, radeonTexObj *t)
+{
+ radeon_texture_image *img = get_radeon_texture_image(t->base.Image[0][0]);
+
+ radeon_bo_unmap(t->bo);
+
+ img->base.Data = NULL;
+}
+
+/**
+ * Map a validated texture for reading during software rendering.
+ */
+void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
+{
+ radeonTexObj* t = radeon_tex_obj(texObj);
+ int face, level;
+
+ if (!radeon_validate_texture_miptree(ctx, texObj))
+ return;
+
+ /* for r100 3D sw fallbacks don't have mt */
+ if (t->image_override && t->bo)
+ map_override(ctx, t);
+
+ if (!t->mt)
+ return;
+
+ radeon_bo_map(t->mt->bo, GL_FALSE);
+ for(face = 0; face < t->mt->faces; ++face) {
+ for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
+ teximage_set_map_data(get_radeon_texture_image(texObj->Image[face][level]));
+ }
+}
+
+void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
+{
+ radeonTexObj* t = radeon_tex_obj(texObj);
+ int face, level;
+
+ if (t->image_override && t->bo)
+ unmap_override(ctx, t);
+ /* for r100 3D sw fallbacks don't have mt */
+ if (!t->mt)
+ return;
+
+ for(face = 0; face < t->mt->faces; ++face) {
+ for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
+ texObj->Image[face][level]->Data = 0;
+ }
+ radeon_bo_unmap(t->mt->bo);
+}
+
+GLuint radeon_face_for_target(GLenum target)
+{
+ switch (target) {
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+ default:
+ return 0;
+ }
+}
+
+/**
+ * Wraps Mesa's implementation to ensure that the base level image is mapped.
+ *
+ * This relies on internal details of _mesa_generate_mipmap, in particular
+ * the fact that the memory for recreated texture images is always freed.
+ */
+static void radeon_generate_mipmap(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj)
+{
+ radeonTexObj* t = radeon_tex_obj(texObj);
+ GLuint nr_faces = (t->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+ int i, face;
+
+
+ _mesa_generate_mipmap(ctx, target, texObj);
+
+ for (face = 0; face < nr_faces; face++) {
+ for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
+ radeon_texture_image *image;
+
+ image = get_radeon_texture_image(texObj->Image[face][i]);
+
+ if (image == NULL)
+ break;
+
+ image->mtlevel = i;
+ image->mtface = face;
+
+ radeon_miptree_unreference(image->mt);
+ image->mt = NULL;
+ }
+ }
+
+}
+
+void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj)
+{
+ GLuint face = radeon_face_for_target(target);
+ radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]);
+
+ radeon_teximage_map(baseimage, GL_FALSE);
+ radeon_generate_mipmap(ctx, target, texObj);
+ radeon_teximage_unmap(baseimage);
+}
+
+
+/* try to find a format which will only need a memcopy */
+static const struct gl_texture_format *radeonChoose8888TexFormat(radeonContextPtr rmesa,
+ GLenum srcFormat,
+ GLenum srcType, GLboolean fbo)
+{
+ const GLuint ui = 1;
+ const GLubyte littleEndian = *((const GLubyte *)&ui);
+
+ /* r100 can only do this */
+ if (IS_R100_CLASS(rmesa->radeonScreen) || fbo)
+ return _dri_texformat_argb8888;
+
+ if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+ (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+ (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+ (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
+ return &_mesa_texformat_rgba8888;
+ } else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+ (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+ (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+ (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
+ return &_mesa_texformat_rgba8888_rev;
+ } else if (IS_R200_CLASS(rmesa->radeonScreen)) {
+ return _dri_texformat_argb8888;
+ } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+ srcType == GL_UNSIGNED_INT_8_8_8_8)) {
+ return &_mesa_texformat_argb8888_rev;
+ } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+ srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
+ return &_mesa_texformat_argb8888;
+ } else
+ return _dri_texformat_argb8888;
+}
+
+const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx,
+ GLint internalFormat,
+ GLenum format,
+ GLenum type)
+{
+ return radeonChooseTextureFormat(ctx, internalFormat, format,
+ type, 0);
+}
+
+const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
+ GLint internalFormat,
+ GLenum format,
+ GLenum type, GLboolean fbo)
+{
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ const GLboolean do32bpt =
+ (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
+ const GLboolean force16bpt =
+ (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
+ (void)format;
+
+#if 0
+ fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n",
+ _mesa_lookup_enum_by_nr(internalFormat), internalFormat,
+ _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+ fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt);
+#endif
+
+ switch (internalFormat) {
+ case 4:
+ case GL_RGBA:
+ case GL_COMPRESSED_RGBA:
+ switch (type) {
+ case GL_UNSIGNED_INT_10_10_10_2:
+ case GL_UNSIGNED_INT_2_10_10_10_REV:
+ return do32bpt ? _dri_texformat_argb8888 :
+ _dri_texformat_argb1555;
+ case GL_UNSIGNED_SHORT_4_4_4_4:
+ case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+ return _dri_texformat_argb4444;
+ case GL_UNSIGNED_SHORT_5_5_5_1:
+ case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+ return _dri_texformat_argb1555;
+ default:
+ return do32bpt ? radeonChoose8888TexFormat(rmesa, format, type, fbo) :
+ _dri_texformat_argb4444;
+ }
+
+ case 3:
+ case GL_RGB:
+ case GL_COMPRESSED_RGB:
+ switch (type) {
+ case GL_UNSIGNED_SHORT_4_4_4_4:
+ case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+ return _dri_texformat_argb4444;
+ case GL_UNSIGNED_SHORT_5_5_5_1:
+ case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+ return _dri_texformat_argb1555;
+ case GL_UNSIGNED_SHORT_5_6_5:
+ case GL_UNSIGNED_SHORT_5_6_5_REV:
+ return _dri_texformat_rgb565;
+ default:
+ return do32bpt ? _dri_texformat_argb8888 :
+ _dri_texformat_rgb565;
+ }
+
+ case GL_RGBA8:
+ case GL_RGB10_A2:
+ case GL_RGBA12:
+ case GL_RGBA16:
+ return !force16bpt ?
+ radeonChoose8888TexFormat(rmesa, format, type, fbo) :
+ _dri_texformat_argb4444;
+
+ case GL_RGBA4:
+ case GL_RGBA2:
+ return _dri_texformat_argb4444;
+
+ case GL_RGB5_A1:
+ return _dri_texformat_argb1555;
+
+ case GL_RGB8:
+ case GL_RGB10:
+ case GL_RGB12:
+ case GL_RGB16:
+ return !force16bpt ? _dri_texformat_argb8888 :
+ _dri_texformat_rgb565;
+
+ case GL_RGB5:
+ case GL_RGB4:
+ case GL_R3_G3_B2:
+ return _dri_texformat_rgb565;
+
+ case GL_ALPHA:
+ case GL_ALPHA4:
+ case GL_ALPHA8:
+ case GL_ALPHA12:
+ case GL_ALPHA16:
+ case GL_COMPRESSED_ALPHA:
+ /* r200: can't use a8 format since interpreting hw I8 as a8 would result
+ in wrong rgb values (same as alpha value instead of 0). */
+ if (IS_R200_CLASS(rmesa->radeonScreen))
+ return _dri_texformat_al88;
+ else
+ return _dri_texformat_a8;
+ case 1:
+ case GL_LUMINANCE:
+ case GL_LUMINANCE4:
+ case GL_LUMINANCE8:
+ case GL_LUMINANCE12:
+ case GL_LUMINANCE16:
+ case GL_COMPRESSED_LUMINANCE:
+ return _dri_texformat_l8;
+
+ case 2:
+ case GL_LUMINANCE_ALPHA:
+ case GL_LUMINANCE4_ALPHA4:
+ case GL_LUMINANCE6_ALPHA2:
+ case GL_LUMINANCE8_ALPHA8:
+ case GL_LUMINANCE12_ALPHA4:
+ case GL_LUMINANCE12_ALPHA12:
+ case GL_LUMINANCE16_ALPHA16:
+ case GL_COMPRESSED_LUMINANCE_ALPHA:
+ return _dri_texformat_al88;
+
+ case GL_INTENSITY:
+ case GL_INTENSITY4:
+ case GL_INTENSITY8:
+ case GL_INTENSITY12:
+ case GL_INTENSITY16:
+ case GL_COMPRESSED_INTENSITY:
+ return _dri_texformat_i8;
+
+ case GL_YCBCR_MESA:
+ if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+ type == GL_UNSIGNED_BYTE)
+ return &_mesa_texformat_ycbcr;
+ else
+ return &_mesa_texformat_ycbcr_rev;
+
+ case GL_RGB_S3TC:
+ case GL_RGB4_S3TC:
+ case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+ return &_mesa_texformat_rgb_dxt1;
+
+ case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+ return &_mesa_texformat_rgba_dxt1;
+
+ case GL_RGBA_S3TC:
+ case GL_RGBA4_S3TC:
+ case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+ return &_mesa_texformat_rgba_dxt3;
+
+ case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+ return &_mesa_texformat_rgba_dxt5;
+
+ case GL_ALPHA16F_ARB:
+ return &_mesa_texformat_alpha_float16;
+ case GL_ALPHA32F_ARB:
+ return &_mesa_texformat_alpha_float32;
+ case GL_LUMINANCE16F_ARB:
+ return &_mesa_texformat_luminance_float16;
+ case GL_LUMINANCE32F_ARB:
+ return &_mesa_texformat_luminance_float32;
+ case GL_LUMINANCE_ALPHA16F_ARB:
+ return &_mesa_texformat_luminance_alpha_float16;
+ case GL_LUMINANCE_ALPHA32F_ARB:
+ return &_mesa_texformat_luminance_alpha_float32;
+ case GL_INTENSITY16F_ARB:
+ return &_mesa_texformat_intensity_float16;
+ case GL_INTENSITY32F_ARB:
+ return &_mesa_texformat_intensity_float32;
+ case GL_RGB16F_ARB:
+ return &_mesa_texformat_rgba_float16;
+ case GL_RGB32F_ARB:
+ return &_mesa_texformat_rgba_float32;
+ case GL_RGBA16F_ARB:
+ return &_mesa_texformat_rgba_float16;
+ case GL_RGBA32F_ARB:
+ return &_mesa_texformat_rgba_float32;
+
+ case GL_DEPTH_COMPONENT:
+ case GL_DEPTH_COMPONENT16:
+ case GL_DEPTH_COMPONENT24:
+ case GL_DEPTH_COMPONENT32:
+ case GL_DEPTH_STENCIL_EXT:
+ case GL_DEPTH24_STENCIL8_EXT:
+ return &_mesa_texformat_s8_z24;
+
+ /* EXT_texture_sRGB */
+ case GL_SRGB:
+ case GL_SRGB8:
+ case GL_SRGB_ALPHA:
+ case GL_SRGB8_ALPHA8:
+ case GL_COMPRESSED_SRGB:
+ case GL_COMPRESSED_SRGB_ALPHA:
+ return &_mesa_texformat_srgba8;
+
+ case GL_SLUMINANCE:
+ case GL_SLUMINANCE8:
+ case GL_COMPRESSED_SLUMINANCE:
+ return &_mesa_texformat_sl8;
+
+ case GL_SLUMINANCE_ALPHA:
+ case GL_SLUMINANCE8_ALPHA8:
+ case GL_COMPRESSED_SLUMINANCE_ALPHA:
+ return &_mesa_texformat_sla8;
+
+ default:
+ _mesa_problem(ctx,
+ "unexpected internalFormat 0x%x in %s",
+ (int)internalFormat, __func__);
+ return NULL;
+ }
+
+ return NULL; /* never get here */
+}
+
+/**
+ * All glTexImage calls go through this function.
+ */
+static void radeon_teximage(
+ GLcontext *ctx, int dims,
+ GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint height, GLint depth,
+ GLsizei imageSize,
+ GLenum format, GLenum type, const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage,
+ int compressed)
+{
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ radeonTexObj* t = radeon_tex_obj(texObj);
+ radeon_texture_image* image = get_radeon_texture_image(texImage);
+ GLuint dstRowStride;
+ GLint postConvWidth = width;
+ GLint postConvHeight = height;
+ GLuint texelBytes;
+ GLuint face = radeon_face_for_target(target);
+
+ radeon_firevertices(rmesa);
+
+ t->validated = GL_FALSE;
+
+ if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
+ _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
+ &postConvHeight);
+ }
+
+ /* Choose and fill in the texture format for this image */
+ texImage->TexFormat = radeonChooseTextureFormat(ctx, internalFormat, format, type, 0);
+ _mesa_set_fetch_functions(texImage, dims);
+
+ if (texImage->TexFormat->TexelBytes == 0) {
+ texelBytes = 0;
+ texImage->IsCompressed = GL_TRUE;
+ texImage->CompressedSize =
+ ctx->Driver.CompressedTextureSize(ctx, texImage->Width,
+ texImage->Height, texImage->Depth,
+ texImage->TexFormat->MesaFormat);
+ } else {
+ texImage->IsCompressed = GL_FALSE;
+ texImage->CompressedSize = 0;
+
+ texelBytes = texImage->TexFormat->TexelBytes;
+ /* Minimum pitch of 32 bytes */
+ if (postConvWidth * texelBytes < 32) {
+ postConvWidth = 32 / texelBytes;
+ texImage->RowStride = postConvWidth;
+ }
+ if (!image->mt) {
+ assert(texImage->RowStride == postConvWidth);
+ }
+ }
+
+ /* Allocate memory for image */
+ radeonFreeTexImageData(ctx, texImage); /* Mesa core only clears texImage->Data but not image->mt */
+
+ if (t->mt &&
+ t->mt->firstLevel == level &&
+ t->mt->lastLevel == level &&
+ t->mt->target != GL_TEXTURE_CUBE_MAP_ARB &&
+ !radeon_miptree_matches_image(t->mt, texImage, face, level)) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = NULL;
+ }
+
+ if (!t->mt)
+ radeon_try_alloc_miptree(rmesa, t, image, face, level);
+ if (t->mt && radeon_miptree_matches_image(t->mt, texImage, face, level)) {
+ radeon_mipmap_level *lvl;
+ image->mt = t->mt;
+ image->mtlevel = level - t->mt->firstLevel;
+ image->mtface = face;
+ radeon_miptree_reference(t->mt);
+ lvl = &image->mt->levels[image->mtlevel];
+ dstRowStride = lvl->rowstride;
+ } else {
+ int size;
+ if (texImage->IsCompressed) {
+ size = texImage->CompressedSize;
+ } else {
+ size = texImage->Width * texImage->Height * texImage->Depth * texImage->TexFormat->TexelBytes;
+ }
+ texImage->Data = _mesa_alloc_texmemory(size);
+ }
+
+ /* Upload texture image; note that the spec allows pixels to be NULL */
+ if (compressed) {
+ pixels = _mesa_validate_pbo_compressed_teximage(
+ ctx, imageSize, pixels, packing, "glCompressedTexImage");
+ } else {
+ pixels = _mesa_validate_pbo_teximage(
+ ctx, dims, width, height, depth,
+ format, type, pixels, packing, "glTexImage");
+ }
+
+ if (pixels) {
+ radeon_teximage_map(image, GL_TRUE);
+ if (compressed) {
+ if (image->mt) {
+ uint32_t srcRowStride, bytesPerRow, rows;
+ srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
+ bytesPerRow = srcRowStride;
+ rows = (height + 3) / 4;
+ copy_rows(texImage->Data, image->mt->levels[level].rowstride,
+ pixels, srcRowStride, rows, bytesPerRow);
+ } else {
+ memcpy(texImage->Data, pixels, imageSize);
+ }
+ } else {
+ GLuint dstRowStride;
+ GLuint *dstImageOffsets;
+
+ if (image->mt) {
+ radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
+ dstRowStride = lvl->rowstride;
+ } else {
+ dstRowStride = texImage->Width * texImage->TexFormat->TexelBytes;
+ }
+
+ if (dims == 3) {
+ int i;
+
+ dstImageOffsets = _mesa_malloc(depth * sizeof(GLuint)) ;
+ if (!dstImageOffsets)
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+
+ for (i = 0; i < depth; ++i) {
+ dstImageOffsets[i] = dstRowStride/texImage->TexFormat->TexelBytes * height * i;
+ }
+ } else {
+ dstImageOffsets = texImage->ImageOffsets;
+ }
+
+ if (!texImage->TexFormat->StoreImage(ctx, dims,
+ texImage->_BaseFormat,
+ texImage->TexFormat,
+ texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */
+ dstRowStride,
+ dstImageOffsets,
+ width, height, depth,
+ format, type, pixels, packing))
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+
+ if (dims == 3)
+ _mesa_free(dstImageOffsets);
+ }
+
+ /* SGIS_generate_mipmap */
+ if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+ radeon_generate_mipmap(ctx, target, texObj);
+ }
+ }
+
+ _mesa_unmap_teximage_pbo(ctx, packing);
+
+ if (pixels)
+ radeon_teximage_unmap(image);
+
+
+}
+
+void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint border,
+ GLenum format, GLenum type, const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ radeon_teximage(ctx, 1, target, level, internalFormat, width, 1, 1,
+ 0, format, type, pixels, packing, texObj, texImage, 0);
+}
+
+void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint height, GLint border,
+ GLenum format, GLenum type, const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+
+{
+ radeon_teximage(ctx, 2, target, level, internalFormat, width, height, 1,
+ 0, format, type, pixels, packing, texObj, texImage, 0);
+}
+
+void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target,
+ GLint level, GLint internalFormat,
+ GLint width, GLint height, GLint border,
+ GLsizei imageSize, const GLvoid * data,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ radeon_teximage(ctx, 2, target, level, internalFormat, width, height, 1,
+ imageSize, 0, 0, data, &ctx->Unpack, texObj, texImage, 1);
+}
+
+void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint height, GLint depth,
+ GLint border,
+ GLenum format, GLenum type, const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ radeon_teximage(ctx, 3, target, level, internalFormat, width, height, depth,
+ 0, format, type, pixels, packing, texObj, texImage, 0);
+}
+
+/**
+ * Update a subregion of the given texture image.
+ */
+static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLsizei imageSize,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage,
+ int compressed)
+{
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ radeonTexObj* t = radeon_tex_obj(texObj);
+ radeon_texture_image* image = get_radeon_texture_image(texImage);
+
+ radeon_firevertices(rmesa);
+
+ t->validated = GL_FALSE;
+ if (compressed) {
+ pixels = _mesa_validate_pbo_compressed_teximage(
+ ctx, imageSize, pixels, packing, "glCompressedTexImage");
+ } else {
+ pixels = _mesa_validate_pbo_teximage(ctx, dims,
+ width, height, depth, format, type, pixels, packing, "glTexSubImage1D");
+ }
+
+ if (pixels) {
+ GLint dstRowStride;
+ radeon_teximage_map(image, GL_TRUE);
+
+ if (image->mt) {
+ radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
+ dstRowStride = lvl->rowstride;
+ } else {
+ dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes;
+ }
+
+ if (compressed) {
+ uint32_t srcRowStride, bytesPerRow, rows;
+ GLubyte *img_start;
+ if (!image->mt) {
+ dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width);
+ img_start = _mesa_compressed_image_address(xoffset, yoffset, 0,
+ texImage->TexFormat->MesaFormat,
+ texImage->Width, texImage->Data);
+ }
+ else {
+ uint32_t blocks_x = dstRowStride / (image->mt->bpp * 4);
+ img_start = texImage->Data + image->mt->bpp * 4 * (blocks_x * (yoffset / 4) + xoffset / 4);
+ }
+ srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
+ bytesPerRow = srcRowStride;
+ rows = (height + 3) / 4;
+
+ copy_rows(img_start, dstRowStride, pixels, srcRowStride, rows, bytesPerRow);
+
+ } else {
+ if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
+ texImage->TexFormat, texImage->Data,
+ xoffset, yoffset, zoffset,
+ dstRowStride,
+ texImage->ImageOffsets,
+ width, height, depth,
+ format, type, pixels, packing))
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
+ }
+
+ /* GL_SGIS_generate_mipmap */
+ if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+ radeon_generate_mipmap(ctx, target, texObj);
+ }
+ }
+
+ radeon_teximage_unmap(image);
+
+ _mesa_unmap_teximage_pbo(ctx, packing);
+
+
+}
+
+void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+ GLint xoffset,
+ GLsizei width,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ radeon_texsubimage(ctx, 1, target, level, xoffset, 0, 0, width, 1, 1, 0,
+ format, type, pixels, packing, texObj, texImage, 0);
+}
+
+void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+ GLint xoffset, GLint yoffset,
+ GLsizei width, GLsizei height,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ radeon_texsubimage(ctx, 2, target, level, xoffset, yoffset, 0, width, height, 1,
+ 0, format, type, pixels, packing, texObj, texImage,
+ 0);
+}
+
+void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+ GLint level, GLint xoffset,
+ GLint yoffset, GLsizei width,
+ GLsizei height, GLenum format,
+ GLsizei imageSize, const GLvoid * data,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ radeon_texsubimage(ctx, 2, target, level, xoffset, yoffset, 0, width, height, 1,
+ imageSize, format, 0, data, &ctx->Unpack, texObj, texImage, 1);
+}
+
+
+void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ radeon_texsubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, width, height, depth, 0,
+ format, type, pixels, packing, texObj, texImage, 0);
+}
+
+
+
+/**
+ * Ensure that the given image is stored in the given miptree from now on.
+ */
+static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_image *image, int face, int level)
+{
+ radeon_mipmap_level *dstlvl = &mt->levels[level - mt->firstLevel];
+ unsigned char *dest;
+
+ assert(image->mt != mt);
+ assert(dstlvl->width == image->base.Width);
+ assert(dstlvl->height == image->base.Height);
+ assert(dstlvl->depth == image->base.Depth);
+
+
+ radeon_bo_map(mt->bo, GL_TRUE);
+ dest = mt->bo->ptr + dstlvl->faces[face].offset;
+
+ if (image->mt) {
+ /* Format etc. should match, so we really just need a memcpy().
+ * In fact, that memcpy() could be done by the hardware in many
+ * cases, provided that we have a proper memory manager.
+ */
+ radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel-image->mt->firstLevel];
+
+ assert(srclvl->size == dstlvl->size);
+ assert(srclvl->rowstride == dstlvl->rowstride);
+
+ radeon_bo_map(image->mt->bo, GL_FALSE);
+
+ memcpy(dest,
+ image->mt->bo->ptr + srclvl->faces[face].offset,
+ dstlvl->size);
+ radeon_bo_unmap(image->mt->bo);
+
+ radeon_miptree_unreference(image->mt);
+ } else {
+ uint32_t srcrowstride;
+ uint32_t height;
+ /* need to confirm this value is correct */
+ if (mt->compressed) {
+ height = (image->base.Height + 3) / 4;
+ srcrowstride = _mesa_compressed_row_stride(image->base.TexFormat->MesaFormat, image->base.Width);
+ } else {
+ height = image->base.Height * image->base.Depth;
+ srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes;
+ }
+
+// if (mt->tilebits)
+// WARN_ONCE("%s: tiling not supported yet", __FUNCTION__);
+
+ copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride,
+ height, srcrowstride);
+
+ _mesa_free_texmemory(image->base.Data);
+ image->base.Data = 0;
+ }
+
+ radeon_bo_unmap(mt->bo);
+
+ image->mt = mt;
+ image->mtface = face;
+ image->mtlevel = level;
+ radeon_miptree_reference(image->mt);
+}
+
+int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ radeonTexObj *t = radeon_tex_obj(texObj);
+ radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[0][texObj->BaseLevel]);
+ int face, level;
+
+ if (t->validated || t->image_override)
+ return GL_TRUE;
+
+ if (RADEON_DEBUG & RADEON_TEXTURE)
+ fprintf(stderr, "%s: Validating texture %p now\n", __FUNCTION__, texObj);
+
+ if (baseimage->base.Border > 0)
+ return GL_FALSE;
+
+ /* Ensure a matching miptree exists.
+ *
+ * Differing mipmap trees can result when the app uses TexImage to
+ * change texture dimensions.
+ *
+ * Prefer to use base image's miptree if it
+ * exists, since that most likely contains more valid data (remember
+ * that the base level is usually significantly larger than the rest
+ * of the miptree, so cubemaps are the only possible exception).
+ */
+ if (baseimage->mt &&
+ baseimage->mt != t->mt &&
+ radeon_miptree_matches_texture(baseimage->mt, &t->base)) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = baseimage->mt;
+ radeon_miptree_reference(t->mt);
+ } else if (t->mt && !radeon_miptree_matches_texture(t->mt, &t->base)) {
+ radeon_miptree_unreference(t->mt);
+ t->mt = 0;
+ }
+
+ if (!t->mt) {
+ if (RADEON_DEBUG & RADEON_TEXTURE)
+ fprintf(stderr, " Allocate new miptree\n");
+ radeon_try_alloc_miptree(rmesa, t, baseimage, 0, texObj->BaseLevel);
+ if (!t->mt) {
+ _mesa_problem(ctx, "radeon_validate_texture failed to alloc miptree");
+ return GL_FALSE;
+ }
+ }
+
+ /* Ensure all images are stored in the single main miptree */
+ for(face = 0; face < t->mt->faces; ++face) {
+ for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) {
+ radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]);
+ if (RADEON_DEBUG & RADEON_TEXTURE)
+ fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt);
+ if (t->mt == image->mt) {
+ if (RADEON_DEBUG & RADEON_TEXTURE)
+ fprintf(stderr, "OK\n");
+
+ continue;
+ }
+
+ if (RADEON_DEBUG & RADEON_TEXTURE)
+ fprintf(stderr, "migrating\n");
+ migrate_image_to_miptree(t->mt, image, face, level);
+ }
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Need to map texture image into memory before copying image data,
+ * then unmap it.
+ */
+static void
+radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
+ GLenum format, GLenum type, GLvoid * pixels,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage, int compressed)
+{
+ radeon_texture_image *image = get_radeon_texture_image(texImage);
+
+ if (image->mt) {
+ /* Map the texture image read-only */
+ radeon_teximage_map(image, GL_FALSE);
+ } else {
+ /* Image hasn't been uploaded to a miptree yet */
+ assert(image->base.Data);
+ }
+
+ if (compressed) {
+ /* FIXME: this can't work for small textures (mips) which
+ use different hw stride */
+ _mesa_get_compressed_teximage(ctx, target, level, pixels,
+ texObj, texImage);
+ } else {
+ _mesa_get_teximage(ctx, target, level, format, type, pixels,
+ texObj, texImage);
+ }
+
+ if (image->mt) {
+ radeon_teximage_unmap(image);
+ }
+}
+
+void
+radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level,
+ GLenum format, GLenum type, GLvoid * pixels,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ radeon_get_tex_image(ctx, target, level, format, type, pixels,
+ texObj, texImage, 0);
+}
+
+void
+radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
+ GLvoid *pixels,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ radeon_get_tex_image(ctx, target, level, 0, 0, pixels,
+ texObj, texImage, 1);
+}
diff --git a/radeon/radeon_texture.h b/radeon/radeon_texture.h
new file mode 100644
index 0000000..888a55b
--- /dev/null
+++ b/radeon/radeon_texture.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ * Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+ *
+ * The Weather Channel (TM) funded Tungsten Graphics to develop the
+ * initial release of the Radeon 8500 driver under the XFree86 license.
+ * This notice must be preserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_TEXTURE_H
+#define RADEON_TEXTURE_H
+struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx);
+void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage);
+
+void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable);
+void radeon_teximage_unmap(radeon_texture_image *image);
+void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
+void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
+void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj);
+int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj);
+GLuint radeon_face_for_target(GLenum target);
+const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx,
+ GLint internalFormat,
+ GLenum format,
+ GLenum type);
+const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
+ GLint internalFormat,
+ GLenum format,
+ GLenum type, GLboolean fbo);
+
+void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint border,
+ GLenum format, GLenum type, const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint height, GLint border,
+ GLenum format, GLenum type, const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target,
+ GLint level, GLint internalFormat,
+ GLint width, GLint height, GLint border,
+ GLsizei imageSize, const GLvoid * data,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint height, GLint depth,
+ GLint border,
+ GLenum format, GLenum type, const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+ GLint xoffset,
+ GLsizei width,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+ GLint xoffset, GLint yoffset,
+ GLsizei width, GLsizei height,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+ GLint level, GLint xoffset,
+ GLint yoffset, GLsizei width,
+ GLsizei height, GLenum format,
+ GLsizei imageSize, const GLvoid * data,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+
+void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+
+void radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level,
+ GLenum format, GLenum type, GLvoid * pixels,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+void radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
+ GLvoid *pixels,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+
+#endif
diff --git a/radeon/server/radeon_reg.h b/radeon/server/radeon_reg.h
index ae2ccdf..e81d7fd 100644
--- a/radeon/server/radeon_reg.h
+++ b/radeon/server/radeon_reg.h
@@ -1601,6 +1601,8 @@
# define RADEON_STENCIL_VALUE_MASK (0xff << 16)
# define RADEON_STENCIL_WRITEMASK_SHIFT 24
# define RADEON_STENCIL_WRITE_MASK (0xff << 24)
+#define RADEON_RB3D_ZPASS_DATA 0x3290
+#define RADEON_RB3D_ZPASS_ADDR 0x3294
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
# define RADEON_DEPTH_FORMAT_MASK (0xf << 0)
# define RADEON_DEPTH_FORMAT_16BIT_INT_Z (0 << 0)
@@ -1661,6 +1663,9 @@
# define RADEON_FORCE_Z_DIRTY (1 << 29)
# define RADEON_Z_WRITE_ENABLE (1 << 30)
# define RADEON_Z_DECOMPRESSION_ENABLE (1 << 31)
+
+#define RADEON_RE_STIPPLE_ADDR 0x1cc8
+#define RADEON_RE_STIPPLE_DATA 0x1ccc
#define RADEON_RE_LINE_PATTERN 0x1cd0
# define RADEON_LINE_PATTERN_MASK 0x0000ffff
# define RADEON_LINE_REPEAT_COUNT_SHIFT 16
@@ -2031,6 +2036,9 @@
#define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00
#define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00
#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00
+#define R200_CP_CMD_3D_DRAW_VBUF_2 0xC0003400
+#define R200_CP_CMD_3D_DRAW_IMMD_2 0xC0003500
+#define R200_CP_CMD_3D_DRAW_INDX_2 0xC0003600
#define RADEON_CP_PACKET3_CNTL_PAINT 0xC0009100
#define RADEON_CP_PACKET3_CNTL_BITBLT 0xC0009200
#define RADEON_CP_PACKET3_CNTL_SMALLTEXT 0xC0009300