Diffstat (limited to 'r300')
-rwxr-xr-x  r300/Lindent                 2
-rw-r--r--  r300/Makefile.am            30
-rw-r--r--  r300/r300_cmdbuf.c         590
-rw-r--r--  r300/r300_cmdbuf.h         116
-rw-r--r--  r300/r300_context.c        532
-rw-r--r--  r300/r300_context.h        916
-rw-r--r--  r300/r300_emit.c           627
-rw-r--r--  r300/r300_emit.h           238
-rw-r--r--  r300/r300_fragprog.c      2472
-rw-r--r--  r300/r300_fragprog.h       104
-rw-r--r--  r300/r300_ioctl.c          719
-rw-r--r--  r300/r300_ioctl.h           59
-rw-r--r--  r300/r300_mem.c            385
-rw-r--r--  r300/r300_mem.h             37
-rw-r--r--  r300/r300_program.h        150
-rw-r--r--  r300/r300_reg.h           1635
-rw-r--r--  r300/r300_render.c         536
-rw-r--r--  r300/r300_shader.c          73
-rw-r--r--  r300/r300_state.c         2375
-rw-r--r--  r300/r300_state.h           70
-rw-r--r--  r300/r300_tex.c           1166
-rw-r--r--  r300/r300_tex.h             51
-rw-r--r--  r300/r300_texmem.c         584
-rw-r--r--  r300/r300_texstate.c       620
-rw-r--r--  r300/r300_vertprog.c      1305
-rw-r--r--  r300/r300_vertprog.h        89
-rw-r--r--  r300/radeon_context.c      327
-rw-r--r--  r300/radeon_context.h      246
-rw-r--r--  r300/radeon_ioctl.c        394
-rw-r--r--  r300/radeon_ioctl.h         57
-rw-r--r--  r300/radeon_lock.c         137
-rw-r--r--  r300/radeon_lock.h         118
-rw-r--r--  r300/radeon_span.c         321
-rw-r--r--  r300/radeon_state.c        243
-rw-r--r--  r300/radeon_state.h         43
35 files changed, 17367 insertions, 0 deletions
diff --git a/r300/Lindent b/r300/Lindent
new file mode 100755
index 0000000..7d8d889
--- /dev/null
+++ b/r300/Lindent
@@ -0,0 +1,2 @@
+#!/bin/sh
+indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@"
diff --git a/r300/Makefile.am b/r300/Makefile.am
new file mode 100644
index 0000000..0992115
--- /dev/null
+++ b/r300/Makefile.am
@@ -0,0 +1,30 @@
+AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
+
+R300_CFLAGS = -DCOMPILE_R300 -DR200_MERGED=0 -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
+R300_CFLAGS += -I../radeon -I../radeon/server
+
+r300_dri_la_LTLIBRARIES = r300_dri.la
+r300_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R300_CFLAGS)
+r300_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
+ $(DRM_LIBS) $(DRI_LIBS)
+r300_dri_ladir = @libdir@/dri
+r300_dri_la_SOURCES = \
+ ../radeon/radeon_screen.c \
+ radeon_context.c \
+ radeon_ioctl.c \
+ radeon_lock.c \
+ radeon_span.c \
+ radeon_state.c \
+ r300_mem.c \
+ r300_context.c \
+ r300_ioctl.c \
+ r300_cmdbuf.c \
+ r300_state.c \
+ r300_render.c \
+ r300_texmem.c \
+ r300_tex.c \
+ r300_texstate.c \
+ r300_vertprog.c \
+ r300_fragprog.c \
+ r300_shader.c \
+ r300_emit.c
diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c
new file mode 100644
index 0000000..3befa58
--- /dev/null
+++ b/r300/r300_cmdbuf.c
@@ -0,0 +1,590 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "glheader.h"
+#include "state.h"
+#include "imports.h"
+#include "macros.h"
+#include "context.h"
+#include "swrast/swrast.h"
+#include "simple_list.h"
+
+#include "drm.h"
+#include "radeon_drm.h"
+
+#include "radeon_ioctl.h"
+#include "r300_context.h"
+#include "r300_ioctl.h"
+#include "radeon_reg.h"
+#include "r300_reg.h"
+#include "r300_cmdbuf.h"
+#include "r300_emit.h"
+#include "r300_state.h"
+
+// Set this to 1 for extremely verbose debugging of command buffers
+#define DEBUG_CMDBUF 0
+
+/**
+ * Send the current command buffer via ioctl to the hardware.
+ */
+int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
+{
+ int ret;
+ int i;
+ drm_radeon_cmd_buffer_t cmd;
+ int start;
+
+ if (r300->radeon.lost_context) {
+ start = 0;
+ r300->radeon.lost_context = GL_FALSE;
+ } else
+ start = r300->cmdbuf.count_reemit;
+
+ if (RADEON_DEBUG & DEBUG_IOCTL) {
+ fprintf(stderr, "%s from %s - %i cliprects\n",
+ __FUNCTION__, caller, r300->radeon.numClipRects);
+
+ if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
+ for (i = start; i < r300->cmdbuf.count_used; ++i)
+ fprintf(stderr, "%d: %08x\n", i,
+ r300->cmdbuf.cmd_buf[i]);
+ }
+
+ cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start);
+ cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
+
+ if (r300->radeon.state.scissor.enabled) {
+ cmd.nbox = r300->radeon.state.scissor.numClipRects;
+ cmd.boxes =
+ (drm_clip_rect_t *) r300->radeon.state.scissor.pClipRects;
+ } else {
+ cmd.nbox = r300->radeon.numClipRects;
+ cmd.boxes = (drm_clip_rect_t *) r300->radeon.pClipRects;
+ }
+
+ ret = drmCommandWrite(r300->radeon.dri.fd,
+ DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
+
+ if (RADEON_DEBUG & DEBUG_SYNC) {
+ fprintf(stderr, "Syncing in %s (from %s)\n\n",
+ __FUNCTION__, caller);
+ radeonWaitForIdleLocked(&r300->radeon);
+ }
+
+ r300->dma.nr_released_bufs = 0;
+ r300->cmdbuf.count_used = 0;
+ r300->cmdbuf.count_reemit = 0;
+
+ return ret;
+}
+
+int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
+{
+ int ret;
+
+ LOCK_HARDWARE(&r300->radeon);
+
+ ret = r300FlushCmdBufLocked(r300, caller);
+
+ UNLOCK_HARDWARE(&r300->radeon);
+
+ if (ret) {
+ fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret);
+ _mesa_exit(ret);
+ }
+
+ return ret;
+}
+
+static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
+{
+ int i;
+ int dwords = (*state->check) (r300, state);
+
+ fprintf(stderr, " emit %s/%d/%d\n", state->name, dwords,
+ state->cmd_size);
+
+ if (RADEON_DEBUG & DEBUG_VERBOSE)
+ for (i = 0; i < dwords; i++)
+ fprintf(stderr, " %s[%d]: %08X\n",
+ state->name, i, state->cmd[i]);
+}
+
+/**
+ * Emit all atoms with a dirty field equal to dirty.
+ *
+ * The caller must have ensured that there is enough space in the command
+ * buffer.
+ */
+static __inline__ void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
+{
+ struct r300_state_atom *atom;
+ uint32_t *dest;
+
+ dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
+
+ if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
+ foreach(atom, &r300->hw.atomlist) {
+ if ((atom->dirty || r300->hw.all_dirty) == dirty) {
+ int dwords = (*atom->check) (r300, atom);
+
+ if (dwords)
+ r300PrintStateAtom(r300, atom);
+ else
+ fprintf(stderr,
+ " skip state %s\n",
+ atom->name);
+ }
+ }
+ }
+
+ /* Emit WAIT */
+ *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+ dest++;
+ r300->cmdbuf.count_used++;
+
+ /* Emit cache flush */
+ *dest = cmdpacket0(R300_TX_CNTL, 1);
+ dest++;
+ r300->cmdbuf.count_used++;
+
+ *dest = R300_TX_FLUSH;
+ dest++;
+ r300->cmdbuf.count_used++;
+
+ /* Emit END3D */
+ *dest = cmdpacify();
+ dest++;
+ r300->cmdbuf.count_used++;
+
+ /* Emit actual atoms */
+
+ foreach(atom, &r300->hw.atomlist) {
+ if ((atom->dirty || r300->hw.all_dirty) == dirty) {
+ int dwords = (*atom->check) (r300, atom);
+
+ if (dwords) {
+ memcpy(dest, atom->cmd, dwords * 4);
+ dest += dwords;
+ r300->cmdbuf.count_used += dwords;
+ atom->dirty = GL_FALSE;
+ }
+ }
+ }
+}
+
+/**
+ * Copy dirty hardware state atoms into the command buffer.
+ *
+ * We also copy out clean state if we're at the start of a buffer. That makes
+ * it easy to recover from lost contexts.
+ */
+void r300EmitState(r300ContextPtr r300)
+{
+ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ if (r300->cmdbuf.count_used && !r300->hw.is_dirty
+ && !r300->hw.all_dirty)
+ return;
+
+ /* To avoid going across the entire set of states multiple times, just check
+ * for enough space for the case of emitting all state, and inline the
+ * r300AllocCmdBuf code here without all the checks.
+ */
+ r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);
+
+ if (!r300->cmdbuf.count_used) {
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "Begin reemit state\n");
+
+ r300EmitAtoms(r300, GL_FALSE);
+ r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
+ }
+
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "Begin dirty state\n");
+
+ r300EmitAtoms(r300, GL_TRUE);
+
+ assert(r300->cmdbuf.count_used < r300->cmdbuf.size);
+
+ r300->hw.is_dirty = GL_FALSE;
+ r300->hw.all_dirty = GL_FALSE;
+}
+
+#define CHECK( NM, COUNT ) \
+static int check_##NM( r300ContextPtr r300, \
+ struct r300_state_atom* atom ) \
+{ \
+ (void) atom; (void) r300; \
+ return (COUNT); \
+}
+
+#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count)
+#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
+
+CHECK(always, atom->cmd_size)
+ CHECK(variable, packet0_count(atom->cmd) ? (1 + packet0_count(atom->cmd)) : 0)
+ CHECK(vpu, vpu_count(atom->cmd) ? (1 + vpu_count(atom->cmd) * 4) : 0)
+#undef packet0_count
+#undef vpu_count
+#define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \
+ do { \
+ r300->hw.ATOM.cmd_size = (SZ); \
+ r300->hw.ATOM.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t)); \
+ r300->hw.ATOM.name = #ATOM; \
+ r300->hw.ATOM.idx = (IDX); \
+ r300->hw.ATOM.check = check_##CHK; \
+ r300->hw.ATOM.dirty = GL_FALSE; \
+ r300->hw.max_state_size += (SZ); \
+ insert_at_tail(&r300->hw.atomlist, &r300->hw.ATOM); \
+ } while (0)
+/**
+ * Allocate memory for the command buffer and initialize the state atom
+ * list. Note that the initial hardware state is set by r300InitState().
+ */
+void r300InitCmdBuf(r300ContextPtr r300)
+{
+ int size, mtu;
+ int has_tcl = 1;
+
+ if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ has_tcl = 0;
+
+ r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */
+
+ mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
+ if (RADEON_DEBUG & DEBUG_TEXTURE) {
+ fprintf(stderr, "Using %d maximum texture units..\n", mtu);
+ }
+
+ /* Setup the atom linked list */
+ make_empty_list(&r300->hw.atomlist);
+ r300->hw.atomlist.name = "atom-list";
+
+ /* Initialize state atoms */
+ ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
+ r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6);
+ ALLOC_STATE(vap_cntl, always, 2, 0);
+ r300->hw.vap_cntl.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1);
+ ALLOC_STATE(vte, always, 3, 0);
+ r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2);
+ ALLOC_STATE(unk2134, always, 3, 0);
+ r300->hw.unk2134.cmd[0] = cmdpacket0(0x2134, 2);
+ ALLOC_STATE(vap_cntl_status, always, 2, 0);
+ r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1);
+ ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
+ r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
+ cmdpacket0(R300_VAP_INPUT_ROUTE_0_0, 1);
+ ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
+ r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
+ cmdpacket0(R300_VAP_INPUT_ROUTE_1_0, 1);
+ ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
+ r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_INPUT_CNTL_0, 2);
+ ALLOC_STATE(unk21DC, always, 2, 0);
+ r300->hw.unk21DC.cmd[0] = cmdpacket0(0x21DC, 1);
+ ALLOC_STATE(vap_clip_cntl, always, 2, 0);
+ r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1);
+ ALLOC_STATE(unk2220, always, 5, 0);
+ r300->hw.unk2220.cmd[0] = cmdpacket0(0x2220, 4);
+ ALLOC_STATE(unk2288, always, 2, 0);
+ r300->hw.unk2288.cmd[0] = cmdpacket0(R300_VAP_UNKNOWN_2288, 1);
+ ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
+ r300->hw.vof.cmd[R300_VOF_CMD_0] =
+ cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+
+ if (has_tcl) {
+ ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
+ r300->hw.pvs.cmd[R300_PVS_CMD_0] =
+ cmdpacket0(R300_VAP_PVS_CNTL_1, 3);
+ }
+
+ ALLOC_STATE(gb_enable, always, 2, 0);
+ r300->hw.gb_enable.cmd[0] = cmdpacket0(R300_GB_ENABLE, 1);
+ ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
+ r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5);
+ ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
+ r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1);
+ ALLOC_STATE(unk4200, always, 5, 0);
+ r300->hw.unk4200.cmd[0] = cmdpacket0(0x4200, 4);
+ ALLOC_STATE(unk4214, always, 2, 0);
+ r300->hw.unk4214.cmd[0] = cmdpacket0(0x4214, 1);
+ ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
+ r300->hw.ps.cmd[0] = cmdpacket0(R300_RE_POINTSIZE, 1);
+ ALLOC_STATE(unk4230, always, 4, 0);
+ r300->hw.unk4230.cmd[0] = cmdpacket0(0x4230, 3);
+ ALLOC_STATE(lcntl, always, 2, 0);
+ r300->hw.lcntl.cmd[0] = cmdpacket0(R300_RE_LINE_CNT, 1);
+ ALLOC_STATE(unk4260, always, 4, 0);
+ r300->hw.unk4260.cmd[0] = cmdpacket0(0x4260, 3);
+ ALLOC_STATE(shade, always, 5, 0);
+ r300->hw.shade.cmd[0] = cmdpacket0(R300_RE_SHADE, 4);
+ ALLOC_STATE(polygon_mode, always, 4, 0);
+ r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3);
+ ALLOC_STATE(fogp, always, 3, 0);
+ r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2);
+ ALLOC_STATE(zbias_cntl, always, 2, 0);
+ r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_RE_ZBIAS_CNTL, 1);
+ ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
+ r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
+ cmdpacket0(R300_RE_ZBIAS_T_FACTOR, 4);
+ ALLOC_STATE(occlusion_cntl, always, 2, 0);
+ r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1);
+ ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
+ r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_RE_CULL_CNTL, 1);
+ ALLOC_STATE(unk42C0, always, 3, 0);
+ r300->hw.unk42C0.cmd[0] = cmdpacket0(0x42C0, 2);
+ ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
+ r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_CNTL_0, 2);
+ ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_INTERP_0, 8);
+ ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, 1);
+ ALLOC_STATE(unk43A4, always, 3, 0);
+ r300->hw.unk43A4.cmd[0] = cmdpacket0(0x43A4, 2);
+ ALLOC_STATE(unk43E8, always, 2, 0);
+ r300->hw.unk43E8.cmd[0] = cmdpacket0(0x43E8, 1);
+ ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
+ r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3);
+ r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4);
+ ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
+ r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0);
+ ALLOC_STATE(unk46A4, always, 6, 0);
+ r300->hw.unk46A4.cmd[0] = cmdpacket0(0x46A4, 5);
+ ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
+ r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1);
+ ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
+ r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1);
+ ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
+ r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1);
+ ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
+ r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1);
+ ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
+ r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_RE_FOG_STATE, 1);
+ ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
+ r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FOG_COLOR_R, 3);
+ ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
+ r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_PP_ALPHA_TEST, 2);
+ ALLOC_STATE(unk4BD8, always, 2, 0);
+ r300->hw.unk4BD8.cmd[0] = cmdpacket0(0x4BD8, 1);
+ ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
+ r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0);
+ ALLOC_STATE(unk4E00, always, 2, 0);
+ r300->hw.unk4E00.cmd[0] = cmdpacket0(0x4E00, 1);
+ ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
+ r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2);
+ ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
+ r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(R300_RB3D_COLORMASK, 1);
+ ALLOC_STATE(blend_color, always, 4, 0);
+ r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 3);
+ ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
+ r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
+ r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
+ ALLOC_STATE(unk4E50, always, 10, 0);
+ r300->hw.unk4E50.cmd[0] = cmdpacket0(0x4E50, 9);
+ ALLOC_STATE(unk4E88, always, 2, 0);
+ r300->hw.unk4E88.cmd[0] = cmdpacket0(0x4E88, 1);
+ ALLOC_STATE(unk4EA0, always, 3, 0);
+ r300->hw.unk4EA0.cmd[0] = cmdpacket0(0x4EA0, 2);
+ ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
+ r300->hw.zs.cmd[R300_ZS_CMD_0] =
+ cmdpacket0(R300_RB3D_ZSTENCIL_CNTL_0, 3);
+ ALLOC_STATE(zstencil_format, always, 5, 0);
+ r300->hw.zstencil_format.cmd[0] =
+ cmdpacket0(R300_RB3D_ZSTENCIL_FORMAT, 4);
+ ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
+ r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_RB3D_DEPTHOFFSET, 2);
+ ALLOC_STATE(unk4F28, always, 2, 0);
+ r300->hw.unk4F28.cmd[0] = cmdpacket0(0x4F28, 1);
+ ALLOC_STATE(unk4F30, always, 3, 0);
+ r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2);
+ ALLOC_STATE(unk4F44, always, 2, 0);
+ r300->hw.unk4F44.cmd[0] = cmdpacket0(0x4F44, 1);
+ ALLOC_STATE(unk4F54, always, 2, 0);
+ r300->hw.unk4F54.cmd[0] = cmdpacket0(0x4F54, 1);
+
+ /* VPU only on TCL */
+ if (has_tcl) {
+ int i;
+ ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
+ r300->hw.vpi.cmd[R300_VPI_CMD_0] =
+ cmdvpu(R300_PVS_UPLOAD_PROGRAM, 0);
+
+ ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+ r300->hw.vpp.cmd[R300_VPP_CMD_0] =
+ cmdvpu(R300_PVS_UPLOAD_PARAMETERS, 0);
+
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+ r300->hw.vps.cmd[R300_VPS_CMD_0] =
+ cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1);
+
+ for (i = 0; i < 6; i++) {
+ ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+ r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
+ cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1);
+ }
+ }
+
+ /* Textures */
+ ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
+ r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_FILTER_0, 0);
+
+ ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
+ r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_FILTER1_0, 0);
+
+ ALLOC_STATE(tex.size, variable, mtu + 1, 0);
+ r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0);
+
+ ALLOC_STATE(tex.format, variable, mtu + 1, 0);
+ r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_FORMAT_0, 0);
+
+ ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
+ r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_PITCH_0, 0);
+
+ ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
+ r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_OFFSET_0, 0);
+
+ ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
+ r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_CHROMA_KEY_0, 0);
+
+ ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
+ r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_BORDER_COLOR_0, 0);
+
+ r300->hw.is_dirty = GL_TRUE;
+ r300->hw.all_dirty = GL_TRUE;
+
+ /* Initialize command buffer */
+ size =
+ 256 * driQueryOptioni(&r300->radeon.optionCache,
+ "command_buffer_size");
+ if (size < 2 * r300->hw.max_state_size) {
+ size = 2 * r300->hw.max_state_size + 65535;
+ }
+ if (size > 64 * 256)
+ size = 64 * 256;
+
+ if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) {
+ fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n",
+ sizeof(drm_r300_cmd_header_t));
+ fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n",
+ sizeof(drm_radeon_cmd_buffer_t));
+ fprintf(stderr,
+ "Allocating %d bytes command buffer (max state is %d bytes)\n",
+ size * 4, r300->hw.max_state_size * 4);
+ }
+
+ r300->cmdbuf.size = size;
+ r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4);
+ r300->cmdbuf.count_used = 0;
+ r300->cmdbuf.count_reemit = 0;
+}
+
+/**
+ * Destroy the command buffer and state atoms.
+ */
+void r300DestroyCmdBuf(r300ContextPtr r300)
+{
+ struct r300_state_atom *atom;
+
+ FREE(r300->cmdbuf.cmd_buf);
+
+ foreach(atom, &r300->hw.atomlist) {
+ FREE(atom->cmd);
+ }
+}
+
+void r300EmitBlit(r300ContextPtr rmesa,
+ GLuint color_fmt,
+ GLuint src_pitch,
+ GLuint src_offset,
+ GLuint dst_pitch,
+ GLuint dst_offset,
+ GLint srcx, GLint srcy,
+ GLint dstx, GLint dsty, GLuint w, GLuint h)
+{
+ drm_r300_cmd_header_t *cmd;
+
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr,
+ "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+ __FUNCTION__, src_pitch, src_offset, srcx, srcy,
+ dst_pitch, dst_offset, dstx, dsty, w, h);
+
+ assert((src_pitch & 63) == 0);
+ assert((dst_pitch & 63) == 0);
+ assert((src_offset & 1023) == 0);
+ assert((dst_offset & 1023) == 0);
+ assert(w < (1 << 16));
+ assert(h < (1 << 16));
+
+ cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__);
+
+ cmd[0].header.cmd_type = R300_CMD_PACKET3;
+ cmd[0].header.pad0 = R300_CMD_PACKET3_RAW;
+ cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16);
+ cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+ RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+ RADEON_GMC_BRUSH_NONE |
+ (color_fmt << 8) |
+ RADEON_GMC_SRC_DATATYPE_COLOR |
+ RADEON_ROP3_S |
+ RADEON_DP_SRC_SOURCE_MEMORY |
+ RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
+
+ cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10);
+ cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
+ cmd[5].u = (srcx << 16) | srcy;
+ cmd[6].u = (dstx << 16) | dsty; /* dst */
+ cmd[7].u = (w << 16) | h;
+}
+
+void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
+{
+ drm_r300_cmd_header_t *cmd;
+
+ assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D)));
+
+ cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+ cmd[0].u = 0;
+ cmd[0].wait.cmd_type = R300_CMD_WAIT;
+ cmd[0].wait.flags = flags;
+}
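
The state-atom machinery in r300_cmdbuf.c above reduces to a small pattern: keep a list of fixed-size state blocks, mark a block dirty whenever the corresponding GL state changes, and copy only the dirty blocks into the command stream, re-emitting everything at the start of a buffer so a lost context can be recovered. The following self-contained sketch (not part of the commit; toy_atom and toy_emit are hypothetical names, and the real struct r300_state_atom in r300_context.h is more involved) models that pattern in plain C:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy stand-in for struct r300_state_atom: a name, a payload and a dirty flag. */
struct toy_atom {
	const char *name;
	uint32_t cmd[4];
	int cmd_size;		/* dwords actually used in cmd[] */
	bool dirty;
};

/* Copy atoms into dest. With only_dirty == false (re-emission after a lost
 * context) every atom is written; otherwise only the dirty ones are. */
static int toy_emit(struct toy_atom *atoms, int n, uint32_t *dest, bool only_dirty)
{
	int used = 0;

	for (int i = 0; i < n; i++) {
		if (only_dirty && !atoms[i].dirty)
			continue;
		memcpy(dest + used, atoms[i].cmd, atoms[i].cmd_size * sizeof(uint32_t));
		used += atoms[i].cmd_size;
		atoms[i].dirty = false;
		printf("emitted %s (%d dwords)\n", atoms[i].name, atoms[i].cmd_size);
	}
	return used;
}

int main(void)
{
	uint32_t buf[32];
	struct toy_atom atoms[] = {
		{ "vpt", { 0x1D98, 1, 2, 3 }, 4, true },
		{ "cul", { 0x42B8, 0, 0, 0 }, 2, false },
	};

	/* Only "vpt" is dirty, so only its 4 dwords are copied. */
	int used = toy_emit(atoms, 2, buf, true);
	printf("%d dwords used\n", used);
	return 0;
}
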
diff --git a/r300/r300_cmdbuf.h b/r300/r300_cmdbuf.h
new file mode 100644
index 0000000..bfb2eda
--- /dev/null
+++ b/r300/r300_cmdbuf.h
@@ -0,0 +1,116 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_CMDBUF_H__
+#define __R300_CMDBUF_H__
+
+#include "r300_context.h"
+
+extern int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller);
+extern int r300FlushCmdBuf(r300ContextPtr r300, const char *caller);
+
+extern void r300EmitState(r300ContextPtr r300);
+
+extern void r300InitCmdBuf(r300ContextPtr r300);
+extern void r300DestroyCmdBuf(r300ContextPtr r300);
+
+/**
+ * Make sure that enough space is available in the command buffer
+ * by flushing if necessary.
+ *
+ * \param dwords The number of dwords that must be free in the command buffer
+ */
+static __inline__ void r300EnsureCmdBufSpace(r300ContextPtr r300,
+ int dwords, const char *caller)
+{
+ assert(dwords < r300->cmdbuf.size);
+
+ if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size)
+ r300FlushCmdBuf(r300, caller);
+}
+
+/**
+ * Allocate the given number of dwords in the command buffer and return
+ * a pointer to the allocated area.
+ * When necessary, these functions cause a flush. r300AllocCmdBuf() also
+ * causes state reemission after a flush. This is necessary to ensure
+ * correct hardware state after an unlock.
+ */
+static __inline__ uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
+ int dwords, const char *caller)
+{
+ uint32_t *ptr;
+
+ r300EnsureCmdBufSpace(r300, dwords, caller);
+
+ ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
+ r300->cmdbuf.count_used += dwords;
+ return ptr;
+}
+
+static __inline__ uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
+ int dwords, const char *caller)
+{
+ uint32_t *ptr;
+
+ r300EnsureCmdBufSpace(r300, dwords, caller);
+
+ if (!r300->cmdbuf.count_used) {
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr,
+ "Reemit state after flush (from %s)\n", caller);
+ r300EmitState(r300);
+ }
+
+ ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
+ r300->cmdbuf.count_used += dwords;
+ return ptr;
+}
+
+extern void r300EmitBlit(r300ContextPtr rmesa,
+ GLuint color_fmt,
+ GLuint src_pitch,
+ GLuint src_offset,
+ GLuint dst_pitch,
+ GLuint dst_offset,
+ GLint srcx, GLint srcy,
+ GLint dstx, GLint dsty, GLuint w, GLuint h);
+
+extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags);
+extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start);
+extern void r300EmitVertexShader(r300ContextPtr rmesa);
+extern void r300EmitPixelShader(r300ContextPtr rmesa);
+
+#endif /* __R300_CMDBUF_H__ */
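
The allocation helpers above are what r300EmitBlit() and r300EmitWait() in r300_cmdbuf.c build on: make sure the request fits (flushing the buffer to the kernel if it would not), then return a pointer into the dword array and advance the write index. A minimal self-contained sketch of that flush-then-allocate idiom follows; it is not part of the commit, toy_cmdbuf, toy_flush and toy_alloc are hypothetical names, and toy_flush merely stands in for the DRM_RADEON_CMDBUF submission:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_CMDBUF_SIZE 16	/* dwords */

struct toy_cmdbuf {
	uint32_t cmd_buf[TOY_CMDBUF_SIZE];
	int count_used;		/* dwords filled so far */
};

/* Stand-in for submitting the buffer to the kernel: report and reset. */
static void toy_flush(struct toy_cmdbuf *cb)
{
	printf("flushing %d dwords\n", cb->count_used);
	cb->count_used = 0;
}

/* Same shape as r300AllocCmdBuf(): flush if the request would overflow,
 * then hand back a pointer into the buffer and advance the write index. */
static uint32_t *toy_alloc(struct toy_cmdbuf *cb, int dwords)
{
	if (cb->count_used + dwords > TOY_CMDBUF_SIZE)
		toy_flush(cb);
	uint32_t *ptr = &cb->cmd_buf[cb->count_used];
	cb->count_used += dwords;
	return ptr;
}

int main(void)
{
	struct toy_cmdbuf cb = { .count_used = 0 };

	/* Two 10-dword "packets"; the second one forces a flush first. */
	uint32_t *p = toy_alloc(&cb, 10);
	memset(p, 0xAA, 10 * sizeof(*p));
	p = toy_alloc(&cb, 10);
	memset(p, 0xBB, 10 * sizeof(*p));
	toy_flush(&cb);
	return 0;
}
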
diff --git a/r300/r300_context.c b/r300/r300_context.c
new file mode 100644
index 0000000..9ea14ab
--- /dev/null
+++ b/r300/r300_context.c
@@ -0,0 +1,532 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "glheader.h"
+#include "api_arrayelt.h"
+#include "context.h"
+#include "simple_list.h"
+#include "imports.h"
+#include "matrix.h"
+#include "extensions.h"
+#include "state.h"
+#include "bufferobj.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vp_build.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "radeon_ioctl.h"
+#include "radeon_span.h"
+#include "r300_context.h"
+#include "r300_cmdbuf.h"
+#include "r300_state.h"
+#include "r300_ioctl.h"
+#include "r300_tex.h"
+#include "r300_emit.h"
+
+#ifdef USER_BUFFERS
+#include "r300_mem.h"
+#endif
+
+#include "vblank.h"
+#include "utils.h"
+#include "xmlpool.h" /* for symbolic values of enum-type options */
+
+/* hw_tcl_on derives from future_hw_tcl_on when it's safe to change it. */
+int future_hw_tcl_on = 1;
+int hw_tcl_on = 1;
+
+#define need_GL_EXT_stencil_two_side
+#define need_GL_ARB_multisample
+#define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
+#define need_GL_ARB_vertex_program
+#define need_GL_EXT_blend_minmax
+//#define need_GL_EXT_fog_coord
+#define need_GL_EXT_secondary_color
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_gpu_program_parameters
+#define need_GL_NV_vertex_program
+#include "extension_helper.h"
+
+const struct dri_extension card_extensions[] = {
+ /* *INDENT-OFF* */
+ {"GL_ARB_multisample", GL_ARB_multisample_functions},
+ {"GL_ARB_multitexture", NULL},
+ {"GL_ARB_texture_border_clamp", NULL},
+ {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions},
+ {"GL_ARB_texture_cube_map", NULL},
+ {"GL_ARB_texture_env_add", NULL},
+ {"GL_ARB_texture_env_combine", NULL},
+ {"GL_ARB_texture_env_crossbar", NULL},
+ {"GL_ARB_texture_env_dot3", NULL},
+ {"GL_ARB_texture_mirrored_repeat", NULL},
+ {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
+ {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
+ {"GL_ARB_fragment_program", NULL},
+ {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
+ {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
+ {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
+ {"GL_EXT_blend_subtract", NULL},
+// {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+ {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
+ {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
+ {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions},
+ {"GL_EXT_stencil_wrap", NULL},
+ {"GL_EXT_texture_edge_clamp", NULL},
+ {"GL_EXT_texture_env_combine", NULL},
+ {"GL_EXT_texture_env_dot3", NULL},
+ {"GL_EXT_texture_filter_anisotropic", NULL},
+ {"GL_EXT_texture_lod_bias", NULL},
+ {"GL_EXT_texture_mirror_clamp", NULL},
+ {"GL_EXT_texture_rectangle", NULL},
+ {"GL_ATI_texture_env_combine3", NULL},
+ {"GL_ATI_texture_mirror_once", NULL},
+ {"GL_MESA_pack_invert", NULL},
+ {"GL_MESA_ycbcr_texture", NULL},
+ {"GL_MESAX_texture_float", NULL},
+ {"GL_NV_blend_square", NULL},
+ {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
+ {"GL_SGIS_generate_mipmap", NULL},
+ {NULL, NULL}
+ /* *INDENT-ON* */
+};
+
+extern struct tnl_pipeline_stage _r300_render_stage;
+extern const struct tnl_pipeline_stage _r300_tcl_stage;
+
+static const struct tnl_pipeline_stage *r300_pipeline[] = {
+
+ /* Try and go straight to t&l
+ */
+ &_r300_tcl_stage,
+
+ /* Catch any t&l fallbacks
+ */
+ &_tnl_vertex_transform_stage,
+ &_tnl_normal_transform_stage,
+ &_tnl_lighting_stage,
+ &_tnl_fog_coordinate_stage,
+ &_tnl_texgen_stage,
+ &_tnl_texture_transform_stage,
+ &_tnl_vertex_program_stage,
+
+ /* Try again to go to tcl?
+ * - no good for asymmetric-twoside (do with multipass)
+ * - no good for asymmetric-unfilled (do with multipass)
+ * - good for material
+ * - good for texgen
+ * - need to manipulate a bit of state
+ *
+ * - worth it/not worth it?
+ */
+
+ /* Else do them here.
+ */
+ &_r300_render_stage,
+ &_tnl_render_stage, /* FALLBACK */
+ 0,
+};
+
+/* Create the device specific rendering context.
+ */
+GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate)
+{
+ __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+ radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+ struct dd_function_table functions;
+ r300ContextPtr r300;
+ GLcontext *ctx;
+ int tcl_mode, i;
+
+ assert(glVisual);
+ assert(driContextPriv);
+ assert(screen);
+
+ /* Allocate the R300 context */
+ r300 = (r300ContextPtr) CALLOC(sizeof(*r300));
+ if (!r300)
+ return GL_FALSE;
+
+ if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
+ hw_tcl_on = future_hw_tcl_on = 0;
+
+ /* Parse configuration files.
+ * Do this here so that initialMaxAnisotropy is set before we create
+ * the default textures.
+ */
+ driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache,
+ screen->driScreen->myNum, "r300");
+ r300->initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
+ "def_max_anisotropy");
+
+ /* Init default driver functions then plug in our R300-specific functions
+ * (the texture functions are especially important)
+ */
+ _mesa_init_driver_functions(&functions);
+ r300InitIoctlFuncs(&functions);
+ r300InitStateFuncs(&functions);
+ r300InitTextureFuncs(&functions);
+ r300InitShaderFuncs(&functions);
+
+#ifdef USER_BUFFERS
+ r300_mem_init(r300);
+#endif
+
+ if (!radeonInitContext(&r300->radeon, &functions,
+ glVisual, driContextPriv,
+ sharedContextPrivate)) {
+ FREE(r300);
+ return GL_FALSE;
+ }
+
+ /* Init r300 context data */
+ r300->dma.buf0_address =
+ r300->radeon.radeonScreen->buffers->list[0].address;
+
+ (void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps));
+ make_empty_list(&r300->swapped);
+
+ r300->nr_heaps = 1 /* screen->numTexHeaps */ ;
+ assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS);
+ for (i = 0; i < r300->nr_heaps; i++) {
+ /* *INDENT-OFF* */
+ r300->texture_heaps[i] = driCreateTextureHeap(i, r300,
+ screen->
+ texSize[i], 12,
+ RADEON_NR_TEX_REGIONS,
+ (drmTextureRegionPtr)
+ r300->radeon.sarea->
+ tex_list[i],
+ &r300->radeon.sarea->
+ tex_age[i],
+ &r300->swapped,
+ sizeof
+ (r300TexObj),
+ (destroy_texture_object_t
+ *)
+ r300DestroyTexObj);
+ /* *INDENT-ON* */
+ }
+ r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache,
+ "texture_depth");
+ if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+ r300->texture_depth = (screen->cpp == 4) ?
+ DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+
+ /* Set the maximum texture size small enough that we can guarantee that
+ * all texture units can bind a maximal texture and have them all in
+ * texturable memory at once.
+ */
+
+ ctx = r300->radeon.glCtx;
+
+ ctx->Const.MaxTextureImageUnits =
+ driQueryOptioni(&r300->radeon.optionCache, "texture_image_units");
+ ctx->Const.MaxTextureCoordUnits =
+ driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units");
+ ctx->Const.MaxTextureUnits =
+ MIN2(ctx->Const.MaxTextureImageUnits,
+ ctx->Const.MaxTextureCoordUnits);
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
+ ctx->Const.MinPointSize = 1.0;
+ ctx->Const.MinPointSizeAA = 1.0;
+ ctx->Const.MaxPointSize = R300_POINTSIZE_MAX;
+ ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX;
+
+ ctx->Const.MinLineWidth = 1.0;
+ ctx->Const.MinLineWidthAA = 1.0;
+ ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
+ ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
+
+#ifdef USER_BUFFERS
+ /* Needs further modifications */
+#if 0
+ ctx->Const.MaxArrayLockSize =
+ ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
+#endif
+#endif
+
+ /* Initialize the software rasterizer and helper modules.
+ */
+ _swrast_CreateContext(ctx);
+ _vbo_CreateContext(ctx);
+ _tnl_CreateContext(ctx);
+ _swsetup_CreateContext(ctx);
+ _swsetup_Wakeup(ctx);
+ _ae_create_context(ctx);
+
+ /* Install the customized pipeline:
+ */
+ _tnl_destroy_pipeline(ctx);
+ _tnl_install_pipeline(ctx, r300_pipeline);
+
+ /* Try and keep materials and vertices separate:
+ */
+/* _tnl_isolate_materials(ctx, GL_TRUE); */
+
+ /* Configure swrast and TNL to match hardware characteristics:
+ */
+ _swrast_allow_pixel_fog(ctx, GL_FALSE);
+ _swrast_allow_vertex_fog(ctx, GL_TRUE);
+ _tnl_allow_pixel_fog(ctx, GL_FALSE);
+ _tnl_allow_vertex_fog(ctx, GL_TRUE);
+
+ /* currently bogus data */
+ ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
+ ctx->Const.VertexProgram.MaxNativeInstructions =
+ VSF_MAX_FRAGMENT_LENGTH / 4;
+ ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */
+ ctx->Const.VertexProgram.MaxTemps = 32;
+ ctx->Const.VertexProgram.MaxNativeTemps =
+ /*VSF_MAX_FRAGMENT_TEMPS */ 32;
+ ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */
+ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+
+ ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS;
+ ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */
+ ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS;
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST;
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST;
+ ctx->Const.FragmentProgram.MaxNativeInstructions =
+ PFS_MAX_ALU_INST + PFS_MAX_TEX_INST;
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections =
+ PFS_MAX_TEX_INDIRECT;
+ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */
+ _tnl_ProgramCacheInit(ctx);
+ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+
+ driInitExtensions(ctx, card_extensions, GL_TRUE);
+
+ if (driQueryOptionb
+ (&r300->radeon.optionCache, "disable_stencil_two_side"))
+ _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
+
+ if (r300->radeon.glCtx->Mesa_DXTn
+ && !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) {
+ _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+ _mesa_enable_extension(ctx, "GL_S3_s3tc");
+ } else
+ if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable"))
+ {
+ _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+ }
+
+ r300->disable_lowimpact_fallback =
+ driQueryOptionb(&r300->radeon.optionCache,
+ "disable_lowimpact_fallback");
+
+ radeonInitSpanFuncs(ctx);
+ r300InitCmdBuf(r300);
+ r300InitState(r300);
+
+ TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
+ tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode");
+ if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) {
+ fprintf(stderr, "disabling 3D acceleration\n");
+#if R200_MERGED
+ FALLBACK(&r300->radeon, RADEON_FALLBACK_DISABLE, 1);
+#endif
+ }
+ if (tcl_mode == DRI_CONF_TCL_SW ||
+ !(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+ if (r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ r300->radeon.radeonScreen->chip_flags &=
+ ~RADEON_CHIPSET_TCL;
+ fprintf(stderr, "Disabling HW TCL support\n");
+ }
+ TCL_FALLBACK(r300->radeon.glCtx,
+ RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
+ }
+
+ return GL_TRUE;
+}
+
+static void r300FreeGartAllocations(r300ContextPtr r300)
+{
+ int i, ret, tries = 0, done_age, in_use = 0;
+ drm_radeon_mem_free_t memfree;
+
+ memfree.region = RADEON_MEM_REGION_GART;
+
+#ifdef USER_BUFFERS
+ for (i = r300->rmm->u_last; i > 0; i--) {
+ if (r300->rmm->u_list[i].ptr == NULL) {
+ continue;
+ }
+
+ /* check whether this buffer is still in use */
+ if (r300->rmm->u_list[i].pending) {
+ in_use++;
+ }
+ }
+ /* Cannot flush/lock if no context exists. */
+ if (in_use)
+ r300FlushCmdBuf(r300, __FUNCTION__);
+
+ done_age = radeonGetAge((radeonContextPtr) r300);
+
+ for (i = r300->rmm->u_last; i > 0; i--) {
+ if (r300->rmm->u_list[i].ptr == NULL) {
+ continue;
+ }
+
+ /* check whether this buffer is still in use */
+ if (!r300->rmm->u_list[i].pending) {
+ continue;
+ }
+
+ assert(r300->rmm->u_list[i].h_pending == 0);
+
+ tries = 0;
+ while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
+ usleep(10);
+ done_age = radeonGetAge((radeonContextPtr) r300);
+ }
+ if (tries >= 1000) {
+ WARN_ONCE("Failed to idle region!");
+ }
+
+ memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
+ (char *)r300->radeon.radeonScreen->gartTextures.map;
+
+ ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
+ DRM_RADEON_FREE, &memfree,
+ sizeof(memfree));
+ if (ret) {
+ fprintf(stderr, "Failed to free at %p\nret = %s\n",
+ r300->rmm->u_list[i].ptr, strerror(-ret));
+ } else {
+ if (i == r300->rmm->u_last)
+ r300->rmm->u_last--;
+
+ r300->rmm->u_list[i].pending = 0;
+ r300->rmm->u_list[i].ptr = NULL;
+ }
+ }
+ r300->rmm->u_head = i;
+#endif /* USER_BUFFERS */
+}
+
+/* Destroy the device specific context.
+ */
+void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ r300ContextPtr r300 = (r300ContextPtr) driContextPriv->driverPrivate;
+ radeonContextPtr radeon = (radeonContextPtr) r300;
+ radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
+
+ if (RADEON_DEBUG & DEBUG_DRI) {
+ fprintf(stderr, "Destroying context !\n");
+ }
+
+ /* check if we're deleting the currently bound context */
+ if (&r300->radeon == current) {
+ radeonFlush(r300->radeon.glCtx);
+ _mesa_make_current(NULL, NULL, NULL);
+ }
+
+ /* Free r300 context resources */
+ assert(r300); /* should never be null */
+
+ if (r300) {
+ GLboolean release_texture_heaps;
+
+ release_texture_heaps =
+ (r300->radeon.glCtx->Shared->RefCount == 1);
+ _swsetup_DestroyContext(r300->radeon.glCtx);
+ _tnl_ProgramCacheDestroy(r300->radeon.glCtx);
+ _tnl_DestroyContext(r300->radeon.glCtx);
+ _vbo_DestroyContext(r300->radeon.glCtx);
+ _swrast_DestroyContext(r300->radeon.glCtx);
+
+ if (r300->dma.current.buf) {
+ r300ReleaseDmaRegion(r300, &r300->dma.current,
+ __FUNCTION__);
+#ifndef USER_BUFFERS
+ r300FlushCmdBuf(r300, __FUNCTION__);
+#endif
+ }
+ r300FreeGartAllocations(r300);
+ r300DestroyCmdBuf(r300);
+
+ if (radeon->state.scissor.pClipRects) {
+ FREE(radeon->state.scissor.pClipRects);
+ radeon->state.scissor.pClipRects = NULL;
+ }
+
+ if (release_texture_heaps) {
+ /* This share group is about to go away, free our private
+ * texture object data.
+ */
+ int i;
+
+ for (i = 0; i < r300->nr_heaps; i++) {
+ driDestroyTextureHeap(r300->texture_heaps[i]);
+ r300->texture_heaps[i] = NULL;
+ }
+
+ assert(is_empty_list(&r300->swapped));
+ }
+
+ radeonCleanupContext(&r300->radeon);
+
+#ifdef USER_BUFFERS
+ /* the memory manager might be accessed when Mesa frees the shared
+ * state, so don't destroy it earlier
+ */
+ r300_mem_destroy(r300);
+#endif
+
+ /* free the option cache */
+ driDestroyOptionCache(&r300->radeon.optionCache);
+
+ FREE(r300);
+ }
+}
diff --git a/r300/r300_context.h b/r300/r300_context.h
new file mode 100644
index 0000000..6b0a588
--- /dev/null
+++ b/r300/r300_context.h
@@ -0,0 +1,916 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_CONTEXT_H__
+#define __R300_CONTEXT_H__
+
+#include "tnl/t_vertex.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "dri_util.h"
+#include "texmem.h"
+
+#include "macros.h"
+#include "mtypes.h"
+#include "colormac.h"
+
+#define USER_BUFFERS
+
+//#define OPTIMIZE_ELTS
+
+struct r300_context;
+typedef struct r300_context r300ContextRec;
+typedef struct r300_context *r300ContextPtr;
+
+#include "radeon_lock.h"
+#include "mm.h"
+
+/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
+ I suppose we could inline this and use a macro to fetch out __LINE__ and stuff in case we run into trouble
+ with other compilers ... GLUE!
+*/
+#define WARN_ONCE(a, ...) { \
+ static int warn##__LINE__=1; \
+ if(warn##__LINE__){ \
+ fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \
+ fprintf(stderr, "File %s function %s line %d\n", \
+ __FILE__, __FUNCTION__, __LINE__); \
+ fprintf(stderr, a, ## __VA_ARGS__);\
+ fprintf(stderr, "***************************************************************************\n"); \
+ warn##__LINE__=0;\
+ } \
+ }
+
+#include "r300_vertprog.h"
+#include "r300_fragprog.h"
+
+/**
+ * This function takes a float and packs it into a uint32_t
+ */
+static __inline__ uint32_t r300PackFloat32(float fl)
+{
+ union {
+ float fl;
+ uint32_t u;
+ } u;
+
+ u.fl = fl;
+ return u.u;
+}
+
+/* This is probably wrong for some values, I need to test this
+ * some more. Range checking would be a good idea also..
+ *
+ * But it works for most things. I'll fix it later if someone
+ * else with a better clue doesn't
+ */
+static __inline__ uint32_t r300PackFloat24(float f)
+{
+ float mantissa;
+ int exponent;
+ uint32_t float24 = 0;
+
+ if (f == 0.0)
+ return 0;
+
+ mantissa = frexpf(f, &exponent);
+
+ /* Handle -ve */
+ if (mantissa < 0) {
+ float24 |= (1 << 23);
+ mantissa = mantissa * -1.0;
+ }
+ /* Handle exponent, bias of 63 */
+ exponent += 62;
+ float24 |= (exponent << 16);
+ /* Kill 7 LSB of mantissa */
+ float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7;
+
+ return float24;
+}
+
+/************ DMA BUFFERS **************/
+
+/* Need refcounting on dma buffers:
+ */
+struct r300_dma_buffer {
+ int refcount; /**< the number of retained regions in buf */
+ drmBufPtr buf;
+ int id;
+};
+#undef GET_START
+#ifdef USER_BUFFERS
+#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start))
+#else
+#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \
+ (rvb)->address - rmesa->dma.buf0_address + \
+ (rvb)->start)
+#endif
+/* A retained region, eg vertices for indexed vertices.
+ */
+struct r300_dma_region {
+ struct r300_dma_buffer *buf;
+ char *address; /* == buf->address */
+ int start, end, ptr; /* offsets from start of buf */
+
+ int aos_offset; /* address in GART memory */
+ int aos_stride; /* distance between elements, in dwords */
+ int aos_size; /* number of components (1-4) */
+ int aos_reg; /* VAP register assignment */
+};
+
+struct r300_dma {
+ /* Active dma region. Allocations for vertices and retained
+ * regions come from here. Also used for emitting random vertices,
+ * these may be flushed by calling flush_current();
+ */
+ struct r300_dma_region current;
+
+ void (*flush) (r300ContextPtr);
+
+ char *buf0_address; /* start of buf[0], for index calcs */
+
+ /* Number of "in-flight" DMA buffers, i.e. the number of buffers
+ * for which a DISCARD command is currently queued in the command buffer.
+ */
+ GLuint nr_released_bufs;
+};
+
+ /* Texture related */
+
+typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
+
+/* Texture object in locally shared texture space.
+ */
+struct r300_tex_obj {
+ driTextureObject base;
+
+ GLuint bufAddr; /* Offset to start of locally
+ shared texture block */
+
+ GLuint dirty_state; /* Flags (1 per texunit) for
+ whether or not this texobj
+ has dirty hardware state
+ (pp_*) that needs to be
+ brought into the
+ texunit. */
+
+ drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
+ /* Six, for the cube faces */
+
+ GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
+
+ GLuint pitch; /* this isn't sent to hardware, just used in calculations */
+ /* hardware register values */
+ /* Note that R200 has 8 registers per texture and R300 only 7 */
+ GLuint filter;
+ GLuint filter_1;
+ GLuint pitch_reg;
+ GLuint size; /* npot only */
+ GLuint format;
+ GLuint offset; /* Image location in the card's address space.
+ All cube faces follow. */
+ GLuint unknown4;
+ GLuint unknown5;
+ /* end hardware registers */
+
+ /* registers computed by r200 code - keep them here to
+ compare against what is actually written.
+
+ to be removed later.. */
+ GLuint pp_border_color;
+ GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
+ GLuint format_x;
+
+ GLboolean border_fallback;
+
+ GLuint tile_bits; /* hw texture tile bits used on this texture */
+};
+
+struct r300_texture_env_state {
+ r300TexObjPtr texobj;
+ GLenum format;
+ GLenum envMode;
+};
+
+/* The blit width for texture uploads
+ */
+#define R300_BLIT_WIDTH_BYTES 1024
+#define R300_MAX_TEXTURE_UNITS 8
+
+struct r300_texture_state {
+ struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS];
+ int tc_count; /* number of incoming texture coordinates from VAP */
+};
+
+/**
+ * A block of hardware state.
+ *
+ * When check returns non-zero, the returned number of dwords must be
+ * copied verbatim into the command buffer in order to update a state atom
+ * when it is dirty.
+ */
+struct r300_state_atom {
+ struct r300_state_atom *next, *prev;
+ const char *name; /* for debug */
+ int cmd_size; /* maximum size in dwords */
+ GLuint idx; /* index in an array (e.g. textures) */
+ uint32_t *cmd;
+ GLboolean dirty;
+
+ int (*check) (r300ContextPtr, struct r300_state_atom * atom);
+};
+
+#define R300_VPT_CMD_0 0
+#define R300_VPT_XSCALE 1
+#define R300_VPT_XOFFSET 2
+#define R300_VPT_YSCALE 3
+#define R300_VPT_YOFFSET 4
+#define R300_VPT_ZSCALE 5
+#define R300_VPT_ZOFFSET 6
+#define R300_VPT_CMDSIZE 7
+
+#define R300_VIR_CMD_0 0 /* vir is variable size (at least 1) */
+#define R300_VIR_CNTL_0 1
+#define R300_VIR_CNTL_1 2
+#define R300_VIR_CNTL_2 3
+#define R300_VIR_CNTL_3 4
+#define R300_VIR_CNTL_4 5
+#define R300_VIR_CNTL_5 6
+#define R300_VIR_CNTL_6 7
+#define R300_VIR_CNTL_7 8
+#define R300_VIR_CMDSIZE 9
+
+#define R300_VIC_CMD_0 0
+#define R300_VIC_CNTL_0 1
+#define R300_VIC_CNTL_1 2
+#define R300_VIC_CMDSIZE 3
+
+#define R300_VOF_CMD_0 0
+#define R300_VOF_CNTL_0 1
+#define R300_VOF_CNTL_1 2
+#define R300_VOF_CMDSIZE 3
+
+#define R300_PVS_CMD_0 0
+#define R300_PVS_CNTL_1 1
+#define R300_PVS_CNTL_2 2
+#define R300_PVS_CNTL_3 3
+#define R300_PVS_CMDSIZE 4
+
+#define R300_GB_MISC_CMD_0 0
+#define R300_GB_MISC_MSPOS_0 1
+#define R300_GB_MISC_MSPOS_1 2
+#define R300_GB_MISC_TILE_CONFIG 3
+#define R300_GB_MISC_SELECT 4
+#define R300_GB_MISC_AA_CONFIG 5
+#define R300_GB_MISC_CMDSIZE 6
+
+#define R300_TXE_CMD_0 0
+#define R300_TXE_ENABLE 1
+#define R300_TXE_CMDSIZE 2
+
+#define R300_PS_CMD_0 0
+#define R300_PS_POINTSIZE 1
+#define R300_PS_CMDSIZE 2
+
+#define R300_ZBS_CMD_0 0
+#define R300_ZBS_T_FACTOR 1
+#define R300_ZBS_T_CONSTANT 2
+#define R300_ZBS_W_FACTOR 3
+#define R300_ZBS_W_CONSTANT 4
+#define R300_ZBS_CMDSIZE 5
+
+#define R300_CUL_CMD_0 0
+#define R300_CUL_CULL 1
+#define R300_CUL_CMDSIZE 2
+
+#define R300_RC_CMD_0 0
+#define R300_RC_CNTL_0 1
+#define R300_RC_CNTL_1 2
+#define R300_RC_CMDSIZE 3
+
+#define R300_RI_CMD_0 0
+#define R300_RI_INTERP_0 1
+#define R300_RI_INTERP_1 2
+#define R300_RI_INTERP_2 3
+#define R300_RI_INTERP_3 4
+#define R300_RI_INTERP_4 5
+#define R300_RI_INTERP_5 6
+#define R300_RI_INTERP_6 7
+#define R300_RI_INTERP_7 8
+#define R300_RI_CMDSIZE 9
+
+#define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */
+#define R300_RR_ROUTE_0 1
+#define R300_RR_ROUTE_1 2
+#define R300_RR_ROUTE_2 3
+#define R300_RR_ROUTE_3 4
+#define R300_RR_ROUTE_4 5
+#define R300_RR_ROUTE_5 6
+#define R300_RR_ROUTE_6 7
+#define R300_RR_ROUTE_7 8
+#define R300_RR_CMDSIZE 9
+
+#define R300_FP_CMD_0 0
+#define R300_FP_CNTL0 1
+#define R300_FP_CNTL1 2
+#define R300_FP_CNTL2 3
+#define R300_FP_CMD_1 4
+#define R300_FP_NODE0 5
+#define R300_FP_NODE1 6
+#define R300_FP_NODE2 7
+#define R300_FP_NODE3 8
+#define R300_FP_CMDSIZE 9
+
+#define R300_FPT_CMD_0 0
+#define R300_FPT_INSTR_0 1
+#define R300_FPT_CMDSIZE 65
+
+#define R300_FPI_CMD_0 0
+#define R300_FPI_INSTR_0 1
+#define R300_FPI_CMDSIZE 65
+
+#define R300_FPP_CMD_0 0
+#define R300_FPP_PARAM_0 1
+#define R300_FPP_CMDSIZE (32*4+1)
+
+#define R300_FOGS_CMD_0 0
+#define R300_FOGS_STATE 1
+#define R300_FOGS_CMDSIZE 2
+
+#define R300_FOGC_CMD_0 0
+#define R300_FOGC_R 1
+#define R300_FOGC_G 2
+#define R300_FOGC_B 3
+#define R300_FOGC_CMDSIZE 4
+
+#define R300_FOGP_CMD_0 0
+#define R300_FOGP_SCALE 1
+#define R300_FOGP_START 2
+#define R300_FOGP_CMDSIZE 3
+
+#define R300_AT_CMD_0 0
+#define R300_AT_ALPHA_TEST 1
+#define R300_AT_UNKNOWN 2
+#define R300_AT_CMDSIZE 3
+
+#define R300_BLD_CMD_0 0
+#define R300_BLD_CBLEND 1
+#define R300_BLD_ABLEND 2
+#define R300_BLD_CMDSIZE 3
+
+#define R300_CMK_CMD_0 0
+#define R300_CMK_COLORMASK 1
+#define R300_CMK_CMDSIZE 2
+
+#define R300_CB_CMD_0 0
+#define R300_CB_OFFSET 1
+#define R300_CB_CMD_1 2
+#define R300_CB_PITCH 3
+#define R300_CB_CMDSIZE 4
+
+#define R300_ZS_CMD_0 0
+#define R300_ZS_CNTL_0 1
+#define R300_ZS_CNTL_1 2
+#define R300_ZS_CNTL_2 3
+#define R300_ZS_CMDSIZE 4
+
+#define R300_ZB_CMD_0 0
+#define R300_ZB_OFFSET 1
+#define R300_ZB_PITCH 2
+#define R300_ZB_CMDSIZE 3
+
+#define R300_VPI_CMD_0 0
+#define R300_VPI_INSTR_0 1
+#define R300_VPI_CMDSIZE 1025 /* 256 16 byte instructions */
+
+#define R300_VPP_CMD_0 0
+#define R300_VPP_PARAM_0 1
+#define R300_VPP_CMDSIZE 1025 /* 256 4-component parameters */
+
+#define R300_VPUCP_CMD_0 0
+#define R300_VPUCP_X 1
+#define R300_VPUCP_Y 2
+#define R300_VPUCP_Z 3
+#define R300_VPUCP_W 4
+#define R300_VPUCP_CMDSIZE 5 /* one 4-component user clip plane */
+
+#define R300_VPS_CMD_0 0
+#define R300_VPS_ZERO_0 1
+#define R300_VPS_ZERO_1 2
+#define R300_VPS_POINTSIZE 3
+#define R300_VPS_ZERO_3 4
+#define R300_VPS_CMDSIZE 5
+
+ /* the layout is common for all fields inside tex */
+#define R300_TEX_CMD_0 0
+#define R300_TEX_VALUE_0 1
+/* We don't really use this, instead specify mtu+1 dynamically
+#define R300_TEX_CMDSIZE (MAX_TEXTURE_UNITS+1)
+*/
+
+/**
+ * Cache for hardware register state.
+ */
+struct r300_hw_state {
+ struct r300_state_atom atomlist;
+
+ GLboolean is_dirty;
+ GLboolean all_dirty;
+ int max_state_size; /* in dwords */
+
+ struct r300_state_atom vpt; /* viewport (1D98) */
+ struct r300_state_atom vap_cntl;
+ struct r300_state_atom vof; /* VAP output format register 0x2090 */
+ struct r300_state_atom vte; /* (20B0) */
+ struct r300_state_atom unk2134; /* (2134) */
+ struct r300_state_atom vap_cntl_status;
+ struct r300_state_atom vir[2]; /* vap input route (2150/21E0) */
+ struct r300_state_atom vic; /* vap input control (2180) */
+ struct r300_state_atom unk21DC; /* (21DC) */
+ struct r300_state_atom vap_clip_cntl;
+ struct r300_state_atom unk2220; /* (2220) */
+ struct r300_state_atom unk2288; /* (2288) */
+ struct r300_state_atom pvs; /* pvs_cntl (22D0) */
+ struct r300_state_atom gb_enable; /* (4008) */
+ struct r300_state_atom gb_misc; /* Multisampling position shifts ? (4010) */
+ struct r300_state_atom unk4200; /* (4200) */
+ struct r300_state_atom unk4214; /* (4214) */
+ struct r300_state_atom ps; /* pointsize (421C) */
+ struct r300_state_atom unk4230; /* (4230) */
+ struct r300_state_atom lcntl; /* line control */
+ struct r300_state_atom unk4260; /* (4260) */
+ struct r300_state_atom shade;
+ struct r300_state_atom polygon_mode;
+ struct r300_state_atom fogp; /* fog parameters (4294) */
+ struct r300_state_atom unk429C; /* (429C) */
+ struct r300_state_atom zbias_cntl;
+ struct r300_state_atom zbs; /* zbias (42A4) */
+ struct r300_state_atom occlusion_cntl;
+ struct r300_state_atom cul; /* cull cntl (42B8) */
+ struct r300_state_atom unk42C0; /* (42C0) */
+ struct r300_state_atom rc; /* rs control (4300) */
+ struct r300_state_atom ri; /* rs interpolators (4310) */
+ struct r300_state_atom rr; /* rs route (4330) */
+ struct r300_state_atom unk43A4; /* (43A4) */
+ struct r300_state_atom unk43E8; /* (43E8) */
+ struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */
+ struct r300_state_atom fpt; /* texi - (4620) */
+ struct r300_state_atom unk46A4; /* (46A4) */
+ struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */
+ struct r300_state_atom fogs; /* fog state (4BC0) */
+ struct r300_state_atom fogc; /* fog color (4BC8) */
+ struct r300_state_atom at; /* alpha test (4BD4) */
+ struct r300_state_atom unk4BD8; /* (4BD8) */
+ struct r300_state_atom fpp; /* 0x4C00 and following */
+ struct r300_state_atom unk4E00; /* (4E00) */
+ struct r300_state_atom bld; /* blending (4E04) */
+ struct r300_state_atom cmk; /* colormask (4E0C) */
+ struct r300_state_atom blend_color; /* constant blend color */
+ struct r300_state_atom cb; /* colorbuffer (4E28) */
+ struct r300_state_atom unk4E50; /* (4E50) */
+ struct r300_state_atom unk4E88; /* (4E88) */
+	struct r300_state_atom unk4EA0;	/* (4EA0) I have only seen it written on RV350 hardware. */
+ struct r300_state_atom zs; /* zstencil control (4F00) */
+ struct r300_state_atom zstencil_format;
+ struct r300_state_atom zb; /* z buffer (4F20) */
+ struct r300_state_atom unk4F28; /* (4F28) */
+ struct r300_state_atom unk4F30; /* (4F30) */
+ struct r300_state_atom unk4F44; /* (4F44) */
+ struct r300_state_atom unk4F54; /* (4F54) */
+
+ struct r300_state_atom vpi; /* vp instructions */
+ struct r300_state_atom vpp; /* vp parameters */
+ struct r300_state_atom vps; /* vertex point size (?) */
+ struct r300_state_atom vpucp[6]; /* vp user clip plane - 6 */
+ /* 8 texture units */
+ /* the state is grouped by function and not by
+ texture unit. This makes single unit updates
+ really awkward - we are much better off
+ updating the whole thing at once */
+ struct {
+ struct r300_state_atom filter;
+ struct r300_state_atom filter_1;
+ struct r300_state_atom size;
+ struct r300_state_atom format;
+ struct r300_state_atom pitch;
+ struct r300_state_atom offset;
+ struct r300_state_atom chroma_key;
+ struct r300_state_atom border_color;
+ } tex;
+ struct r300_state_atom txe; /* tex enable (4104) */
+};
+
+/**
+ * This structure holds the command buffer while it is being constructed.
+ *
+ * The first batch of commands in the buffer is always the state that needs
+ * to be re-emitted when the context is lost. This batch can be skipped
+ * otherwise.
+ */
+struct r300_cmdbuf {
+ int size; /* DWORDs allocated for buffer */
+ uint32_t *cmd_buf;
+ int count_used; /* DWORDs filled so far */
+ int count_reemit; /* size of re-emission batch */
+};
+
+/**
+ * State cache
+ */
+
+struct r300_depthbuffer_state {
+ GLfloat scale;
+};
+
+struct r300_stencilbuffer_state {
+ GLuint clear;
+ GLboolean hw_stencil;
+
+};
+
+/* Vertex shader state */
+
+/* Perhaps more if we store programs in vmem? */
+/* drm_r300_cmd_header_t->vpu->count is unsigned char */
+#define VSF_MAX_FRAGMENT_LENGTH (255*4)
+
+/* Can be tested with colormat currently. */
+#define VSF_MAX_FRAGMENT_TEMPS (14)
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
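+/* Illustrative note: these are driver-internal state indices.  For example,
+ * the texture-rectangle scaling path in r300_fragprog.c builds a state token
+ * as
+ *	gl_state_index tokens[STATE_LENGTH] =
+ *		{ STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, unit, 0, 0 };
+ * and registers it with _mesa_add_state_reference().
+ */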
+
+struct r300_vertex_shader_fragment {
+ int length;
+ union {
+ GLuint d[VSF_MAX_FRAGMENT_LENGTH];
+ float f[VSF_MAX_FRAGMENT_LENGTH];
+ VERTEX_SHADER_INSTRUCTION i[VSF_MAX_FRAGMENT_LENGTH / 4];
+ } body;
+};
+
+#define VSF_DEST_PROGRAM 0x0
+#define VSF_DEST_MATRIX0 0x200
+#define VSF_DEST_MATRIX1 0x204
+#define VSF_DEST_MATRIX2 0x208
+#define VSF_DEST_VECTOR0 0x20c
+#define VSF_DEST_VECTOR1 0x20d
+#define VSF_DEST_UNKNOWN1 0x400
+#define VSF_DEST_UNKNOWN2 0x406
+
+struct r300_vertex_shader_state {
+ struct r300_vertex_shader_fragment program;
+
+ struct r300_vertex_shader_fragment unknown1;
+ struct r300_vertex_shader_fragment unknown2;
+
+ int program_start;
+ int unknown_ptr1; /* pointer within program space */
+ int program_end;
+
+ int param_offset;
+ int param_count;
+
+ int unknown_ptr2; /* pointer within program space */
+ int unknown_ptr3; /* pointer within program space */
+};
+
+extern int hw_tcl_on;
+
+//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current)
+#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp)
+
+/* Should work, but doesn't: */
+//#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp)
+
+/* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday.
+ * Keeping them separate for now should ensure that the fixed pipeline keeps functioning properly.
+ */
+
+struct r300_vertex_program_key {
+ GLuint InputsRead;
+ GLuint OutputsWritten;
+};
+
+struct r300_vertex_program {
+ struct r300_vertex_program *next;
+ struct r300_vertex_program_key key;
+ int translated;
+
+ struct r300_vertex_shader_fragment program;
+
+ int pos_end;
+ int num_temporaries; /* Number of temp vars used by program */
+ int wpos_idx;
+ int inputs[VERT_ATTRIB_MAX];
+ int outputs[VERT_RESULT_MAX];
+ int native;
+ int ref_count;
+ int use_ref_count;
+};
+
+struct r300_vertex_program_cont {
+ struct gl_vertex_program mesa_program; /* Must be first */
+ struct r300_vertex_shader_fragment params;
+ struct r300_vertex_program *progs;
+};
+
+#define PFS_MAX_ALU_INST 64
+#define PFS_MAX_TEX_INST 64
+#define PFS_MAX_TEX_INDIRECT 4
+#define PFS_NUM_TEMP_REGS 32
+#define PFS_NUM_CONST_REGS 16
+
+/* Mapping Mesa registers to R300 temporaries */
+struct reg_acc {
+ int reg; /* Assigned hw temp */
+ unsigned int refcount; /* Number of uses by mesa program */
+};
+
+/**
+ * Describe the current lifetime information for an R300 temporary
+ */
+struct reg_lifetime {
+ /* Index of the first slot where this register is free in the sense
+ that it can be used as a new destination register.
+ This is -1 if the register has been assigned to a Mesa register
+ and the last access to the register has not yet been emitted */
+ int free;
+
+ /* Index of the first slot where this register is currently reserved.
+ This is used to stop e.g. a scalar operation from being moved
+ before the allocation time of a register that was first allocated
+ for a vector operation. */
+ int reserved;
+
+ /* Index of the first slot in which the register can be used as a
+ source without losing the value that is written by the last
+ emitted instruction that writes to the register */
+ int vector_valid;
+ int scalar_valid;
+
+ /* Index to the slot where the register was last read.
+ This is also the first slot in which the register may be written again */
+ int vector_lastread;
+ int scalar_lastread;
+};
+
+/**
+ * Store usage information about an ALU instruction slot during the
+ * compilation of a fragment program.
+ */
+#define SLOT_SRC_VECTOR (1<<0)
+#define SLOT_SRC_SCALAR (1<<3)
+#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR)
+#define SLOT_OP_VECTOR (1<<16)
+#define SLOT_OP_SCALAR (1<<17)
+#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR)
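+
+/* Rough reading of the flags above: every ALU slot has a vector (RGB) half
+ * and a scalar (alpha) half.  The SLOT_SRC_* bits record which halves of a
+ * slot's source selects are occupied, and the SLOT_OP_* bits which halves
+ * already carry an opcode, so that later instructions can still be packed
+ * into a partially used slot.
+ */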
+
+struct r300_pfs_compile_slot {
+ /* Bitmask indicating which parts of the slot are used, using SLOT_ constants
+ defined above */
+ unsigned int used;
+
+ /* Selected sources */
+ int vsrc[3];
+ int ssrc[3];
+};
+
+/**
+ * Store information during compilation of fragment programs.
+ */
+struct r300_pfs_compile_state {
+ int nrslots; /* number of ALU slots used so far */
+
+ /* Track which (parts of) slots are already filled with instructions */
+ struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST];
+
+ /* Track the validity of R300 temporaries */
+ struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS];
+
+ /* Used to map Mesa's inputs/temps onto hardware temps */
+ int temp_in_use;
+ struct reg_acc temps[PFS_NUM_TEMP_REGS];
+ struct reg_acc inputs[32]; /* don't actually need 32... */
+
+ /* Track usage of hardware temps, for register allocation,
+ * indirection detection, etc. */
+ GLuint used_in_node;
+ GLuint dest_in_node;
+};
+
+/**
+ * Store everything about a fragment program that is needed
+ * to render with that program.
+ */
+struct r300_fragment_program {
+ struct gl_fragment_program mesa_program;
+
+ GLcontext *ctx;
+ GLboolean translated;
+ GLboolean error;
+ struct r300_pfs_compile_state *cs;
+
+ struct {
+ int length;
+ GLuint inst[PFS_MAX_TEX_INST];
+ } tex;
+
+ struct {
+ struct {
+ GLuint inst0;
+ GLuint inst1;
+ GLuint inst2;
+ GLuint inst3;
+ } inst[PFS_MAX_ALU_INST];
+ } alu;
+
+ struct {
+ int tex_offset;
+ int tex_end;
+ int alu_offset;
+ int alu_end;
+ int flags;
+ } node[4];
+ int cur_node;
+ int first_node_has_tex;
+
+ int alu_offset;
+ int alu_end;
+ int tex_offset;
+ int tex_end;
+
+ /* Hardware constants.
+ * Contains a pointer to the value. The destination of the pointer
+ * is supposed to be updated when GL state changes.
+ * Typically, this is either a pointer into
+ * gl_program_parameter_list::ParameterValues, or a pointer to a
+ * global constant (e.g. for sin/cos-approximation)
+ */
+ const GLfloat *constant[PFS_NUM_CONST_REGS];
+ int const_nr;
+
+ int max_temp_idx;
+
+ GLuint optimization;
+};
+
+#define R300_MAX_AOS_ARRAYS 16
+
+#define AOS_FORMAT_USHORT 0
+#define AOS_FORMAT_FLOAT 1
+#define AOS_FORMAT_UBYTE 2
+#define AOS_FORMAT_FLOAT_COLOR 3
+
+#define REG_COORDS 0
+#define REG_COLOR0 1
+#define REG_TEX0 2
+
+struct dt {
+ GLint size;
+ GLenum type;
+ GLsizei stride;
+ void *data;
+};
+
+struct radeon_vertex_buffer {
+ int Count;
+ void *Elts;
+ int elt_size;
+ int elt_min, elt_max; /* debug */
+
+ struct dt AttribPtr[VERT_ATTRIB_MAX];
+
+ const struct _mesa_prim *Primitive;
+ GLuint PrimitiveCount;
+ GLint LockFirst;
+ GLsizei LockCount;
+ int lock_uptodate;
+};
+
+struct r300_state {
+ struct r300_depthbuffer_state depth;
+ struct r300_texture_state texture;
+ int sw_tcl_inputs[VERT_ATTRIB_MAX];
+ struct r300_vertex_shader_state vertex_shader;
+ struct r300_pfs_compile_state pfs_compile;
+ struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
+ int aos_count;
+ struct radeon_vertex_buffer VB;
+
+ GLuint *Elts;
+ struct r300_dma_region elt_dma;
+
+ DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for.
+ They are the same as tnl->render_inputs for fixed pipeline */
+
+ struct {
+ int transform_offset; /* Transform matrix offset, -1 if none */
+ } vap_param; /* vertex processor parameter allocation - tells where to write parameters */
+
+ struct r300_stencilbuffer_state stencil;
+
+};
+
+#define R300_FALLBACK_NONE 0
+#define R300_FALLBACK_TCL 1
+#define R300_FALLBACK_RAST 2
+
+/**
+ * \brief R300 context structure.
+ */
+struct r300_context {
+ struct radeon_context radeon; /* parent class, must be first */
+
+ struct r300_hw_state hw;
+ struct r300_cmdbuf cmdbuf;
+ struct r300_state state;
+ struct gl_vertex_program *curr_vp;
+ struct r300_vertex_program *selected_vp;
+
+ /* Vertex buffers
+ */
+ struct r300_dma dma;
+ GLboolean save_on_next_unlock;
+ GLuint NewGLState;
+
+ /* Texture object bookkeeping
+ */
+ unsigned nr_heaps;
+ driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
+ driTextureObject swapped;
+ int texture_depth;
+ float initialMaxAnisotropy;
+
+	/* Client data textures */
+ GLuint prefer_gart_client_texturing;
+
+#ifdef USER_BUFFERS
+ struct r300_memory_manager *rmm;
+#endif
+
+ GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
+ GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
+
+ GLboolean disable_lowimpact_fallback;
+};
+
+struct r300_buffer_object {
+ struct gl_buffer_object mesa_obj;
+ int id;
+};
+
+#define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx))
+
+extern void r300DestroyContext(__DRIcontextPrivate * driContextPriv);
+extern GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate);
+
+extern void r300SelectVertexShader(r300ContextPtr r300);
+extern void r300InitShaderFuncs(struct dd_function_table *functions);
+extern int r300VertexProgUpdateParams(GLcontext * ctx,
+ struct r300_vertex_program_cont *vp,
+ float *dst);
+
+#define RADEON_D_CAPTURE 0
+#define RADEON_D_PLAYBACK 1
+#define RADEON_D_PLAYBACK_RAW 2
+#define RADEON_D_T 3
+
+#endif /* __R300_CONTEXT_H__ */
diff --git a/r300/r300_emit.c b/r300/r300_emit.c
new file mode 100644
index 0000000..2c26069
--- /dev/null
+++ b/r300/r300_emit.c
@@ -0,0 +1,627 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "imports.h"
+#include "macros.h"
+#include "image.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#include "r300_context.h"
+#include "radeon_ioctl.h"
+#include "r300_state.h"
+#include "r300_emit.h"
+#include "r300_ioctl.h"
+
+#ifdef USER_BUFFERS
+#include "r300_mem.h"
+#endif
+
+#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
+ SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
+ SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
+ SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
+ SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
+ SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
+#error Cannot change these!
+#endif
+
+#define DEBUG_ALL DEBUG_VERTS
+
+#if defined(USE_X86_ASM)
+#define COPY_DWORDS( dst, src, nr ) \
+do { \
+ int __tmp; \
+ __asm__ __volatile__( "rep ; movsl" \
+ : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
+ : "0" (nr), \
+ "D" ((long)dst), \
+ "S" ((long)src) ); \
+} while (0)
+#else
+#define COPY_DWORDS( dst, src, nr ) \
+do { \
+ int j; \
+ for ( j = 0 ; j < nr ; j++ ) \
+ dst[j] = ((int *)src)[j]; \
+ dst += nr; \
+} while (0)
+#endif
+
+static void r300EmitVec4(GLcontext * ctx,
+ struct r300_dma_region *rvb,
+ GLvoid * data, int stride, int count)
+{
+ int i;
+ int *out = (int *)(rvb->address + rvb->start);
+
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s count %d stride %d\n",
+ __FUNCTION__, count, stride);
+
+ if (stride == 4)
+ COPY_DWORDS(out, data, count);
+ else
+ for (i = 0; i < count; i++) {
+ out[0] = *(int *)data;
+ out++;
+ data += stride;
+ }
+}
+
+static void r300EmitVec8(GLcontext * ctx,
+ struct r300_dma_region *rvb,
+ GLvoid * data, int stride, int count)
+{
+ int i;
+ int *out = (int *)(rvb->address + rvb->start);
+
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s count %d stride %d\n",
+ __FUNCTION__, count, stride);
+
+ if (stride == 8)
+ COPY_DWORDS(out, data, count * 2);
+ else
+ for (i = 0; i < count; i++) {
+ out[0] = *(int *)data;
+ out[1] = *(int *)(data + 4);
+ out += 2;
+ data += stride;
+ }
+}
+
+static void r300EmitVec12(GLcontext * ctx,
+ struct r300_dma_region *rvb,
+ GLvoid * data, int stride, int count)
+{
+ int i;
+ int *out = (int *)(rvb->address + rvb->start);
+
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+ __FUNCTION__, count, stride, (void *)out, (void *)data);
+
+ if (stride == 12)
+ COPY_DWORDS(out, data, count * 3);
+ else
+ for (i = 0; i < count; i++) {
+ out[0] = *(int *)data;
+ out[1] = *(int *)(data + 4);
+ out[2] = *(int *)(data + 8);
+ out += 3;
+ data += stride;
+ }
+}
+
+static void r300EmitVec16(GLcontext * ctx,
+ struct r300_dma_region *rvb,
+ GLvoid * data, int stride, int count)
+{
+ int i;
+ int *out = (int *)(rvb->address + rvb->start);
+
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s count %d stride %d\n",
+ __FUNCTION__, count, stride);
+
+ if (stride == 16)
+ COPY_DWORDS(out, data, count * 4);
+ else
+ for (i = 0; i < count; i++) {
+ out[0] = *(int *)data;
+ out[1] = *(int *)(data + 4);
+ out[2] = *(int *)(data + 8);
+ out[3] = *(int *)(data + 12);
+ out += 4;
+ data += stride;
+ }
+}
+
+static void r300EmitVec(GLcontext * ctx,
+ struct r300_dma_region *rvb,
+ GLvoid * data, int size, int stride, int count)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s count %d size %d stride %d\n",
+ __FUNCTION__, count, size, stride);
+
+ /* Gets triggered when playing with future_hw_tcl_on ... */
+ //assert(!rvb->buf);
+
+ if (stride == 0) {
+ r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
+ count = 1;
+ rvb->aos_offset = GET_START(rvb);
+ rvb->aos_stride = 0;
+ } else {
+ r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); /* alignment? */
+ rvb->aos_offset = GET_START(rvb);
+ rvb->aos_stride = size;
+ }
+
+ /* Emit the data
+ */
+ switch (size) {
+ case 1:
+ r300EmitVec4(ctx, rvb, data, stride, count);
+ break;
+ case 2:
+ r300EmitVec8(ctx, rvb, data, stride, count);
+ break;
+ case 3:
+ r300EmitVec12(ctx, rvb, data, stride, count);
+ break;
+ case 4:
+ r300EmitVec16(ctx, rvb, data, stride, count);
+ break;
+ default:
+ assert(0);
+ _mesa_exit(-1);
+ break;
+ }
+
+}
+
+static GLuint t_type(struct dt *dt)
+{
+ switch (dt->type) {
+ case GL_UNSIGNED_BYTE:
+ return AOS_FORMAT_UBYTE;
+ case GL_SHORT:
+ return AOS_FORMAT_USHORT;
+ case GL_FLOAT:
+ return AOS_FORMAT_FLOAT;
+ default:
+ assert(0);
+ break;
+ }
+
+ return AOS_FORMAT_FLOAT;
+}
+
+static GLuint t_vir0_size(struct dt *dt)
+{
+ switch (dt->type) {
+ case GL_UNSIGNED_BYTE:
+ return 4;
+ case GL_SHORT:
+ return 7;
+ case GL_FLOAT:
+ return dt->size - 1;
+ default:
+ assert(0);
+ break;
+ }
+
+ return 0;
+}
+
+static GLuint t_aos_size(struct dt *dt)
+{
+ switch (dt->type) {
+ case GL_UNSIGNED_BYTE:
+ return 1;
+ case GL_SHORT:
+ return 2;
+ case GL_FLOAT:
+ return dt->size;
+ default:
+ assert(0);
+ break;
+ }
+
+ return 0;
+}
+
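+/* Pack the VAP input-route 0 table: each 32-bit word holds two route
+ * entries (low and high 16 bits).  Roughly, an entry combines the
+ * size/format code from t_vir0_size(), the destination index (<< 8) and
+ * the data type from t_type() (<< 14); bit 13 of the last entry marks the
+ * end of the table.  Returns the number of dwords written.
+ */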
+static GLuint t_vir0(uint32_t * dst, struct dt *dt, int *inputs,
+ GLint * tab, GLuint nr)
+{
+ GLuint i, dw;
+
+ for (i = 0; i + 1 < nr; i += 2) {
+ dw = t_vir0_size(&dt[tab[i]]) | (inputs[tab[i]] << 8) |
+ (t_type(&dt[tab[i]]) << 14);
+		dw |= (t_vir0_size(&dt[tab[i + 1]]) |
+		       (inputs[tab[i + 1]] << 8) |
+		       (t_type(&dt[tab[i + 1]]) << 14)) << 16;
+
+ if (i + 2 == nr) {
+ dw |= (1 << (13 + 16));
+ }
+ dst[i >> 1] = dw;
+ }
+
+ if (nr & 1) {
+		dw = t_vir0_size(&dt[tab[nr - 1]]) |
+		     (inputs[tab[nr - 1]] << 8) |
+		     (t_type(&dt[tab[nr - 1]]) << 14);
+ dw |= 1 << 13;
+
+ dst[nr >> 1] = dw;
+ }
+
+ return (nr + 1) >> 1;
+}
+
+static GLuint t_swizzle(int swizzle[4])
+{
+ return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
+ (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
+ (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
+ (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
+}
+
+static GLuint t_vir1(uint32_t * dst, int swizzle[][4], GLuint nr)
+{
+ GLuint i;
+
+ for (i = 0; i + 1 < nr; i += 2) {
+ dst[i >> 1] = t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
+ dst[i >> 1] |=
+ (t_swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE)
+ << 16;
+ }
+
+ if (nr & 1)
+ dst[nr >> 1] =
+ t_swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE;
+
+ return (nr + 1) >> 1;
+}
+
+static GLuint t_emit_size(struct dt *dt)
+{
+ return dt->size;
+}
+
+static GLuint t_vic(GLcontext * ctx, GLuint InputsRead)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLuint i, vic_1 = 0;
+
+ if (InputsRead & (1 << VERT_ATTRIB_POS))
+ vic_1 |= R300_INPUT_CNTL_POS;
+
+ if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
+ vic_1 |= R300_INPUT_CNTL_NORMAL;
+
+ if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
+ vic_1 |= R300_INPUT_CNTL_COLOR;
+
+ r300->state.texture.tc_count = 0;
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+ if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
+ r300->state.texture.tc_count++;
+ vic_1 |= R300_INPUT_CNTL_TC0 << i;
+ }
+
+ return vic_1;
+}
+
+/* Emit vertex data to GART memory and route inputs to the vertex processor.
+ * This function should never return R300_FALLBACK_TCL when using software TCL.
+ */
+
+int r300EmitArrays(GLcontext * ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ r300ContextPtr r300 = rmesa;
+ struct radeon_vertex_buffer *VB = &rmesa->state.VB;
+ GLuint nr;
+ GLuint count = VB->Count;
+ GLuint i;
+ GLuint InputsRead = 0, OutputsWritten = 0;
+ int *inputs = NULL;
+ int vir_inputs[VERT_ATTRIB_MAX];
+ GLint tab[VERT_ATTRIB_MAX];
+ int swizzle[VERT_ATTRIB_MAX][4];
+
+ if (hw_tcl_on) {
+ struct r300_vertex_program *prog =
+ (struct r300_vertex_program *)
+ CURRENT_VERTEX_SHADER(ctx);
+ inputs = prog->inputs;
+ InputsRead = CURRENT_VERTEX_SHADER(ctx)->key.InputsRead;
+ OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+ } else {
+ DECLARE_RENDERINPUTS(inputs_bitset);
+ inputs = r300->state.sw_tcl_inputs;
+
+ RENDERINPUTS_COPY(inputs_bitset,
+ TNL_CONTEXT(ctx)->render_inputs_bitset);
+
+ assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_POS));
+ InputsRead |= 1 << VERT_ATTRIB_POS;
+ OutputsWritten |= 1 << VERT_RESULT_HPOS;
+
+ assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_NORMAL)
+ == 0);
+
+ assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR0));
+ InputsRead |= 1 << VERT_ATTRIB_COLOR0;
+ OutputsWritten |= 1 << VERT_RESULT_COL0;
+
+ if (RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR1)) {
+ InputsRead |= 1 << VERT_ATTRIB_COLOR1;
+ OutputsWritten |= 1 << VERT_RESULT_COL1;
+ }
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+ if (RENDERINPUTS_TEST
+ (inputs_bitset, _TNL_ATTRIB_TEX(i))) {
+ InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
+ OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
+ }
+
+ for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
+ if (InputsRead & (1 << i))
+ inputs[i] = nr++;
+ else
+ inputs[i] = -1;
+
+		if (!(r300->radeon.radeonScreen->chip_flags &
+		      RADEON_CHIPSET_TCL)) {
+ /* Fixed, apply to vir0 only */
+ memcpy(vir_inputs, inputs,
+ VERT_ATTRIB_MAX * sizeof(int));
+ inputs = vir_inputs;
+
+			if (InputsRead & (1 << VERT_ATTRIB_POS))
+ inputs[VERT_ATTRIB_POS] = 0;
+
+ if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
+ inputs[VERT_ATTRIB_COLOR0] = 2;
+
+ if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
+ inputs[VERT_ATTRIB_COLOR1] = 3;
+
+ for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
+ if (InputsRead & (1 << i))
+ inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+ }
+
+ RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset,
+ inputs_bitset);
+ }
+ assert(InputsRead);
+ assert(OutputsWritten);
+
+ for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
+ if (InputsRead & (1 << i))
+ tab[nr++] = i;
+
+ if (nr > R300_MAX_AOS_ARRAYS)
+ return R300_FALLBACK_TCL;
+
+ for (i = 0; i < nr; i++) {
+ int ci;
+ int comp_size, fix, found = 0;
+
+ swizzle[i][0] = SWIZZLE_ZERO;
+ swizzle[i][1] = SWIZZLE_ZERO;
+ swizzle[i][2] = SWIZZLE_ZERO;
+ swizzle[i][3] = SWIZZLE_ONE;
+
+ for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++)
+ swizzle[i][ci] = ci;
+
+#if MESA_BIG_ENDIAN
+#define SWAP_INT(a, b) do { \
+ int __temp; \
+ __temp = a;\
+ a = b; \
+ b = __temp; \
+} while (0)
+
+ if (VB->AttribPtr[tab[i]].type == GL_UNSIGNED_BYTE) {
+ SWAP_INT(swizzle[i][0], swizzle[i][3]);
+ SWAP_INT(swizzle[i][1], swizzle[i][2]);
+ }
+#endif /* MESA_BIG_ENDIAN */
+
+ if (r300IsGartMemory(rmesa, VB->AttribPtr[tab[i]].data,
+ /*(count-1)*stride */ 4)) {
+ if (VB->AttribPtr[tab[i]].stride % 4)
+ return R300_FALLBACK_TCL;
+
+ rmesa->state.aos[i].address =
+ VB->AttribPtr[tab[i]].data;
+ rmesa->state.aos[i].start = 0;
+ rmesa->state.aos[i].aos_offset =
+ r300GartOffsetFromVirtual(rmesa,
+ VB->
+ AttribPtr[tab[i]].data);
+ rmesa->state.aos[i].aos_stride =
+ VB->AttribPtr[tab[i]].stride / 4;
+
+ rmesa->state.aos[i].aos_size =
+ t_emit_size(&VB->AttribPtr[tab[i]]);
+ } else {
+			/* TODO: r300EmitVec can only handle 4-byte (GL_FLOAT) components */
+ if (VB->AttribPtr[tab[i]].type != GL_FLOAT)
+ return R300_FALLBACK_TCL;
+
+ r300EmitVec(ctx, &rmesa->state.aos[i],
+ VB->AttribPtr[tab[i]].data,
+ t_emit_size(&VB->AttribPtr[tab[i]]),
+ VB->AttribPtr[tab[i]].stride, count);
+ }
+
+ rmesa->state.aos[i].aos_size =
+ t_aos_size(&VB->AttribPtr[tab[i]]);
+
+ comp_size = _mesa_sizeof_type(VB->AttribPtr[tab[i]].type);
+
+ for (fix = 0; fix <= 4 - VB->AttribPtr[tab[i]].size; fix++) {
+ if ((rmesa->state.aos[i].aos_offset -
+ comp_size * fix) % 4)
+ continue;
+
+ found = 1;
+ break;
+ }
+
+ if (found) {
+ if (fix > 0) {
+ WARN_ONCE("Feeling lucky?\n");
+ }
+
+ rmesa->state.aos[i].aos_offset -= comp_size * fix;
+
+ for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++)
+ swizzle[i][ci] += fix;
+ } else {
+ WARN_ONCE
+ ("Cannot handle offset %x with stride %d, comp %d\n",
+ rmesa->state.aos[i].aos_offset,
+ rmesa->state.aos[i].aos_stride,
+ VB->AttribPtr[tab[i]].size);
+ return R300_FALLBACK_TCL;
+ }
+ }
+
+ /* setup INPUT_ROUTE */
+ R300_STATECHANGE(r300, vir[0]);
+ ((drm_r300_cmd_header_t *) r300->hw.vir[0].cmd)->packet0.count =
+ t_vir0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0], VB->AttribPtr,
+ inputs, tab, nr);
+
+ R300_STATECHANGE(r300, vir[1]);
+ ((drm_r300_cmd_header_t *) r300->hw.vir[1].cmd)->packet0.count =
+ t_vir1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr);
+
+ /* Set up input_cntl */
+ /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
+ R300_STATECHANGE(r300, vic);
+ r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555; /* Hard coded value, no idea what it means */
+ r300->hw.vic.cmd[R300_VIC_CNTL_1] = t_vic(ctx, InputsRead);
+
+ /* Stage 3: VAP output */
+
+ R300_STATECHANGE(r300, vof);
+
+ r300->hw.vof.cmd[R300_VOF_CNTL_0] = 0;
+ r300->hw.vof.cmd[R300_VOF_CNTL_1] = 0;
+
+ if (OutputsWritten & (1 << VERT_RESULT_HPOS))
+ r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
+ R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+
+ if (OutputsWritten & (1 << VERT_RESULT_COL0))
+ r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
+
+ if (OutputsWritten & (1 << VERT_RESULT_COL1))
+ r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
+
+ /*if(OutputsWritten & (1 << VERT_RESULT_BFC0))
+ r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
+
+ if(OutputsWritten & (1 << VERT_RESULT_BFC1))
+ r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */
+ //if(OutputsWritten & (1 << VERT_RESULT_FOGC))
+
+ if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
+ R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+ if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i)))
+ r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4 << (3 * i));
+
+ rmesa->state.aos_count = nr;
+
+ return R300_FALLBACK_NONE;
+}
+
+#ifdef USER_BUFFERS
+void r300UseArrays(GLcontext * ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ int i;
+
+ if (rmesa->state.elt_dma.buf)
+ r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
+
+ for (i = 0; i < rmesa->state.aos_count; i++) {
+ if (rmesa->state.aos[i].buf)
+ r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
+ }
+}
+#endif
+
+void r300ReleaseArrays(GLcontext * ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ int i;
+
+ r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
+ for (i = 0; i < rmesa->state.aos_count; i++) {
+ r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
+ }
+}
diff --git a/r300/r300_emit.h b/r300/r300_emit.h
new file mode 100644
index 0000000..7be098f
--- /dev/null
+++ b/r300/r300_emit.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2005 Vladimir Dergachev.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Vladimir Dergachev <volodya@mindspring.com>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ * Aapo Tahkola <aet@rasterburn.org>
+ * Ben Skeggs <darktama@iinet.net.au>
+ * Jerome Glisse <j.glisse@gmail.com>
+ */
+
+/* This file defines functions for accessing R300 hardware.
+ */
+#ifndef __R300_EMIT_H__
+#define __R300_EMIT_H__
+
+#include "glheader.h"
+#include "r300_context.h"
+#include "r300_cmdbuf.h"
+#include "radeon_reg.h"
+
+/*
+ * CP type-3 packets
+ */
+#define RADEON_CP_PACKET3_UNK1B 0xC0001B00
+#define RADEON_CP_PACKET3_INDX_BUFFER 0xC0003300
+#define RADEON_CP_PACKET3_3D_DRAW_VBUF_2 0xC0003400
+#define RADEON_CP_PACKET3_3D_DRAW_IMMD_2 0xC0003500
+#define RADEON_CP_PACKET3_3D_DRAW_INDX_2 0xC0003600
+#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00
+#define RADEON_CP_PACKET3_3D_CLEAR_ZMASK 0xC0003202
+#define RADEON_CP_PACKET3_3D_CLEAR_CMASK 0xC0003802
+#define RADEON_CP_PACKET3_3D_CLEAR_HIZ 0xC0003702
+
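+/* CP_PACKET0 builds a Radeon type-0 packet header.  As an illustration,
+ * CP_PACKET0(reg, 0) writes a single dword to the given register, and a
+ * non-zero n writes n additional dwords to the consecutively following
+ * registers.
+ */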
+#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
+
+static __inline__ uint32_t cmdpacket0(int reg, int count)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.packet0.cmd_type = R300_CMD_PACKET0;
+ cmd.packet0.count = count;
+ cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
+ cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
+
+ return cmd.u;
+}
+
+static __inline__ uint32_t cmdvpu(int addr, int count)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.vpu.cmd_type = R300_CMD_VPU;
+ cmd.vpu.count = count;
+ cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8;
+ cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF);
+
+ return cmd.u;
+}
+
+static __inline__ uint32_t cmdpacket3(int packet)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.packet3.cmd_type = R300_CMD_PACKET3;
+ cmd.packet3.packet = packet;
+
+ return cmd.u;
+}
+
+static __inline__ uint32_t cmdcpdelay(unsigned short count)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.delay.cmd_type = R300_CMD_CP_DELAY;
+ cmd.delay.count = count;
+
+ return cmd.u;
+}
+
+static __inline__ uint32_t cmdwait(unsigned char flags)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.wait.cmd_type = R300_CMD_WAIT;
+ cmd.wait.flags = flags;
+
+ return cmd.u;
+}
+
+static __inline__ uint32_t cmdpacify(void)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.header.cmd_type = R300_CMD_END3D;
+
+ return cmd.u;
+}
+
+/**
+ * Prepare to write a register value to the register at address reg.
+ * If num_extra > 0, the following extra values are written to the
+ * registers at address +4, +8 and so on.
+ */
+#define reg_start(reg, num_extra) \
+ do { \
+ int _n; \
+ _n=(num_extra); \
+ cmd = (drm_radeon_cmd_header_t*) \
+ r300AllocCmdBuf(rmesa, \
+ (_n+2), \
+ __FUNCTION__); \
+ cmd_reserved=_n+2; \
+ cmd_written=1; \
+ cmd[0].i=cmdpacket0((reg), _n+1); \
+ } while (0);
+
+/**
+ * Emit GLuint freestyle
+ */
+#define e32(dword) \
+ do { \
+ if(cmd_written<cmd_reserved) { \
+ cmd[cmd_written].i=(dword); \
+ cmd_written++; \
+ } else { \
+ fprintf(stderr, \
+ "e32 but no previous packet " \
+ "declaration.\n" \
+ "Aborting! in %s::%s at line %d, " \
+ "cmd_written=%d cmd_reserved=%d\n", \
+ __FILE__, __FUNCTION__, __LINE__, \
+ cmd_written, cmd_reserved); \
+ _mesa_exit(-1); \
+ } \
+ } while(0)
+
+#define efloat(f) e32(r300PackFloat32(f))
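+
+/*
+ * A minimal usage sketch (R300_SOME_REG is a hypothetical register name).
+ * The emission macros above expect the caller to declare locals along
+ * these lines:
+ *
+ *	drm_radeon_cmd_header_t *cmd = NULL;
+ *	int cmd_reserved = 0, cmd_written = 0;
+ *
+ *	reg_start(R300_SOME_REG, 0);	(reserve space, emit the packet0 header)
+ *	efloat(1.0f);			(the single value for R300_SOME_REG)
+ */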
+
+#define vsf_start_fragment(dest, length) \
+ do { \
+ int _n; \
+ _n = (length); \
+ cmd = (drm_radeon_cmd_header_t*) \
+ r300AllocCmdBuf(rmesa, \
+ (_n+1), \
+ __FUNCTION__); \
+ cmd_reserved = _n+2; \
+ cmd_written =1; \
+ cmd[0].i = cmdvpu((dest), _n/4); \
+ } while (0);
+
+#define start_packet3(packet, count) \
+ { \
+ int _n; \
+ GLuint _p; \
+ _n = (count); \
+ _p = (packet); \
+ cmd = (drm_radeon_cmd_header_t*) \
+ r300AllocCmdBuf(rmesa, \
+ (_n+3), \
+ __FUNCTION__); \
+ cmd_reserved = _n+3; \
+ cmd_written = 2; \
+ if(_n > 0x3fff) { \
+ fprintf(stderr,"Too big packet3 %08x: cannot " \
+ "store %d dwords\n", \
+ _p, _n); \
+ _mesa_exit(-1); \
+ } \
+ cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW); \
+ cmd[1].i = _p | ((_n & 0x3fff)<<16); \
+ }
+
+/**
+ * Must be sent to switch to 2d commands
+ */
+void static inline end_3d(r300ContextPtr rmesa)
+{
+ drm_radeon_cmd_header_t *cmd = NULL;
+
+ cmd =
+ (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+ cmd[0].header.cmd_type = R300_CMD_END3D;
+}
+
+void static inline cp_delay(r300ContextPtr rmesa, unsigned short count)
+{
+ drm_radeon_cmd_header_t *cmd = NULL;
+
+ cmd =
+ (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+ cmd[0].i = cmdcpdelay(count);
+}
+
+void static inline cp_wait(r300ContextPtr rmesa, unsigned char flags)
+{
+ drm_radeon_cmd_header_t *cmd = NULL;
+
+ cmd =
+ (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+ cmd[0].i = cmdwait(flags);
+}
+
+extern int r300EmitArrays(GLcontext * ctx);
+
+#ifdef USER_BUFFERS
+void r300UseArrays(GLcontext * ctx);
+#endif
+
+extern void r300ReleaseArrays(GLcontext * ctx);
+
+#endif
diff --git a/r300/r300_fragprog.c b/r300/r300_fragprog.c
new file mode 100644
index 0000000..cce8e68
--- /dev/null
+++ b/r300/r300_fragprog.c
@@ -0,0 +1,2472 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \todo Depth write, WPOS/FOGC inputs
+ *
+ * \todo FogOption
+ *
+ * \todo Verify results of opcodes for accuracy, I've only checked them in
+ * specific cases.
+ */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+
+#include "r300_context.h"
+#include "r300_fragprog.h"
+#include "r300_reg.h"
+#include "r300_state.h"
+
+/*
+ * Useful macros and values
+ */
+#define ERROR(fmt, args...) do { \
+ fprintf(stderr, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ fp->error = GL_TRUE; \
+ } while(0)
+
+#define PFS_INVAL 0xFFFFFFFF
+#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
+
+#define SWIZZLE_XYZ 0
+#define SWIZZLE_XXX 1
+#define SWIZZLE_YYY 2
+#define SWIZZLE_ZZZ 3
+#define SWIZZLE_WWW 4
+#define SWIZZLE_YZX 5
+#define SWIZZLE_ZXY 6
+#define SWIZZLE_WZY 7
+#define SWIZZLE_111 8
+#define SWIZZLE_000 9
+#define SWIZZLE_HHH 10
+
+#define swizzle(r, x, y, z, w) do_swizzle(fp, r, \
+ ((SWIZZLE_##x<<0)| \
+ (SWIZZLE_##y<<3)| \
+ (SWIZZLE_##z<<6)| \
+ (SWIZZLE_##w<<9)), \
+ 0)
+
+#define REG_TYPE_INPUT 0
+#define REG_TYPE_OUTPUT 1
+#define REG_TYPE_TEMP 2
+#define REG_TYPE_CONST 3
+
+#define REG_TYPE_SHIFT 0
+#define REG_INDEX_SHIFT 2
+#define REG_VSWZ_SHIFT 8
+#define REG_SSWZ_SHIFT 13
+#define REG_NEGV_SHIFT 18
+#define REG_NEGS_SHIFT 19
+#define REG_ABS_SHIFT 20
+#define REG_NO_USE_SHIFT 21 // Hack for refcounting
+#define REG_VALID_SHIFT 22 // Does the register contain a defined value?
+#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)?
+
+#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT)
+#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT)
+#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT)
+#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT)
+#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT)
+#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT)
+#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT)
+#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT)
+#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT)
+#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT)
+
+#define REG(type, index, vswz, sswz, nouse, valid, builtin) \
+ (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \
+ ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \
+ ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \
+ ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \
+ ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \
+ ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \
+ ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
+#define REG_GET_TYPE(reg) \
+ ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT)
+#define REG_GET_INDEX(reg) \
+ ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT)
+#define REG_GET_VSWZ(reg) \
+ ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT)
+#define REG_GET_SSWZ(reg) \
+ ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT)
+#define REG_GET_NO_USE(reg) \
+ ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT)
+#define REG_GET_VALID(reg) \
+ ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT)
+#define REG_GET_BUILTIN(reg) \
+ ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT)
+#define REG_SET_TYPE(reg, type) \
+ reg = ((reg & ~REG_TYPE_MASK) | \
+ ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK))
+#define REG_SET_INDEX(reg, index) \
+ reg = ((reg & ~REG_INDEX_MASK) | \
+ ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK))
+#define REG_SET_VSWZ(reg, vswz) \
+ reg = ((reg & ~REG_VSWZ_MASK) | \
+ ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK))
+#define REG_SET_SSWZ(reg, sswz) \
+ reg = ((reg & ~REG_SSWZ_MASK) | \
+ ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
+#define REG_SET_NO_USE(reg, nouse) \
+ reg = ((reg & ~REG_NO_USE_MASK) | \
+ ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK))
+#define REG_SET_VALID(reg, valid) \
+ reg = ((reg & ~REG_VALID_MASK) | \
+ ((valid << REG_VALID_SHIFT) & REG_VALID_MASK))
+#define REG_SET_BUILTIN(reg, builtin) \
+ reg = ((reg & ~REG_BUILTIN_MASK) | \
+ ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK))
+#define REG_ABS(reg) \
+ reg = (reg | REG_ABS_MASK)
+#define REG_NEGV(reg) \
+ reg = (reg | REG_NEGV_MASK)
+#define REG_NEGS(reg) \
+ reg = (reg | REG_NEGS_MASK)
+
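+/* A rough example of the packed register word defined above: the type lives
+ * in bits 0-1, the index in bits 2-7, the vector/scalar swizzles in bits
+ * 8-12 and 13-17, and the negate/abs/no-use/valid/builtin flags above that.
+ * So
+ *	GLuint r = REG(REG_TYPE_TEMP, 3, SWIZZLE_XYZ, SWIZZLE_W,
+ *		       GL_FALSE, GL_TRUE, GL_FALSE);
+ * describes temporary 3 with an identity swizzle, and REG_GET_INDEX(r)
+ * gives back 3.
+ */
+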
+/*
+ * Data structures for fragment program generation
+ */
+
+/* description of r300 native hw instructions */
+static const struct {
+ const char *name;
+ int argc;
+ int v_op;
+ int s_op;
+} r300_fpop[] = {
+ /* *INDENT-OFF* */
+ {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD},
+ {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4},
+ {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4},
+ {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN},
+ {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX},
+ {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP},
+ {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC},
+ {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2},
+ {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2},
+ {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP},
+ {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ},
+ {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL},
+ {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL},
+ /* *INDENT-ON* */
+};
+
+/* vector swizzles r300 can support natively, with a couple of
+ * cases we handle specially
+ *
+ * REG_VSWZ/REG_SSWZ are indices into this table
+ */
+
+/* mapping from SWIZZLE_* to r300 native values for scalar insns */
+#define SWIZZLE_HALF 6
+
+#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
+ SWIZZLE_##y, \
+ SWIZZLE_##z, \
+ SWIZZLE_ZERO))
+/* native swizzles */
+static const struct r300_pfs_swizzle {
+ GLuint hash; /* swizzle value this matches */
+ GLuint base; /* base value for hw swizzle */
+ GLuint stride; /* difference in base between arg0/1/2 */
+ GLuint flags;
+} v_swiz[] = {
+ /* *INDENT-OFF* */
+ {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR},
+ {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR},
+ {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR},
+ {MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR},
+ {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR},
+ {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR},
+ {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR},
+ {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH},
+ {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0},
+ {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0},
+ {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0},
+ {PFS_INVAL, 0, 0, 0},
+ /* *INDENT-ON* */
+};
+
+/* used during matching of non-native swizzles */
+#define SWZ_X_MASK (7 << 0)
+#define SWZ_Y_MASK (7 << 3)
+#define SWZ_Z_MASK (7 << 6)
+#define SWZ_W_MASK (7 << 9)
+static const struct {
+ GLuint hash; /* used to mask matching swizzle components */
+ int mask; /* actual outmask */
+ int count; /* count of components matched */
+} s_mask[] = {
+ /* *INDENT-OFF* */
+ {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3},
+ {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2},
+ {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2},
+ {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2},
+ {SWZ_X_MASK, 1, 1},
+ {SWZ_Y_MASK, 2, 1},
+ {SWZ_Z_MASK, 4, 1},
+ {PFS_INVAL, PFS_INVAL, PFS_INVAL}
+ /* *INDENT-ON* */
+};
+
+static const struct {
+ int base; /* hw value of swizzle */
+ int stride; /* difference between SRC0/1/2 */
+ GLuint flags;
+} s_swiz[] = {
+ /* *INDENT-OFF* */
+ {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR},
+ {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR},
+ {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR},
+ {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR},
+ {R300_FPI2_ARGA_ZERO, 0, 0},
+ {R300_FPI2_ARGA_ONE, 0, 0},
+ {R300_FPI2_ARGA_HALF, 0, 0}
+ /* *INDENT-ON* */
+};
+
+/* boiler-plate reg, for convenience */
+static const GLuint undef = REG(REG_TYPE_TEMP,
+ 0,
+ SWIZZLE_XYZ,
+ SWIZZLE_W,
+ GL_FALSE,
+ GL_FALSE,
+ GL_FALSE);
+
+/* constant one source */
+static const GLuint pfs_one = REG(REG_TYPE_CONST,
+ 0,
+ SWIZZLE_111,
+ SWIZZLE_ONE,
+ GL_FALSE,
+ GL_TRUE,
+ GL_TRUE);
+
+/* constant half source */
+static const GLuint pfs_half = REG(REG_TYPE_CONST,
+ 0,
+ SWIZZLE_HHH,
+ SWIZZLE_HALF,
+ GL_FALSE,
+ GL_TRUE,
+ GL_TRUE);
+
+/* constant zero source */
+static const GLuint pfs_zero = REG(REG_TYPE_CONST,
+ 0,
+ SWIZZLE_000,
+ SWIZZLE_ZERO,
+ GL_FALSE,
+ GL_TRUE,
+ GL_TRUE);
+
+/*
+ * Common functions prototypes
+ */
+static void dump_program(struct r300_fragment_program *fp);
+static void emit_arith(struct r300_fragment_program *fp, int op,
+ GLuint dest, int mask,
+ GLuint src0, GLuint src1, GLuint src2, int flags);
+
+/**
+ * Get an R300 temporary that can be written to in the given slot.
+ */
+static int get_hw_temp(struct r300_fragment_program *fp, int slot)
+{
+ COMPILE_STATE;
+ int r;
+
+ for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
+ if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot)
+ break;
+ }
+
+ if (r >= PFS_NUM_TEMP_REGS) {
+ ERROR("Out of hardware temps\n");
+ return 0;
+ }
+ // Reserved is used to avoid the following scenario:
+ // R300 temporary X is first assigned to Mesa temporary Y during vector ops
+ // R300 temporary X is then assigned to Mesa temporary Z for further vector ops
+ // Then scalar ops on Mesa temporary Z are emitted and move back in time
+ // to overwrite the value of temporary Y.
+ // End scenario.
+ cs->hwtemps[r].reserved = cs->hwtemps[r].free;
+ cs->hwtemps[r].free = -1;
+
+ // Reset to some value that won't mess things up when the user
+ // tries to read from a temporary that hasn't been assigned a value yet.
+ // In the normal case, vector_valid and scalar_valid should be set to
+ // a sane value by the first emit that writes to this temporary.
+ cs->hwtemps[r].vector_valid = 0;
+ cs->hwtemps[r].scalar_valid = 0;
+
+ if (r > fp->max_temp_idx)
+ fp->max_temp_idx = r;
+
+ return r;
+}
+
+/**
+ * Get an R300 temporary that will act as a TEX destination register.
+ */
+static int get_hw_temp_tex(struct r300_fragment_program *fp)
+{
+ COMPILE_STATE;
+ int r;
+
+ for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
+ if (cs->used_in_node & (1 << r))
+ continue;
+
+ // Note: Be very careful here
+ if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0)
+ break;
+ }
+
+ if (r >= PFS_NUM_TEMP_REGS)
+ return get_hw_temp(fp, 0); /* Will cause an indirection */
+
+ cs->hwtemps[r].reserved = cs->hwtemps[r].free;
+ cs->hwtemps[r].free = -1;
+
+ // Reset to some value that won't mess things up when the user
+ // tries to read from a temporary that hasn't been assigned a value yet.
+ // In the normal case, vector_valid and scalar_valid should be set to
+ // a sane value by the first emit that writes to this temporary.
+ cs->hwtemps[r].vector_valid = cs->nrslots;
+ cs->hwtemps[r].scalar_valid = cs->nrslots;
+
+ if (r > fp->max_temp_idx)
+ fp->max_temp_idx = r;
+
+ return r;
+}
+
+/**
+ * Mark the given hardware register as free.
+ */
+static void free_hw_temp(struct r300_fragment_program *fp, int idx)
+{
+ COMPILE_STATE;
+
+ // Be very careful here. Consider sequences like
+ // MAD r0, r1,r2,r3
+ // TEX r4, ...
+ // The TEX instruction may be moved in front of the MAD instruction
+ // due to the way nodes work. We don't want to alias r1 and r4 in
+ // this case.
+ // I'm certain the register allocation could be further sanitized,
+ // but it's tricky because of stuff that can happen inside emit_tex
+ // and emit_arith.
+ cs->hwtemps[idx].free = cs->nrslots + 1;
+}
+
+/**
+ * Create a new Mesa temporary register.
+ */
+static GLuint get_temp_reg(struct r300_fragment_program *fp)
+{
+ COMPILE_STATE;
+ GLuint r = undef;
+ GLuint index;
+
+ index = ffs(~cs->temp_in_use);
+ if (!index) {
+ ERROR("Out of program temps\n");
+ return r;
+ }
+
+ cs->temp_in_use |= (1 << --index);
+ cs->temps[index].refcount = 0xFFFFFFFF;
+ cs->temps[index].reg = -1;
+
+ REG_SET_TYPE(r, REG_TYPE_TEMP);
+ REG_SET_INDEX(r, index);
+ REG_SET_VALID(r, GL_TRUE);
+ return r;
+}
+
+/**
+ * Create a new Mesa temporary register that will act as the destination
+ * register for a texture read.
+ */
+static GLuint get_temp_reg_tex(struct r300_fragment_program *fp)
+{
+ COMPILE_STATE;
+ GLuint r = undef;
+ GLuint index;
+
+ index = ffs(~cs->temp_in_use);
+ if (!index) {
+ ERROR("Out of program temps\n");
+ return r;
+ }
+
+ cs->temp_in_use |= (1 << --index);
+ cs->temps[index].refcount = 0xFFFFFFFF;
+ cs->temps[index].reg = get_hw_temp_tex(fp);
+
+ REG_SET_TYPE(r, REG_TYPE_TEMP);
+ REG_SET_INDEX(r, index);
+ REG_SET_VALID(r, GL_TRUE);
+ return r;
+}
+
+/**
+ * Free a Mesa temporary and the associated R300 temporary.
+ */
+static void free_temp(struct r300_fragment_program *fp, GLuint r)
+{
+ COMPILE_STATE;
+ GLuint index = REG_GET_INDEX(r);
+
+ if (!(cs->temp_in_use & (1 << index)))
+ return;
+
+ if (REG_GET_TYPE(r) == REG_TYPE_TEMP) {
+ free_hw_temp(fp, cs->temps[index].reg);
+ cs->temps[index].reg = -1;
+ cs->temp_in_use &= ~(1 << index);
+ } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) {
+ free_hw_temp(fp, cs->inputs[index].reg);
+ cs->inputs[index].reg = -1;
+ }
+}
+
+/**
+ * Emit a hardware constant/parameter.
+ *
+ * \p cp Stable pointer to an array of 4 floats.
+ * The pointer must be stable in the sense that it remains valid
+ * and holds the contents of the constant/parameter throughout the lifetime
+ * of the fragment program (actually, up until the next time the fragment
+ * program is translated).
+ */
+static GLuint emit_const4fv(struct r300_fragment_program *fp,
+ const GLfloat * cp)
+{
+ GLuint reg = undef;
+ int index;
+
+ for (index = 0; index < fp->const_nr; ++index) {
+ if (fp->constant[index] == cp)
+ break;
+ }
+
+ if (index >= fp->const_nr) {
+ if (index >= PFS_NUM_CONST_REGS) {
+ ERROR("Out of hw constants!\n");
+ return reg;
+ }
+
+ fp->const_nr++;
+ fp->constant[index] = cp;
+ }
+
+ REG_SET_TYPE(reg, REG_TYPE_CONST);
+ REG_SET_INDEX(reg, index);
+ REG_SET_VALID(reg, GL_TRUE);
+ return reg;
+}
+
+static inline GLuint negate(GLuint r)
+{
+ REG_NEGS(r);
+ REG_NEGV(r);
+ return r;
+}
+
+/* Hack, to prevent clobbering sources used multiple times when
+ * emulating non-native instructions
+ */
+static inline GLuint keep(GLuint r)
+{
+ REG_SET_NO_USE(r, GL_TRUE);
+ return r;
+}
+
+static inline GLuint absolute(GLuint r)
+{
+ REG_ABS(r);
+ return r;
+}
+
+static int swz_native(struct r300_fragment_program *fp,
+ GLuint src, GLuint * r, GLuint arbneg)
+{
+ /* Native swizzle, handle negation */
+ src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT);
+
+ if ((arbneg & 0x7) == 0x0) {
+ src = src & ~REG_NEGV_MASK;
+ *r = src;
+ } else if ((arbneg & 0x7) == 0x7) {
+ src |= REG_NEGV_MASK;
+ *r = src;
+ } else {
+ if (!REG_GET_VALID(*r))
+ *r = get_temp_reg(fp);
+ src |= REG_NEGV_MASK;
+ emit_arith(fp,
+ PFS_OP_MAD,
+ *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0);
+ src = src & ~REG_NEGV_MASK;
+ emit_arith(fp,
+ PFS_OP_MAD,
+ *r,
+ (arbneg ^ 0x7) | WRITEMASK_W,
+ src, pfs_one, pfs_zero, 0);
+ }
+
+ return 3;
+}
+
+static int swz_emit_partial(struct r300_fragment_program *fp,
+ GLuint src,
+ GLuint * r, int mask, int mc, GLuint arbneg)
+{
+ GLuint tmp;
+ GLuint wmask = 0;
+
+ if (!REG_GET_VALID(*r))
+ *r = get_temp_reg(fp);
+
+ /* A partial match, VSWZ/mask define what parts of the
+ * desired swizzle we match
+ */
+ if (mc + s_mask[mask].count == 3) {
+ wmask = WRITEMASK_W;
+ src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT;
+ }
+
+ tmp = arbneg & s_mask[mask].mask;
+ if (tmp) {
+ tmp = tmp ^ s_mask[mask].mask;
+ if (tmp) {
+ emit_arith(fp,
+ PFS_OP_MAD,
+ *r,
+ arbneg & s_mask[mask].mask,
+ keep(src) | REG_NEGV_MASK,
+ pfs_one, pfs_zero, 0);
+ if (!wmask) {
+ REG_SET_NO_USE(src, GL_TRUE);
+ } else {
+ REG_SET_NO_USE(src, GL_FALSE);
+ }
+ emit_arith(fp,
+ PFS_OP_MAD,
+ *r, tmp | wmask, src, pfs_one, pfs_zero, 0);
+ } else {
+ if (!wmask) {
+ REG_SET_NO_USE(src, GL_TRUE);
+ } else {
+ REG_SET_NO_USE(src, GL_FALSE);
+ }
+ emit_arith(fp,
+ PFS_OP_MAD,
+ *r,
+ (arbneg & s_mask[mask].mask) | wmask,
+ src | REG_NEGV_MASK, pfs_one, pfs_zero, 0);
+ }
+ } else {
+ if (!wmask) {
+ REG_SET_NO_USE(src, GL_TRUE);
+ } else {
+ REG_SET_NO_USE(src, GL_FALSE);
+ }
+ emit_arith(fp, PFS_OP_MAD,
+ *r,
+ s_mask[mask].mask | wmask,
+ src, pfs_one, pfs_zero, 0);
+ }
+
+ return s_mask[mask].count;
+}
+
+static GLuint do_swizzle(struct r300_fragment_program *fp,
+ GLuint src, GLuint arbswz, GLuint arbneg)
+{
+ GLuint r = undef;
+ GLuint vswz;
+ int c_mask = 0;
+ int v_match = 0;
+
+ /* If swizzling from something without an XYZW native swizzle,
+ * emit result to a temp, and do new swizzle from the temp.
+ */
+#if 0
+ if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) {
+ GLuint temp = get_temp_reg(fp);
+ emit_arith(fp,
+ PFS_OP_MAD,
+ temp, WRITEMASK_XYZW, src, pfs_one, pfs_zero, 0);
+ src = temp;
+ }
+#endif
+
+ if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) {
+ GLuint vsrcswz =
+ (v_swiz[REG_GET_VSWZ(src)].
+ hash & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK)) |
+ REG_GET_SSWZ(src) << 9;
+ GLint i;
+
+ GLuint newswz = 0;
+ GLuint offset;
+ for (i = 0; i < 4; ++i) {
+ offset = GET_SWZ(arbswz, i);
+
+ newswz |=
+ (offset <= 3) ? GET_SWZ(vsrcswz,
+ offset) << i *
+ 3 : offset << i * 3;
+ }
+
+ arbswz = newswz & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK);
+ REG_SET_SSWZ(src, GET_SWZ(newswz, 3));
+ } else {
+ /* set scalar swizzling */
+ REG_SET_SSWZ(src, GET_SWZ(arbswz, 3));
+
+ }
+ do {
+ vswz = REG_GET_VSWZ(src);
+ do {
+ int chash;
+
+ REG_SET_VSWZ(src, vswz);
+ chash = v_swiz[REG_GET_VSWZ(src)].hash &
+ s_mask[c_mask].hash;
+
+ if (chash == (arbswz & s_mask[c_mask].hash)) {
+ if (s_mask[c_mask].count == 3) {
+ v_match += swz_native(fp,
+ src, &r, arbneg);
+ } else {
+ v_match += swz_emit_partial(fp,
+ src,
+ &r,
+ c_mask,
+ v_match,
+ arbneg);
+ }
+
+ if (v_match == 3)
+ return r;
+
+ /* Fill with something invalid.. all 0's was
+ * wrong before, matched SWIZZLE_X. So all
+ * 1's will be okay for now
+ */
+ arbswz |= (PFS_INVAL & s_mask[c_mask].hash);
+ }
+ } while (v_swiz[++vswz].hash != PFS_INVAL);
+ REG_SET_VSWZ(src, SWIZZLE_XYZ);
+ } while (s_mask[++c_mask].hash != PFS_INVAL);
+
+ ERROR("should NEVER get here\n");
+ return r;
+}
+
+static GLuint t_src(struct r300_fragment_program *fp,
+ struct prog_src_register fpsrc)
+{
+ GLuint r = undef;
+
+ switch (fpsrc.File) {
+ case PROGRAM_TEMPORARY:
+ REG_SET_INDEX(r, fpsrc.Index);
+ REG_SET_VALID(r, GL_TRUE);
+ REG_SET_TYPE(r, REG_TYPE_TEMP);
+ break;
+ case PROGRAM_INPUT:
+ REG_SET_INDEX(r, fpsrc.Index);
+ REG_SET_VALID(r, GL_TRUE);
+ REG_SET_TYPE(r, REG_TYPE_INPUT);
+ break;
+ case PROGRAM_LOCAL_PARAM:
+ r = emit_const4fv(fp,
+ fp->mesa_program.Base.LocalParams[fpsrc.
+ Index]);
+ break;
+ case PROGRAM_ENV_PARAM:
+ r = emit_const4fv(fp,
+ fp->ctx->FragmentProgram.Parameters[fpsrc.
+ Index]);
+ break;
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_NAMED_PARAM:
+ r = emit_const4fv(fp,
+ fp->mesa_program.Base.Parameters->
+ ParameterValues[fpsrc.Index]);
+ break;
+ default:
+ ERROR("unknown SrcReg->File %x\n", fpsrc.File);
+ return r;
+ }
+
+ /* no point swizzling ONE/ZERO/HALF constants... */
+ if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO)
+ r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase);
+ return r;
+}
+
+static GLuint t_scalar_src(struct r300_fragment_program *fp,
+ struct prog_src_register fpsrc)
+{
+ struct prog_src_register src = fpsrc;
+ int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */
+
+ src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9));
+
+ return t_src(fp, src);
+}
+
+static GLuint t_dst(struct r300_fragment_program *fp,
+ struct prog_dst_register dest)
+{
+ GLuint r = undef;
+
+ switch (dest.File) {
+ case PROGRAM_TEMPORARY:
+ REG_SET_INDEX(r, dest.Index);
+ REG_SET_VALID(r, GL_TRUE);
+ REG_SET_TYPE(r, REG_TYPE_TEMP);
+ return r;
+ case PROGRAM_OUTPUT:
+ REG_SET_TYPE(r, REG_TYPE_OUTPUT);
+ switch (dest.Index) {
+ case FRAG_RESULT_COLR:
+ case FRAG_RESULT_DEPR:
+ REG_SET_INDEX(r, dest.Index);
+ REG_SET_VALID(r, GL_TRUE);
+ return r;
+ default:
+ ERROR("Bad DstReg->Index 0x%x\n", dest.Index);
+ return r;
+ }
+ default:
+ ERROR("Bad DstReg->File 0x%x\n", dest.File);
+ return r;
+ }
+}
+
+static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex)
+{
+ COMPILE_STATE;
+ int idx;
+ int index = REG_GET_INDEX(src);
+
+ switch (REG_GET_TYPE(src)) {
+ case REG_TYPE_TEMP:
+ /* NOTE: if reg==-1 here, a source is being read that
+ * hasn't been written to. Undefined results.
+ */
+ if (cs->temps[index].reg == -1)
+ cs->temps[index].reg = get_hw_temp(fp, cs->nrslots);
+
+ idx = cs->temps[index].reg;
+
+ if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0))
+ free_temp(fp, src);
+ break;
+ case REG_TYPE_INPUT:
+ idx = cs->inputs[index].reg;
+
+ if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0))
+ free_hw_temp(fp, cs->inputs[index].reg);
+ break;
+ case REG_TYPE_CONST:
+ return (index | SRC_CONST);
+ default:
+ ERROR("Invalid type for source reg\n");
+ return (0 | SRC_CONST);
+ }
+
+ if (!tex)
+ cs->used_in_node |= (1 << idx);
+
+ return idx;
+}
+
+static int t_hw_dst(struct r300_fragment_program *fp,
+ GLuint dest, GLboolean tex, int slot)
+{
+ COMPILE_STATE;
+ int idx;
+ GLuint index = REG_GET_INDEX(dest);
+ assert(REG_GET_VALID(dest));
+
+ switch (REG_GET_TYPE(dest)) {
+ case REG_TYPE_TEMP:
+ if (cs->temps[REG_GET_INDEX(dest)].reg == -1) {
+ if (!tex) {
+ cs->temps[index].reg = get_hw_temp(fp, slot);
+ } else {
+ cs->temps[index].reg = get_hw_temp_tex(fp);
+ }
+ }
+ idx = cs->temps[index].reg;
+
+ if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0))
+ free_temp(fp, dest);
+
+ cs->dest_in_node |= (1 << idx);
+ cs->used_in_node |= (1 << idx);
+ break;
+ case REG_TYPE_OUTPUT:
+ switch (index) {
+ case FRAG_RESULT_COLR:
+ fp->node[fp->cur_node].flags |=
+ R300_PFS_NODE_OUTPUT_COLOR;
+ break;
+ case FRAG_RESULT_DEPR:
+ fp->node[fp->cur_node].flags |=
+ R300_PFS_NODE_OUTPUT_DEPTH;
+ break;
+ }
+ return index;
+ break;
+ default:
+ ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
+ return 0;
+ }
+
+ return idx;
+}
+
+static void emit_nop(struct r300_fragment_program *fp)
+{
+ COMPILE_STATE;
+
+ if (cs->nrslots >= PFS_MAX_ALU_INST) {
+ ERROR("Out of ALU instruction slots\n");
+ return;
+ }
+
+ fp->alu.inst[cs->nrslots].inst0 = NOP_INST0;
+ fp->alu.inst[cs->nrslots].inst1 = NOP_INST1;
+ fp->alu.inst[cs->nrslots].inst2 = NOP_INST2;
+ fp->alu.inst[cs->nrslots].inst3 = NOP_INST3;
+ cs->nrslots++;
+}
+
+static void emit_tex(struct r300_fragment_program *fp,
+ struct prog_instruction *fpi, int opcode)
+{
+ COMPILE_STATE;
+ GLuint coord = t_src(fp, fpi->SrcReg[0]);
+ GLuint dest = undef, rdest = undef;
+ GLuint din, uin;
+ int unit = fpi->TexSrcUnit;
+ int hwsrc, hwdest;
+ GLuint tempreg = 0;
+
+ uin = cs->used_in_node;
+ din = cs->dest_in_node;
+
+ /* Resolve source/dest to hardware registers */
+ if (opcode != R300_FPITX_OP_KIL) {
+ if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
+ /**
+ * Hardware uses [0..1]x[0..1] range for rectangle textures
+ * instead of [0..Width]x[0..Height].
+ * Add a scaling instruction.
+ *
+ * \todo Refactor this once we have proper rewriting/optimization
+ * support for programs.
+ */
+ gl_state_index tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
+ 0
+ };
+ int factor_index;
+ GLuint factorreg;
+
+ tokens[2] = unit;
+			factor_index =
+			    _mesa_add_state_reference(fp->mesa_program.Base.Parameters,
+						      tokens);
+			factorreg =
+			    emit_const4fv(fp,
+					  fp->mesa_program.Base.Parameters->ParameterValues[factor_index]);
+ tempreg = keep(get_temp_reg(fp));
+
+ emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
+ coord, factorreg, pfs_zero, 0);
+
+ /* Ensure correct node indirection */
+ uin = cs->used_in_node;
+ din = cs->dest_in_node;
+
+ hwsrc = t_hw_src(fp, tempreg, GL_TRUE);
+ } else {
+ hwsrc = t_hw_src(fp, coord, GL_TRUE);
+ }
+
+ dest = t_dst(fp, fpi->DstReg);
+
+ /* r300 doesn't seem to be able to do TEX->output reg */
+ if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
+ rdest = dest;
+ dest = get_temp_reg_tex(fp);
+ }
+ hwdest =
+ t_hw_dst(fp, dest, GL_TRUE,
+ fp->node[fp->cur_node].alu_offset);
+
+ /* Use a temp that hasn't been used in this node, rather
+ * than causing an indirection
+ */
+ if (uin & (1 << hwdest)) {
+ free_hw_temp(fp, hwdest);
+ hwdest = get_hw_temp_tex(fp);
+ cs->temps[REG_GET_INDEX(dest)].reg = hwdest;
+ }
+ } else {
+ hwdest = 0;
+ unit = 0;
+ hwsrc = t_hw_src(fp, coord, GL_TRUE);
+ }
+
+ /* Indirection if source has been written in this node, or if the
+ * dest has been read/written in this node
+ */
+ if ((REG_GET_TYPE(coord) != REG_TYPE_CONST &&
+ (din & (1 << hwsrc))) || (uin & (1 << hwdest))) {
+
+ /* Finish off current node */
+ if (fp->node[fp->cur_node].alu_offset == cs->nrslots)
+ emit_nop(fp);
+
+ fp->node[fp->cur_node].alu_end =
+ cs->nrslots - fp->node[fp->cur_node].alu_offset - 1;
+ assert(fp->node[fp->cur_node].alu_end >= 0);
+
+ if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) {
+ ERROR("too many levels of texture indirection\n");
+ return;
+ }
+
+ /* Start new node */
+ fp->node[fp->cur_node].tex_offset = fp->tex.length;
+ fp->node[fp->cur_node].alu_offset = cs->nrslots;
+ fp->node[fp->cur_node].tex_end = -1;
+ fp->node[fp->cur_node].alu_end = -1;
+ fp->node[fp->cur_node].flags = 0;
+ cs->used_in_node = 0;
+ cs->dest_in_node = 0;
+ }
+
+ if (fp->cur_node == 0)
+ fp->first_node_has_tex = 1;
+
+ fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT)
+ | (hwdest << R300_FPITX_DST_SHIFT)
+ | (unit << R300_FPITX_IMAGE_SHIFT)
+ /* not entirely sure about this */
+ | (opcode << R300_FPITX_OPCODE_SHIFT);
+
+ cs->dest_in_node |= (1 << hwdest);
+ if (REG_GET_TYPE(coord) != REG_TYPE_CONST)
+ cs->used_in_node |= (1 << hwsrc);
+
+ fp->node[fp->cur_node].tex_end++;
+
+ /* Copy from temp to output if needed */
+ if (REG_GET_VALID(rdest)) {
+ emit_arith(fp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest,
+ pfs_one, pfs_zero, 0);
+ free_temp(fp, dest);
+ }
+
+ /* Free temp register */
+ if (tempreg != 0)
+ free_temp(fp, tempreg);
+}
+
+/**
+ * Returns the first slot where we could possibly allow writing to dest,
+ * according to register allocation.
+ */
+static int get_earliest_allowed_write(struct r300_fragment_program *fp,
+ GLuint dest, int mask)
+{
+ COMPILE_STATE;
+ int idx;
+ int pos;
+ GLuint index = REG_GET_INDEX(dest);
+ assert(REG_GET_VALID(dest));
+
+ switch (REG_GET_TYPE(dest)) {
+ case REG_TYPE_TEMP:
+ if (cs->temps[index].reg == -1)
+ return 0;
+
+ idx = cs->temps[index].reg;
+ break;
+ case REG_TYPE_OUTPUT:
+ return 0;
+ default:
+ ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
+ return 0;
+ }
+
+ pos = cs->hwtemps[idx].reserved;
+ if (mask & WRITEMASK_XYZ) {
+ if (pos < cs->hwtemps[idx].vector_lastread)
+ pos = cs->hwtemps[idx].vector_lastread;
+ }
+ if (mask & WRITEMASK_W) {
+ if (pos < cs->hwtemps[idx].scalar_lastread)
+ pos = cs->hwtemps[idx].scalar_lastread;
+ }
+
+ return pos;
+}
+
+/**
+ * Allocates a slot for an ALU instruction that can consist of
+ * a vector part or a scalar part or both.
+ *
+ * Sources from src (src[0] to src[argc-1]) are added to the slot in the
+ * appropriate position (vector and/or scalar), and their positions are
+ * recorded in the srcpos array.
+ *
+ * This function emits instruction code for the source fetch and the
+ * argument selection. It does not emit instruction code for the
+ * opcode or the destination selection.
+ *
+ * @return the index of the slot
+ */
+static int find_and_prepare_slot(struct r300_fragment_program *fp,
+ GLboolean emit_vop,
+ GLboolean emit_sop,
+ int argc, GLuint * src, GLuint dest, int mask)
+{
+ COMPILE_STATE;
+ int hwsrc[3];
+ int srcpos[3];
+ unsigned int used;
+ int tempused;
+ int tempvsrc[3];
+ int tempssrc[3];
+ int pos;
+ int regnr;
+ int i, j;
+
+ // Determine instruction slots, whether sources are required on
+ // vector or scalar side, and the smallest slot number where
+ // all source registers are available
+ used = 0;
+ if (emit_vop)
+ used |= SLOT_OP_VECTOR;
+ if (emit_sop)
+ used |= SLOT_OP_SCALAR;
+
+ pos = get_earliest_allowed_write(fp, dest, mask);
+
+ if (fp->node[fp->cur_node].alu_offset > pos)
+ pos = fp->node[fp->cur_node].alu_offset;
+ for (i = 0; i < argc; ++i) {
+ if (!REG_GET_BUILTIN(src[i])) {
+ if (emit_vop)
+ used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i;
+ if (emit_sop)
+ used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i;
+ }
+
+ hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */
+ regnr = hwsrc[i] & 31;
+
+ if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) {
+ if (used & (SLOT_SRC_VECTOR << i)) {
+ if (cs->hwtemps[regnr].vector_valid > pos)
+ pos = cs->hwtemps[regnr].vector_valid;
+ }
+ if (used & (SLOT_SRC_SCALAR << i)) {
+ if (cs->hwtemps[regnr].scalar_valid > pos)
+ pos = cs->hwtemps[regnr].scalar_valid;
+ }
+ }
+ }
+
+ // Find a slot that fits
+ for (;; ++pos) {
+ if (cs->slot[pos].used & used & SLOT_OP_BOTH)
+ continue;
+
+ if (pos >= cs->nrslots) {
+ if (cs->nrslots >= PFS_MAX_ALU_INST) {
+ ERROR("Out of ALU instruction slots\n");
+ return -1;
+ }
+
+ fp->alu.inst[pos].inst0 = NOP_INST0;
+ fp->alu.inst[pos].inst1 = NOP_INST1;
+ fp->alu.inst[pos].inst2 = NOP_INST2;
+ fp->alu.inst[pos].inst3 = NOP_INST3;
+
+ cs->nrslots++;
+ }
+ // Note: When we need both parts (vector and scalar) of a source,
+ // we always try to put them into the same position. This makes the
+ // code easier to read, and it is optimal (i.e. one doesn't gain
+ // anything by splitting the parts).
+		// It also avoids headaches with swizzles that access both parts (e.g. WXY)
+ tempused = cs->slot[pos].used;
+ for (i = 0; i < 3; ++i) {
+ tempvsrc[i] = cs->slot[pos].vsrc[i];
+ tempssrc[i] = cs->slot[pos].ssrc[i];
+ }
+
+ for (i = 0; i < argc; ++i) {
+ int flags = (used >> i) & SLOT_SRC_BOTH;
+
+ if (!flags) {
+ srcpos[i] = 0;
+ continue;
+ }
+
+ for (j = 0; j < 3; ++j) {
+ if ((tempused >> j) & flags & SLOT_SRC_VECTOR) {
+ if (tempvsrc[j] != hwsrc[i])
+ continue;
+ }
+
+ if ((tempused >> j) & flags & SLOT_SRC_SCALAR) {
+ if (tempssrc[j] != hwsrc[i])
+ continue;
+ }
+
+ break;
+ }
+
+ if (j == 3)
+ break;
+
+ srcpos[i] = j;
+ tempused |= flags << j;
+ if (flags & SLOT_SRC_VECTOR)
+ tempvsrc[j] = hwsrc[i];
+ if (flags & SLOT_SRC_SCALAR)
+ tempssrc[j] = hwsrc[i];
+ }
+
+ if (i == argc)
+ break;
+ }
+
+ // Found a slot, reserve it
+ cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH);
+ for (i = 0; i < 3; ++i) {
+ cs->slot[pos].vsrc[i] = tempvsrc[i];
+ cs->slot[pos].ssrc[i] = tempssrc[i];
+ }
+
+ for (i = 0; i < argc; ++i) {
+ if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) {
+ int regnr = hwsrc[i] & 31;
+
+ if (used & (SLOT_SRC_VECTOR << i)) {
+ if (cs->hwtemps[regnr].vector_lastread < pos)
+ cs->hwtemps[regnr].vector_lastread =
+ pos;
+ }
+ if (used & (SLOT_SRC_SCALAR << i)) {
+ if (cs->hwtemps[regnr].scalar_lastread < pos)
+ cs->hwtemps[regnr].scalar_lastread =
+ pos;
+ }
+ }
+ }
+
+ // Emit the source fetch code
+ fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK;
+ fp->alu.inst[pos].inst1 |=
+ ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) |
+ (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) |
+ (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT));
+
+ fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK;
+ fp->alu.inst[pos].inst3 |=
+ ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) |
+ (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) |
+ (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT));
+
+ // Emit the argument selection code
+ if (emit_vop) {
+ int swz[3];
+
+ for (i = 0; i < 3; ++i) {
+ if (i < argc) {
+				swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base +
+					  (srcpos[i] *
+					   v_swiz[REG_GET_VSWZ(src[i])].stride)) |
+					 ((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) |
+					 ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0);
+ } else {
+ swz[i] = R300_FPI0_ARGC_ZERO;
+ }
+ }
+
+ fp->alu.inst[pos].inst0 &=
+ ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK |
+ R300_FPI0_ARG2C_MASK);
+		fp->alu.inst[pos].inst0 |=
+		    (swz[0] << R300_FPI0_ARG0C_SHIFT) |
+		    (swz[1] << R300_FPI0_ARG1C_SHIFT) |
+		    (swz[2] << R300_FPI0_ARG2C_SHIFT);
+ }
+
+ if (emit_sop) {
+ int swz[3];
+
+ for (i = 0; i < 3; ++i) {
+ if (i < argc) {
+				swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base +
+					  (srcpos[i] *
+					   s_swiz[REG_GET_SSWZ(src[i])].stride)) |
+					 ((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) |
+					 ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0);
+ } else {
+ swz[i] = R300_FPI2_ARGA_ZERO;
+ }
+ }
+
+ fp->alu.inst[pos].inst2 &=
+ ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK |
+ R300_FPI2_ARG2A_MASK);
+		fp->alu.inst[pos].inst2 |=
+		    (swz[0] << R300_FPI2_ARG0A_SHIFT) |
+		    (swz[1] << R300_FPI2_ARG1A_SHIFT) |
+		    (swz[2] << R300_FPI2_ARG2A_SHIFT);
+ }
+
+ return pos;
+}
+
+/**
+ * Append an ALU instruction to the instruction list.
+ */
+static void emit_arith(struct r300_fragment_program *fp,
+ int op,
+ GLuint dest,
+ int mask,
+ GLuint src0, GLuint src1, GLuint src2, int flags)
+{
+ COMPILE_STATE;
+ GLuint src[3] = { src0, src1, src2 };
+ int hwdest;
+ GLboolean emit_vop, emit_sop;
+ int vop, sop, argc;
+ int pos;
+
+ vop = r300_fpop[op].v_op;
+ sop = r300_fpop[op].s_op;
+ argc = r300_fpop[op].argc;
+
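+	/* The depth result is written through the scalar (W) pipe on this
+	 * hardware (see the R300_FPI3_DSTA_DEPTH path below), so a Z write
+	 * to the depth output is remapped onto the W channel here.
+	 */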
+ if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT &&
+ REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) {
+ if (mask & WRITEMASK_Z) {
+ mask = WRITEMASK_W;
+ } else {
+ return;
+ }
+ }
+
+ emit_vop = GL_FALSE;
+ emit_sop = GL_FALSE;
+ if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3)
+ emit_vop = GL_TRUE;
+ if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA)
+ emit_sop = GL_TRUE;
+
+ pos =
+ find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest,
+ mask);
+ if (pos < 0)
+ return;
+
+ hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */
+
+ if (flags & PFS_FLAG_SAT) {
+ vop |= R300_FPI0_OUTC_SAT;
+ sop |= R300_FPI2_OUTA_SAT;
+ }
+
+ /* Throw the pieces together and get FPI0/1 */
+ if (emit_vop) {
+ fp->alu.inst[pos].inst0 |= vop;
+
+ fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT;
+
+ if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
+ if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
+ fp->alu.inst[pos].inst1 |=
+ (mask & WRITEMASK_XYZ) <<
+ R300_FPI1_DSTC_OUTPUT_MASK_SHIFT;
+ } else
+ assert(0);
+ } else {
+ fp->alu.inst[pos].inst1 |=
+ (mask & WRITEMASK_XYZ) <<
+ R300_FPI1_DSTC_REG_MASK_SHIFT;
+
+ cs->hwtemps[hwdest].vector_valid = pos + 1;
+ }
+ }
+
+ /* And now FPI2/3 */
+ if (emit_sop) {
+ fp->alu.inst[pos].inst2 |= sop;
+
+ if (mask & WRITEMASK_W) {
+ if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
+ if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
+ fp->alu.inst[pos].inst3 |=
+ (hwdest << R300_FPI3_DSTA_SHIFT) |
+ R300_FPI3_DSTA_OUTPUT;
+ } else if (REG_GET_INDEX(dest) ==
+ FRAG_RESULT_DEPR) {
+ fp->alu.inst[pos].inst3 |=
+ R300_FPI3_DSTA_DEPTH;
+ } else
+ assert(0);
+ } else {
+ fp->alu.inst[pos].inst3 |=
+ (hwdest << R300_FPI3_DSTA_SHIFT) |
+ R300_FPI3_DSTA_REG;
+
+ cs->hwtemps[hwdest].scalar_valid = pos + 1;
+ }
+ }
+ }
+
+ return;
+}
+
+#if 0
+static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr)
+{
+ struct gl_fragment_program *mp = &fp->mesa_program;
+ GLuint r = undef;
+
+ if (!(mp->Base.InputsRead & (1 << attr))) {
+ ERROR("Attribute %d was not provided!\n", attr);
+ return undef;
+ }
+
+ REG_SET_TYPE(r, REG_TYPE_INPUT);
+ REG_SET_INDEX(r, attr);
+ REG_SET_VALID(r, GL_TRUE);
+ return r;
+}
+#endif
+
+static GLfloat SinCosConsts[2][4] = {
+ {
+ 1.273239545, // 4/PI
+ -0.405284735, // -4/(PI*PI)
+ 3.141592654, // PI
+ 0.2225 // weight
+ },
+ {
+ 0.75,
+ 0.0,
+ 0.159154943, // 1/(2*PI)
+ 6.283185307 // 2*PI
+ }
+};
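+/*
+ * Rough sketch of the approximation these constants drive (see the
+ * OPCODE_SIN/OPCODE_COS cases in parse_program below): after reducing
+ * x to [-PI, PI),
+ *
+ *   y       = (4/PI) * x - (4/(PI*PI)) * x * |x|
+ *   sin(x) ~= 0.2225 * (y * |y| - y) + y
+ *
+ * cos(x) reuses the same evaluation after shifting the phase by PI/2
+ * during range reduction.
+ */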
+
+/**
+ * Emit a LIT instruction.
+ * \p flags may be PFS_FLAG_SAT
+ *
+ * Definition of LIT (from ARB_fragment_program):
+ * tmp = VectorLoad(op0);
+ * if (tmp.x < 0) tmp.x = 0;
+ * if (tmp.y < 0) tmp.y = 0;
+ * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ * result.x = 1.0;
+ * result.y = tmp.x;
+ * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ * result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots. So unless there's some special undocumented opcode,
+ * this implementation is potentially optimal. Unfortunately,
+ * emit_arith is a bit too conservative because it doesn't understand
+ * partial writes to the vector component.
+ */
+static const GLfloat LitConst[4] =
+ { 127.999999, 127.999999, 127.999999, -127.999999 };
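+/*
+ * Sketch of how the longest path (result.z) maps onto the five slots
+ * emitted below: MAX clamps tmp.x/tmp.y, LG2 computes log2(tmp.y), a MAD
+ * scales that by the clamped tmp.w, EX2 exponentiates, and the final CMP
+ * selects 0.0 when tmp.x is not positive.
+ */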
+
+static void emit_lit(struct r300_fragment_program *fp,
+ GLuint dest, int mask, GLuint src, int flags)
+{
+ COMPILE_STATE;
+ GLuint cnst;
+ int needTemporary;
+ GLuint temp;
+
+ cnst = emit_const4fv(fp, LitConst);
+
+ needTemporary = 0;
+ if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) {
+ needTemporary = 1;
+ } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
+ // LIT is typically followed by DP3/DP4, so there's no point
+ // in creating special code for this case
+ needTemporary = 1;
+ }
+
+ if (needTemporary) {
+ temp = keep(get_temp_reg(fp));
+ } else {
+ temp = keep(dest);
+ }
+
+ // Note: The order of emit_arith inside the slots is relevant,
+ // because emit_arith only looks at scalar vs. vector when resolving
+ // dependencies, and it does not consider individual vector components,
+ // so swizzling between the two parts can create fake dependencies.
+
+ // First slot
+ emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY,
+ keep(src), pfs_zero, undef, 0);
+ emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0);
+
+ // Second slot
+ emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z,
+ swizzle(temp, W, W, W, W), cnst, undef, 0);
+ emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W,
+ swizzle(temp, Y, Y, Y, Y), undef, undef, 0);
+
+ // Third slot
+ // If desired, we saturate the y result here.
+ // This does not affect the use as a condition variable in the CMP later
+ emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W,
+ temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0);
+ emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y,
+ swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags);
+
+ // Fourth slot
+ emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X,
+ pfs_one, pfs_one, pfs_zero, 0);
+ emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0);
+
+ // Fifth slot
+ emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z,
+ pfs_zero, swizzle(temp, W, W, W, W),
+ negate(swizzle(temp, Y, Y, Y, Y)), flags);
+ emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one,
+ pfs_zero, 0);
+
+ if (needTemporary) {
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ temp, pfs_one, pfs_zero, flags);
+ free_temp(fp, temp);
+ } else {
+ // Decrease refcount of the destination
+ t_hw_dst(fp, dest, GL_FALSE, cs->nrslots);
+ }
+}
+
+static GLboolean parse_program(struct r300_fragment_program *fp)
+{
+ struct gl_fragment_program *mp = &fp->mesa_program;
+ const struct prog_instruction *inst = mp->Base.Instructions;
+ struct prog_instruction *fpi;
+ GLuint src[3], dest, temp[2];
+ int flags, mask = 0;
+ int const_sin[2];
+
+ if (!inst || inst[0].Opcode == OPCODE_END) {
+ ERROR("empty program?\n");
+ return GL_FALSE;
+ }
+
+ for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
+ if (fpi->SaturateMode == SATURATE_ZERO_ONE)
+ flags = PFS_FLAG_SAT;
+ else
+ flags = 0;
+
+ if (fpi->Opcode != OPCODE_KIL) {
+ dest = t_dst(fp, fpi->DstReg);
+ mask = fpi->DstReg.WriteMask;
+ }
+
+ switch (fpi->Opcode) {
+ case OPCODE_ABS:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ absolute(src[0]), pfs_one, pfs_zero, flags);
+ break;
+ case OPCODE_ADD:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ src[0], pfs_one, src[1], flags);
+ break;
+ case OPCODE_CMP:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ src[2] = t_src(fp, fpi->SrcReg[2]);
+ /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c
+ * r300 - if src2.c < 0.0 ? src1.c : src0.c
+ */
+ emit_arith(fp, PFS_OP_CMP, dest, mask,
+ src[2], src[1], src[0], flags);
+ break;
+ case OPCODE_COS:
+ /*
+ * cos using a parabola (see SIN):
+ * cos(x):
+ * x = (x/(2*PI))+0.75
+ * x = frac(x)
+ * x = (x*2*PI)-PI
+ * result = sin(x)
+ */
+ temp[0] = get_temp_reg(fp);
+ const_sin[0] = emit_const4fv(fp, SinCosConsts[0]);
+ const_sin[1] = emit_const4fv(fp, SinCosConsts[1]);
+ src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
+
+ /* add 0.5*PI and do range reduction */
+
+ emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
+ swizzle(src[0], X, X, X, X),
+ swizzle(const_sin[1], Z, Z, Z, Z),
+ swizzle(const_sin[1], X, X, X, X), 0);
+
+ emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X,
+ swizzle(temp[0], X, X, X, X),
+ undef, undef, 0);
+
+			emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z,
+				   swizzle(temp[0], X, X, X, X),
+				   swizzle(const_sin[1], W, W, W, W),	//2*PI
+				   negate(swizzle(const_sin[0], Z, Z, Z, Z)),	//-PI
+				   0);
+
+ /* SIN */
+
+			emit_arith(fp, PFS_OP_MAD, temp[0],
+				   WRITEMASK_X | WRITEMASK_Y,
+				   swizzle(temp[0], Z, Z, Z, Z),
+				   const_sin[0], pfs_zero, 0);
+
+ emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
+ swizzle(temp[0], Y, Y, Y, Y),
+ absolute(swizzle(temp[0], Z, Z, Z, Z)),
+ swizzle(temp[0], X, X, X, X), 0);
+
+ emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y,
+ swizzle(temp[0], X, X, X, X),
+ absolute(swizzle(temp[0], X, X, X, X)),
+ negate(swizzle(temp[0], X, X, X, X)), 0);
+
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ swizzle(temp[0], Y, Y, Y, Y),
+ swizzle(const_sin[0], W, W, W, W),
+ swizzle(temp[0], X, X, X, X), flags);
+
+ free_temp(fp, temp[0]);
+ break;
+ case OPCODE_DP3:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ emit_arith(fp, PFS_OP_DP3, dest, mask,
+ src[0], src[1], undef, flags);
+ break;
+ case OPCODE_DP4:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ emit_arith(fp, PFS_OP_DP4, dest, mask,
+ src[0], src[1], undef, flags);
+ break;
+ case OPCODE_DPH:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ /* src0.xyz1 -> temp
+ * DP4 dest, temp, src1
+ */
+#if 0
+ temp[0] = get_temp_reg(fp);
+ src[0].s_swz = SWIZZLE_ONE;
+ emit_arith(fp, PFS_OP_MAD, temp[0], mask,
+ src[0], pfs_one, pfs_zero, 0);
+ emit_arith(fp, PFS_OP_DP4, dest, mask,
+ temp[0], src[1], undef, flags);
+ free_temp(fp, temp[0]);
+#else
+ emit_arith(fp, PFS_OP_DP4, dest, mask,
+ swizzle(src[0], X, Y, Z, ONE), src[1],
+ undef, flags);
+#endif
+ break;
+ case OPCODE_DST:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ /* dest.y = src0.y * src1.y */
+ if (mask & WRITEMASK_Y)
+ emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y,
+ keep(src[0]), keep(src[1]),
+ pfs_zero, flags);
+ /* dest.z = src0.z */
+ if (mask & WRITEMASK_Z)
+ emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z,
+ src[0], pfs_one, pfs_zero, flags);
+ /* result.x = 1.0
+ * result.w = src1.w */
+ if (mask & WRITEMASK_XW) {
+ REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */
+ emit_arith(fp, PFS_OP_MAD, dest,
+ mask & WRITEMASK_XW,
+ src[1], pfs_one, pfs_zero, flags);
+ }
+ break;
+ case OPCODE_EX2:
+ src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
+ emit_arith(fp, PFS_OP_EX2, dest, mask,
+ src[0], undef, undef, flags);
+ break;
+ case OPCODE_FLR:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ temp[0] = get_temp_reg(fp);
+ /* FRC temp, src0
+ * MAD dest, src0, 1.0, -temp
+ */
+ emit_arith(fp, PFS_OP_FRC, temp[0], mask,
+ keep(src[0]), undef, undef, 0);
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ src[0], pfs_one, negate(temp[0]), flags);
+ free_temp(fp, temp[0]);
+ break;
+ case OPCODE_FRC:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ emit_arith(fp, PFS_OP_FRC, dest, mask,
+ src[0], undef, undef, flags);
+ break;
+ case OPCODE_KIL:
+ emit_tex(fp, fpi, R300_FPITX_OP_KIL);
+ break;
+ case OPCODE_LG2:
+ src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
+ emit_arith(fp, PFS_OP_LG2, dest, mask,
+ src[0], undef, undef, flags);
+ break;
+ case OPCODE_LIT:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ emit_lit(fp, dest, mask, src[0], flags);
+ break;
+ case OPCODE_LRP:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ src[2] = t_src(fp, fpi->SrcReg[2]);
+			/* result = tmp0*tmp1 + (1 - tmp0)*tmp2
+			 *        = tmp0*tmp1 + tmp2 + (-tmp0)*tmp2
+ * MAD temp, -tmp0, tmp2, tmp2
+ * MAD result, tmp0, tmp1, temp
+ */
+ temp[0] = get_temp_reg(fp);
+ emit_arith(fp, PFS_OP_MAD, temp[0], mask,
+ negate(keep(src[0])), keep(src[2]), src[2],
+ 0);
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ src[0], src[1], temp[0], flags);
+ free_temp(fp, temp[0]);
+ break;
+ case OPCODE_MAD:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ src[2] = t_src(fp, fpi->SrcReg[2]);
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ src[0], src[1], src[2], flags);
+ break;
+ case OPCODE_MAX:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ emit_arith(fp, PFS_OP_MAX, dest, mask,
+ src[0], src[1], undef, flags);
+ break;
+ case OPCODE_MIN:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ emit_arith(fp, PFS_OP_MIN, dest, mask,
+ src[0], src[1], undef, flags);
+ break;
+ case OPCODE_MOV:
+ case OPCODE_SWZ:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ src[0], pfs_one, pfs_zero, flags);
+ break;
+ case OPCODE_MUL:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ src[0], src[1], pfs_zero, flags);
+ break;
+ case OPCODE_POW:
+ src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
+ src[1] = t_scalar_src(fp, fpi->SrcReg[1]);
+ temp[0] = get_temp_reg(fp);
+ emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W,
+ src[0], undef, undef, 0);
+ emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W,
+ temp[0], src[1], pfs_zero, 0);
+ emit_arith(fp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask,
+ temp[0], undef, undef, 0);
+ free_temp(fp, temp[0]);
+ break;
+ case OPCODE_RCP:
+ src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
+ emit_arith(fp, PFS_OP_RCP, dest, mask,
+ src[0], undef, undef, flags);
+ break;
+ case OPCODE_RSQ:
+ src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
+ emit_arith(fp, PFS_OP_RSQ, dest, mask,
+ absolute(src[0]), pfs_zero, pfs_zero, flags);
+ break;
+ case OPCODE_SCS:
+ /*
+ * scs using a parabola :
+ * scs(x):
+ * result.x = sin(-abs(x)+0.5*PI) (cos)
+ * result.y = sin(x) (sin)
+ *
+ */
+ temp[0] = get_temp_reg(fp);
+ temp[1] = get_temp_reg(fp);
+ const_sin[0] = emit_const4fv(fp, SinCosConsts[0]);
+ const_sin[1] = emit_const4fv(fp, SinCosConsts[1]);
+ src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
+
+ /* x = -abs(x)+0.5*PI */
+			emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z,
+				   swizzle(const_sin[0], Z, Z, Z, Z),	//PI
+				   pfs_half,
+				   negate(absolute(swizzle(keep(src[0]), X, X, X, X))),
+				   0);
+
+ /* C*x (sin) */
+ emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W,
+ swizzle(const_sin[0], Y, Y, Y, Y),
+ swizzle(keep(src[0]), X, X, X, X),
+ pfs_zero, 0);
+
+ /* B*x, C*x (cos) */
+			emit_arith(fp, PFS_OP_MAD, temp[0],
+				   WRITEMASK_X | WRITEMASK_Y,
+				   swizzle(temp[0], Z, Z, Z, Z),
+				   const_sin[0], pfs_zero, 0);
+
+ /* B*x (sin) */
+ emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W,
+ swizzle(const_sin[0], X, X, X, X),
+ keep(src[0]), pfs_zero, 0);
+
+ /* y = B*x + C*x*abs(x) (sin) */
+ emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z,
+ absolute(src[0]),
+ swizzle(temp[0], W, W, W, W),
+ swizzle(temp[1], W, W, W, W), 0);
+
+ /* y = B*x + C*x*abs(x) (cos) */
+ emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W,
+ swizzle(temp[0], Y, Y, Y, Y),
+ absolute(swizzle(temp[0], Z, Z, Z, Z)),
+ swizzle(temp[0], X, X, X, X), 0);
+
+ /* y*abs(y) - y (cos), y*abs(y) - y (sin) */
+			emit_arith(fp, PFS_OP_MAD, temp[0],
+				   WRITEMASK_X | WRITEMASK_Y,
+				   swizzle(temp[1], W, Z, Y, X),
+				   absolute(swizzle(temp[1], W, Z, Y, X)),
+				   negate(swizzle(temp[1], W, Z, Y, X)), 0);
+
+ /* dest.xy = mad(temp.xy, P, temp2.wz) */
+ emit_arith(fp, PFS_OP_MAD, dest,
+ mask & (WRITEMASK_X | WRITEMASK_Y), temp[0],
+ swizzle(const_sin[0], W, W, W, W),
+ swizzle(temp[1], W, Z, Y, X), flags);
+
+ free_temp(fp, temp[0]);
+ free_temp(fp, temp[1]);
+ break;
+ case OPCODE_SGE:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ temp[0] = get_temp_reg(fp);
+ /* temp = src0 - src1
+ * dest.c = (temp.c < 0.0) ? 0 : 1
+ */
+ emit_arith(fp, PFS_OP_MAD, temp[0], mask,
+ src[0], pfs_one, negate(src[1]), 0);
+ emit_arith(fp, PFS_OP_CMP, dest, mask,
+ pfs_one, pfs_zero, temp[0], 0);
+ free_temp(fp, temp[0]);
+ break;
+ case OPCODE_SIN:
+			/*
+			 * using a parabola:
+			 * sin(x) ~= 4/pi * x - 4/(pi*pi) * x * abs(x)
+			 * extra precision is obtained by weighting the
+			 * result against itself squared.
+			 */
+
+ temp[0] = get_temp_reg(fp);
+ const_sin[0] = emit_const4fv(fp, SinCosConsts[0]);
+ const_sin[1] = emit_const4fv(fp, SinCosConsts[1]);
+ src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
+
+ /* do range reduction */
+
+ emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
+ swizzle(keep(src[0]), X, X, X, X),
+ swizzle(const_sin[1], Z, Z, Z, Z),
+ pfs_half, 0);
+
+ emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X,
+ swizzle(temp[0], X, X, X, X),
+ undef, undef, 0);
+
+			emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z,
+				   swizzle(temp[0], X, X, X, X),
+				   swizzle(const_sin[1], W, W, W, W),	//2*PI
+				   negate(swizzle(const_sin[0], Z, Z, Z, Z)),	//-PI
+				   0);
+
+ /* SIN */
+
+			emit_arith(fp, PFS_OP_MAD, temp[0],
+				   WRITEMASK_X | WRITEMASK_Y,
+				   swizzle(temp[0], Z, Z, Z, Z),
+				   const_sin[0], pfs_zero, 0);
+
+ emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
+ swizzle(temp[0], Y, Y, Y, Y),
+ absolute(swizzle(temp[0], Z, Z, Z, Z)),
+ swizzle(temp[0], X, X, X, X), 0);
+
+ emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y,
+ swizzle(temp[0], X, X, X, X),
+ absolute(swizzle(temp[0], X, X, X, X)),
+ negate(swizzle(temp[0], X, X, X, X)), 0);
+
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ swizzle(temp[0], Y, Y, Y, Y),
+ swizzle(const_sin[0], W, W, W, W),
+ swizzle(temp[0], X, X, X, X), flags);
+
+ free_temp(fp, temp[0]);
+ break;
+ case OPCODE_SLT:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ temp[0] = get_temp_reg(fp);
+ /* temp = src0 - src1
+ * dest.c = (temp.c < 0.0) ? 1 : 0
+ */
+ emit_arith(fp, PFS_OP_MAD, temp[0], mask,
+ src[0], pfs_one, negate(src[1]), 0);
+ emit_arith(fp, PFS_OP_CMP, dest, mask,
+ pfs_zero, pfs_one, temp[0], 0);
+ free_temp(fp, temp[0]);
+ break;
+ case OPCODE_SUB:
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ emit_arith(fp, PFS_OP_MAD, dest, mask,
+ src[0], pfs_one, negate(src[1]), flags);
+ break;
+ case OPCODE_TEX:
+ emit_tex(fp, fpi, R300_FPITX_OP_TEX);
+ break;
+ case OPCODE_TXB:
+ emit_tex(fp, fpi, R300_FPITX_OP_TXB);
+ break;
+ case OPCODE_TXP:
+ emit_tex(fp, fpi, R300_FPITX_OP_TXP);
+ break;
+ case OPCODE_XPD:{
+ src[0] = t_src(fp, fpi->SrcReg[0]);
+ src[1] = t_src(fp, fpi->SrcReg[1]);
+ temp[0] = get_temp_reg(fp);
+ /* temp = src0.zxy * src1.yzx */
+				emit_arith(fp, PFS_OP_MAD, temp[0],
+					   WRITEMASK_XYZ,
+					   swizzle(keep(src[0]), Z, X, Y, W),
+					   swizzle(keep(src[1]), Y, Z, X, W),
+					   pfs_zero, 0);
+				/* dest.xyz = src0.yzx * src1.zxy - temp
+				 * dest.w = undefined
+				 */
+				emit_arith(fp, PFS_OP_MAD, dest,
+					   mask & WRITEMASK_XYZ,
+					   swizzle(src[0], Y, Z, X, W),
+					   swizzle(src[1], Z, X, Y, W),
+					   negate(temp[0]), flags);
+ /* cleanup */
+ free_temp(fp, temp[0]);
+ break;
+ }
+ default:
+ ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+ break;
+ }
+
+ if (fp->error)
+ return GL_FALSE;
+
+ }
+
+ return GL_TRUE;
+}
+
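+/**
+ * Prepend three instructions (RCP, MUL, MAD) that perform the perspective
+ * divide on the incoming window position and scale/bias it by the window
+ * dimension state constant, then rewrite every later read of WPOS to use
+ * the resulting temporary instead.
+ */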
+static void insert_wpos(struct gl_program *prog)
+{
+ static gl_state_index tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
+ };
+ struct prog_instruction *fpi;
+ GLuint window_index;
+ int i = 0;
+ GLuint tempregi = prog->NumTemporaries;
+ /* should do something else if no temps left... */
+ prog->NumTemporaries++;
+
+ fpi = _mesa_alloc_instructions(prog->NumInstructions + 3);
+ _mesa_init_instructions(fpi, prog->NumInstructions + 3);
+
+ /* perspective divide */
+ fpi[i].Opcode = OPCODE_RCP;
+
+ fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+ fpi[i].DstReg.Index = tempregi;
+ fpi[i].DstReg.WriteMask = WRITEMASK_W;
+ fpi[i].DstReg.CondMask = COND_TR;
+
+ fpi[i].SrcReg[0].File = PROGRAM_INPUT;
+ fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
+ i++;
+
+ fpi[i].Opcode = OPCODE_MUL;
+
+ fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+ fpi[i].DstReg.Index = tempregi;
+ fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
+ fpi[i].DstReg.CondMask = COND_TR;
+
+ fpi[i].SrcReg[0].File = PROGRAM_INPUT;
+ fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
+ fpi[i].SrcReg[1].Index = tempregi;
+ fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ i++;
+
+ /* viewport transformation */
+ window_index = _mesa_add_state_reference(prog->Parameters, tokens);
+
+ fpi[i].Opcode = OPCODE_MAD;
+
+ fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+ fpi[i].DstReg.Index = tempregi;
+ fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
+ fpi[i].DstReg.CondMask = COND_TR;
+
+ fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
+ fpi[i].SrcReg[0].Index = tempregi;
+ fpi[i].SrcReg[0].Swizzle =
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
+ fpi[i].SrcReg[1].Index = window_index;
+ fpi[i].SrcReg[1].Swizzle =
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
+ fpi[i].SrcReg[2].Index = window_index;
+ fpi[i].SrcReg[2].Swizzle =
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+ i++;
+
+ _mesa_copy_instructions(&fpi[i], prog->Instructions,
+ prog->NumInstructions);
+
+ free(prog->Instructions);
+
+ prog->Instructions = fpi;
+
+ prog->NumInstructions += i;
+ fpi = &prog->Instructions[prog->NumInstructions - 1];
+
+ assert(fpi->Opcode == OPCODE_END);
+
+ for (fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++) {
+ for (i = 0; i < 3; i++)
+ if (fpi->SrcReg[i].File == PROGRAM_INPUT &&
+ fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS) {
+ fpi->SrcReg[i].File = PROGRAM_TEMPORARY;
+ fpi->SrcReg[i].Index = tempregi;
+ }
+ }
+}
+
+/* - Init structures
+ * - Determine what hwregs each input corresponds to
+ */
+static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp)
+{
+ struct r300_pfs_compile_state *cs = NULL;
+ struct gl_fragment_program *mp = &fp->mesa_program;
+ struct prog_instruction *fpi;
+ GLuint InputsRead = mp->Base.InputsRead;
+ GLuint temps_used = 0; /* for fp->temps[] */
+ int i, j;
+
+ /* New compile, reset tracking data */
+ fp->optimization =
+ driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
+ fp->translated = GL_FALSE;
+ fp->error = GL_FALSE;
+ fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
+ fp->tex.length = 0;
+ fp->cur_node = 0;
+ fp->first_node_has_tex = 0;
+ fp->const_nr = 0;
+ fp->max_temp_idx = 0;
+ fp->node[0].alu_end = -1;
+ fp->node[0].tex_end = -1;
+
+ _mesa_memset(cs, 0, sizeof(*fp->cs));
+ for (i = 0; i < PFS_MAX_ALU_INST; i++) {
+ for (j = 0; j < 3; j++) {
+ cs->slot[i].vsrc[j] = SRC_CONST;
+ cs->slot[i].ssrc[j] = SRC_CONST;
+ }
+ }
+
+	/* Work out what temps the Mesa inputs correspond to; this must match
+ * what setup_rs_unit does, which shouldn't be a problem as rs_unit
+ * configures itself based on the fragprog's InputsRead
+ *
+ * NOTE: this depends on get_hw_temp() allocating registers in order,
+ * starting from register 0.
+ */
+
+ /* Texcoords come first */
+ for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
+ if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+ cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
+ cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
+ get_hw_temp(fp, 0);
+ }
+ }
+ InputsRead &= ~FRAG_BITS_TEX_ANY;
+
+ /* fragment position treated as a texcoord */
+ if (InputsRead & FRAG_BIT_WPOS) {
+ cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
+ cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0);
+ insert_wpos(&mp->Base);
+ }
+ InputsRead &= ~FRAG_BIT_WPOS;
+
+ /* Then primary colour */
+ if (InputsRead & FRAG_BIT_COL0) {
+ cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
+ cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0);
+ }
+ InputsRead &= ~FRAG_BIT_COL0;
+
+ /* Secondary color */
+ if (InputsRead & FRAG_BIT_COL1) {
+ cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
+ cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0);
+ }
+ InputsRead &= ~FRAG_BIT_COL1;
+
+ /* Anything else */
+ if (InputsRead) {
+ WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
+ /* force read from hwreg 0 for now */
+ for (i = 0; i < 32; i++)
+ if (InputsRead & (1 << i))
+ cs->inputs[i].reg = 0;
+ }
+
+ /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
+ * That way, we can free up the reg when it's no longer needed
+ */
+ if (!mp->Base.Instructions) {
+ ERROR("No instructions found in program\n");
+ return;
+ }
+
+ for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
+ int idx;
+
+ for (i = 0; i < 3; i++) {
+ idx = fpi->SrcReg[i].Index;
+ switch (fpi->SrcReg[i].File) {
+ case PROGRAM_TEMPORARY:
+ if (!(temps_used & (1 << idx))) {
+ cs->temps[idx].reg = -1;
+ cs->temps[idx].refcount = 1;
+ temps_used |= (1 << idx);
+ } else
+ cs->temps[idx].refcount++;
+ break;
+ case PROGRAM_INPUT:
+ cs->inputs[idx].refcount++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ idx = fpi->DstReg.Index;
+ if (fpi->DstReg.File == PROGRAM_TEMPORARY) {
+ if (!(temps_used & (1 << idx))) {
+ cs->temps[idx].reg = -1;
+ cs->temps[idx].refcount = 1;
+ temps_used |= (1 << idx);
+ } else
+ cs->temps[idx].refcount++;
+ }
+ }
+ cs->temp_in_use = temps_used;
+}
+
+static void update_params(struct r300_fragment_program *fp)
+{
+ struct gl_fragment_program *mp = &fp->mesa_program;
+
+ /* Ask Mesa nicely to fill in ParameterValues for us */
+ if (mp->Base.Parameters)
+ _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
+}
+
+void r300TranslateFragmentShader(r300ContextPtr r300,
+ struct r300_fragment_program *fp)
+{
+ struct r300_pfs_compile_state *cs = NULL;
+
+ if (!fp->translated) {
+
+ init_program(r300, fp);
+ cs = fp->cs;
+
+ if (parse_program(fp) == GL_FALSE) {
+ dump_program(fp);
+ return;
+ }
+
+ /* Finish off */
+ fp->node[fp->cur_node].alu_end =
+ cs->nrslots - fp->node[fp->cur_node].alu_offset - 1;
+ if (fp->node[fp->cur_node].tex_end < 0)
+ fp->node[fp->cur_node].tex_end = 0;
+ fp->alu_offset = 0;
+ fp->alu_end = cs->nrslots - 1;
+ fp->tex_offset = 0;
+ fp->tex_end = fp->tex.length ? fp->tex.length - 1 : 0;
+ assert(fp->node[fp->cur_node].alu_end >= 0);
+ assert(fp->alu_end >= 0);
+
+ fp->translated = GL_TRUE;
+ if (RADEON_DEBUG & DEBUG_PIXEL)
+ dump_program(fp);
+ r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
+ }
+
+ update_params(fp);
+}
+
+/* Debug helper: dump the Mesa program and the translated hardware program. */
+static void dump_program(struct r300_fragment_program *fp)
+{
+ int n, i, j;
+ static int pc = 0;
+
+ fprintf(stderr, "pc=%d*************************************\n", pc++);
+
+ fprintf(stderr, "Mesa program:\n");
+ fprintf(stderr, "-------------\n");
+ _mesa_print_program(&fp->mesa_program.Base);
+ fflush(stdout);
+
+ fprintf(stderr, "Hardware program\n");
+ fprintf(stderr, "----------------\n");
+
+ for (n = 0; n < (fp->cur_node + 1); n++) {
+ fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
+ "alu_end: %d, tex_end: %d\n", n,
+ fp->node[n].alu_offset,
+ fp->node[n].tex_offset,
+ fp->node[n].alu_end, fp->node[n].tex_end);
+
+ if (fp->tex.length) {
+ fprintf(stderr, " TEX:\n");
+ for (i = fp->node[n].tex_offset;
+ i <= fp->node[n].tex_offset + fp->node[n].tex_end;
+ ++i) {
+ const char *instr;
+
+				switch ((fp->tex.inst[i] >>
+					 R300_FPITX_OPCODE_SHIFT) & 15) {
+ case R300_FPITX_OP_TEX:
+ instr = "TEX";
+ break;
+ case R300_FPITX_OP_KIL:
+ instr = "KIL";
+ break;
+ case R300_FPITX_OP_TXP:
+ instr = "TXP";
+ break;
+ case R300_FPITX_OP_TXB:
+ instr = "TXB";
+ break;
+ default:
+ instr = "UNKNOWN";
+ }
+
+ fprintf(stderr,
+ " %s t%i, %c%i, texture[%i] (%08x)\n",
+ instr,
+					(fp->tex.inst[i] >> R300_FPITX_DST_SHIFT) & 31,
+					(fp->tex.inst[i] & R300_FPITX_SRC_CONST) ? 'c' : 't',
+					(fp->tex.inst[i] >> R300_FPITX_SRC_SHIFT) & 31,
+					(fp->tex.inst[i] & R300_FPITX_IMAGE_MASK) >>
+					R300_FPITX_IMAGE_SHIFT,
+					fp->tex.inst[i]);
+ }
+ }
+
+ for (i = fp->node[n].alu_offset;
+ i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) {
+ char srcc[3][10], dstc[20];
+ char srca[3][10], dsta[20];
+ char argc[3][20];
+ char arga[3][20];
+ char flags[5], tmp[10];
+
+ for (j = 0; j < 3; ++j) {
+ int regc = fp->alu.inst[i].inst1 >> (j * 6);
+ int rega = fp->alu.inst[i].inst3 >> (j * 6);
+
+ sprintf(srcc[j], "%c%i",
+ (regc & 32) ? 'c' : 't', regc & 31);
+ sprintf(srca[j], "%c%i",
+ (rega & 32) ? 'c' : 't', rega & 31);
+ }
+
+ dstc[0] = 0;
+			sprintf(flags, "%s%s%s",
+				(fp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "",
+				(fp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "",
+				(fp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : "");
+			if (flags[0] != 0) {
+				sprintf(dstc, "t%i.%s ",
+					(fp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31,
+					flags);
+			}
+			sprintf(flags, "%s%s%s",
+				(fp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "",
+				(fp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "",
+				(fp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : "");
+			if (flags[0] != 0) {
+				sprintf(tmp, "o%i.%s",
+					(fp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31,
+					flags);
+				strcat(dstc, tmp);
+			}
+
+			dsta[0] = 0;
+			if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) {
+				sprintf(dsta, "t%i.w ",
+					(fp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31);
+			}
+			if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) {
+				sprintf(tmp, "o%i.w ",
+					(fp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31);
+				strcat(dsta, tmp);
+			}
+ if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) {
+ strcat(dsta, "Z");
+ }
+
+ fprintf(stderr,
+ "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
+ " w: %3s %3s %3s -> %-20s (%08x)\n", i,
+ srcc[0], srcc[1], srcc[2], dstc,
+ fp->alu.inst[i].inst1, srca[0], srca[1],
+ srca[2], dsta, fp->alu.inst[i].inst3);
+
+ for (j = 0; j < 3; ++j) {
+ int regc = fp->alu.inst[i].inst0 >> (j * 7);
+ int rega = fp->alu.inst[i].inst2 >> (j * 7);
+ int d;
+ char buf[20];
+
+ d = regc & 31;
+ if (d < 12) {
+ switch (d % 4) {
+ case R300_FPI0_ARGC_SRC0C_XYZ:
+ sprintf(buf, "%s.xyz",
+ srcc[d / 4]);
+ break;
+ case R300_FPI0_ARGC_SRC0C_XXX:
+ sprintf(buf, "%s.xxx",
+ srcc[d / 4]);
+ break;
+ case R300_FPI0_ARGC_SRC0C_YYY:
+ sprintf(buf, "%s.yyy",
+ srcc[d / 4]);
+ break;
+ case R300_FPI0_ARGC_SRC0C_ZZZ:
+ sprintf(buf, "%s.zzz",
+ srcc[d / 4]);
+ break;
+ }
+ } else if (d < 15) {
+ sprintf(buf, "%s.www", srca[d - 12]);
+ } else if (d == 20) {
+ sprintf(buf, "0.0");
+ } else if (d == 21) {
+ sprintf(buf, "1.0");
+ } else if (d == 22) {
+ sprintf(buf, "0.5");
+ } else if (d >= 23 && d < 32) {
+ d -= 23;
+ switch (d / 3) {
+ case 0:
+ sprintf(buf, "%s.yzx",
+ srcc[d % 3]);
+ break;
+ case 1:
+ sprintf(buf, "%s.zxy",
+ srcc[d % 3]);
+ break;
+ case 2:
+ sprintf(buf, "%s.Wzy",
+ srcc[d % 3]);
+ break;
+ }
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(argc[j], "%s%s%s%s",
+ (regc & 32) ? "-" : "",
+ (regc & 64) ? "|" : "",
+ buf, (regc & 64) ? "|" : "");
+
+ d = rega & 31;
+ if (d < 9) {
+ sprintf(buf, "%s.%c", srcc[d / 3],
+ 'x' + (char)(d % 3));
+ } else if (d < 12) {
+ sprintf(buf, "%s.w", srca[d - 9]);
+ } else if (d == 16) {
+ sprintf(buf, "0.0");
+ } else if (d == 17) {
+ sprintf(buf, "1.0");
+ } else if (d == 18) {
+ sprintf(buf, "0.5");
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(arga[j], "%s%s%s%s",
+ (rega & 32) ? "-" : "",
+ (rega & 64) ? "|" : "",
+ buf, (rega & 64) ? "|" : "");
+ }
+
+ fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
+ " w: %8s %8s %8s op: %08x\n",
+ argc[0], argc[1], argc[2],
+ fp->alu.inst[i].inst0, arga[0], arga[1],
+ arga[2], fp->alu.inst[i].inst2);
+ }
+ }
+}
diff --git a/r300/r300_fragprog.h b/r300/r300_fragprog.h
new file mode 100644
index 0000000..72fca77
--- /dev/null
+++ b/r300/r300_fragprog.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ * Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+
+#include "r300_context.h"
+
+typedef struct r300_fragment_program_swizzle {
+ GLuint length;
+ GLuint src[4];
+ GLuint inst[8];
+} r300_fragment_program_swizzle_t;
+
+/* supported hw opcodes */
+#define PFS_OP_MAD 0
+#define PFS_OP_DP3 1
+#define PFS_OP_DP4 2
+#define PFS_OP_MIN 3
+#define PFS_OP_MAX 4
+#define PFS_OP_CMP 5
+#define PFS_OP_FRC 6
+#define PFS_OP_EX2 7
+#define PFS_OP_LG2 8
+#define PFS_OP_RCP 9
+#define PFS_OP_RSQ 10
+#define PFS_OP_REPL_ALPHA 11
+#define PFS_OP_CMPH 12
+#define MAX_PFS_OP 12
+
+#define PFS_FLAG_SAT (1 << 0)
+#define PFS_FLAG_ABS (1 << 1)
+
+#define ARG_NEG (1 << 5)
+#define ARG_ABS (1 << 6)
+#define ARG_MASK (127 << 0)
+#define ARG_STRIDE 7
+#define SRC_CONST (1 << 5)
+#define SRC_MASK (63 << 0)
+#define SRC_STRIDE 6
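+/*
+ * As implied by the masks above: a source selector occupies 6 bits
+ * (bits 0-4 select the register, bit 5 marks a constant-file register),
+ * while an argument selector occupies 7 bits (bit 5 negates the argument,
+ * bit 6 takes its absolute value).
+ */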
+
+#define NOP_INST0 ( \
+ (R300_FPI0_OUTC_MAD) | \
+ (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \
+ (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \
+ (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT))
+#define NOP_INST1 ( \
+ ((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \
+ ((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \
+ ((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT))
+#define NOP_INST2 ( \
+ (R300_FPI2_OUTA_MAD) | \
+ (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \
+ (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \
+ (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT))
+#define NOP_INST3 ( \
+ ((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \
+ ((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \
+ ((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT))
+
+#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
+#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
+
+struct r300_fragment_program;
+
+extern void r300TranslateFragmentShader(r300ContextPtr r300,
+ struct r300_fragment_program *fp);
+
+#endif
diff --git a/r300/r300_ioctl.c b/r300/r300_ioctl.c
new file mode 100644
index 0000000..ea94ce2
--- /dev/null
+++ b/r300/r300_ioctl.c
@@ -0,0 +1,719 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002.
+Copyright (C) 2004 Nicolai Haehnle.
+All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include <sched.h>
+#include <errno.h>
+
+#include "glheader.h"
+#include "imports.h"
+#include "macros.h"
+#include "context.h"
+#include "swrast/swrast.h"
+
+#include "r300_context.h"
+#include "radeon_ioctl.h"
+#include "r300_ioctl.h"
+#include "r300_cmdbuf.h"
+#include "r300_state.h"
+#include "r300_program.h"
+#include "radeon_reg.h"
+#include "r300_emit.h"
+
+#include "vblank.h"
+
+#define CLEARBUFFER_COLOR 0x1
+#define CLEARBUFFER_DEPTH 0x2
+#define CLEARBUFFER_STENCIL 0x4
+
+static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
+{
+ GLcontext *ctx = r300->radeon.glCtx;
+ __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+ GLuint cboffset, cbpitch;
+ drm_r300_cmd_header_t *cmd2;
+ int cmd_reserved = 0;
+ int cmd_written = 0;
+ drm_radeon_cmd_header_t *cmd = NULL;
+ r300ContextPtr rmesa = r300;
+
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n",
+ __FUNCTION__, buffer ? "back" : "front",
+ dPriv->x, dPriv->y, dPriv->w, dPriv->h);
+
+ if (buffer) {
+ cboffset = r300->radeon.radeonScreen->backOffset;
+ cbpitch = r300->radeon.radeonScreen->backPitch;
+ } else {
+ cboffset = r300->radeon.radeonScreen->frontOffset;
+ cbpitch = r300->radeon.radeonScreen->frontPitch;
+ }
+
+ cboffset += r300->radeon.radeonScreen->fbLocation;
+
+ cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+ end_3d(rmesa);
+
+ R300_STATECHANGE(r300, cb);
+ reg_start(R300_RB3D_COLOROFFSET0, 0);
+ e32(cboffset);
+
+ if (r300->radeon.radeonScreen->cpp == 4)
+ cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+ else
+ cbpitch |= R300_COLOR_FORMAT_RGB565;
+
+ if (r300->radeon.sarea->tiling_enabled)
+ cbpitch |= R300_COLOR_TILE_ENABLE;
+
+ reg_start(R300_RB3D_COLORPITCH0, 0);
+ e32(cbpitch);
+
+ R300_STATECHANGE(r300, cmk);
+ reg_start(R300_RB3D_COLORMASK, 0);
+
+ if (flags & CLEARBUFFER_COLOR) {
+ e32((ctx->Color.ColorMask[BCOMP] ? R300_COLORMASK0_B : 0) |
+ (ctx->Color.ColorMask[GCOMP] ? R300_COLORMASK0_G : 0) |
+ (ctx->Color.ColorMask[RCOMP] ? R300_COLORMASK0_R : 0) |
+ (ctx->Color.ColorMask[ACOMP] ? R300_COLORMASK0_A : 0));
+ } else {
+ e32(0x0);
+ }
+
+ R300_STATECHANGE(r300, zs);
+ reg_start(R300_RB3D_ZSTENCIL_CNTL_0, 2);
+
+ {
+ uint32_t t1, t2;
+
+ t1 = 0x0;
+ t2 = 0x0;
+
+ if (flags & CLEARBUFFER_DEPTH) {
+ t1 |= R300_RB3D_Z_WRITE_ONLY;
+ t2 |=
+ (R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT);
+ } else {
+ t1 |= R300_RB3D_Z_DISABLED_1; // disable
+ }
+
+ if (flags & CLEARBUFFER_STENCIL) {
+ t1 |= R300_RB3D_STENCIL_ENABLE;
+ t2 |=
+ (R300_ZS_ALWAYS <<
+ R300_RB3D_ZS1_FRONT_FUNC_SHIFT) |
+ (R300_ZS_REPLACE <<
+ R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) |
+ (R300_ZS_REPLACE <<
+ R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT) |
+ (R300_ZS_REPLACE <<
+ R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) |
+ (R300_ZS_ALWAYS <<
+ R300_RB3D_ZS1_BACK_FUNC_SHIFT) |
+ (R300_ZS_REPLACE <<
+ R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) |
+ (R300_ZS_REPLACE <<
+ R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT) |
+ (R300_ZS_REPLACE <<
+ R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT);
+ }
+
+ e32(t1);
+ e32(t2);
+ e32(r300->state.stencil.clear);
+ }
+
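+	/* The R300_CMD_PACKET3_CLEAR payload is eight floats: half the
+	 * drawable width and height, the depth clear value, a constant 1.0,
+	 * and the four RGBA clear color components.
+	 */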
+ cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
+ cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
+ cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
+ cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
+ cmd2[2].u = r300PackFloat32(dPriv->h / 2.0);
+ cmd2[3].u = r300PackFloat32(ctx->Depth.Clear);
+ cmd2[4].u = r300PackFloat32(1.0);
+ cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]);
+ cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]);
+ cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
+ cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
+
+ reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
+ e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
+
+ reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
+ e32(R300_RB3D_ZCACHE_UNKNOWN_03);
+ cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+}
+
+static void r300EmitClearState(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ r300ContextPtr rmesa = r300;
+ __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+ int i;
+ int cmd_reserved = 0;
+ int cmd_written = 0;
+ drm_radeon_cmd_header_t *cmd = NULL;
+ int has_tcl = 1;
+
+ if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ has_tcl = 0;
+
+ /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
+	 * R300_VAP_INPUT_ROUTE_0_1 are in fact known; however, the values are
+ * quite complex; see the functions in r300_emit.c.
+ *
+ * I believe it would be a good idea to extend the functions in
+ * r300_emit.c so that they can be used to setup the default values for
+ * these registers, as well as the actual values used for rendering.
+ */
+ R300_STATECHANGE(r300, vir[0]);
+ reg_start(R300_VAP_INPUT_ROUTE_0_0, 0);
+ if (!has_tcl)
+ e32(0x22030003);
+ else
+ e32(0x21030003);
+
+ /* disable fog */
+ R300_STATECHANGE(r300, fogs);
+ reg_start(R300_RE_FOG_STATE, 0);
+ e32(0x0);
+
+ R300_STATECHANGE(r300, vir[1]);
+ reg_start(R300_VAP_INPUT_ROUTE_1_0, 0);
+ e32(0xF688F688);
+
+ /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
+ R300_STATECHANGE(r300, vic);
+ reg_start(R300_VAP_INPUT_CNTL_0, 1);
+ e32(R300_INPUT_CNTL_0_COLOR);
+ e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
+
+ if (!has_tcl) {
+ R300_STATECHANGE(r300, vte);
+ /* comes from fglrx startup of clear */
+ reg_start(R300_SE_VTE_CNTL, 1);
+ e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
+ R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
+ R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
+ R300_VPORT_Z_OFFSET_ENA);
+ e32(0x8);
+
+ reg_start(0x21dc, 0);
+ e32(0xaaaaaaaa);
+ }
+
+ R300_STATECHANGE(r300, vof);
+ reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
+ e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT);
+ e32(0x0); /* no textures */
+
+ R300_STATECHANGE(r300, txe);
+ reg_start(R300_TX_ENABLE, 0);
+ e32(0x0);
+
+ R300_STATECHANGE(r300, vpt);
+ reg_start(R300_SE_VPORT_XSCALE, 5);
+ efloat(1.0);
+ efloat(dPriv->x);
+ efloat(1.0);
+ efloat(dPriv->y);
+ efloat(1.0);
+ efloat(0.0);
+
+ R300_STATECHANGE(r300, at);
+ reg_start(R300_PP_ALPHA_TEST, 0);
+ e32(0x0);
+
+ R300_STATECHANGE(r300, bld);
+ reg_start(R300_RB3D_CBLEND, 1);
+ e32(0x0);
+ e32(0x0);
+
+ R300_STATECHANGE(r300, vap_clip_cntl);
+ reg_start(R300_VAP_CLIP_CNTL, 0);
+ e32(R300_221C_CLEAR);
+
+ R300_STATECHANGE(r300, ps);
+ reg_start(R300_RE_POINTSIZE, 0);
+ e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
+ ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
+
+ R300_STATECHANGE(r300, ri);
+ reg_start(R300_RS_INTERP_0, 8);
+ for (i = 0; i < 8; ++i) {
+ e32(R300_RS_INTERP_USED);
+ }
+
+ R300_STATECHANGE(r300, rc);
+	/* The second constant is needed to get glxgears to display anything. */
+ reg_start(R300_RS_CNTL_0, 1);
+ e32((1 << R300_RS_CNTL_CI_CNT_SHIFT) | R300_RS_CNTL_0_UNKNOWN_18);
+ e32(0x0);
+
+ R300_STATECHANGE(r300, rr);
+ reg_start(R300_RS_ROUTE_0, 0);
+ e32(R300_RS_ROUTE_0_COLOR);
+
+ R300_STATECHANGE(r300, fp);
+ reg_start(R300_PFS_CNTL_0, 2);
+ e32(0x0);
+ e32(0x0);
+ e32(0x0);
+ reg_start(R300_PFS_NODE_0, 3);
+ e32(0x0);
+ e32(0x0);
+ e32(0x0);
+ e32(R300_PFS_NODE_OUTPUT_COLOR);
+
+ R300_STATECHANGE(r300, fpi[0]);
+ R300_STATECHANGE(r300, fpi[1]);
+ R300_STATECHANGE(r300, fpi[2]);
+ R300_STATECHANGE(r300, fpi[3]);
+
+ reg_start(R300_PFS_INSTR0_0, 0);
+ e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
+
+ reg_start(R300_PFS_INSTR1_0, 0);
+ e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
+
+ reg_start(R300_PFS_INSTR2_0, 0);
+ e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
+
+ reg_start(R300_PFS_INSTR3_0, 0);
+ e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
+
+ if (has_tcl) {
+ R300_STATECHANGE(r300, pvs);
+ reg_start(R300_VAP_PVS_CNTL_1, 2);
+ e32((0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) |
+ (0 << R300_PVS_CNTL_1_POS_END_SHIFT) |
+ (1 << R300_PVS_CNTL_1_PROGRAM_END_SHIFT));
+ e32(0x0);
+ e32(1 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT);
+
+ R300_STATECHANGE(r300, vpi);
+ vsf_start_fragment(0x0, 8);
+ e32(VP_OUT(ADD, OUT, 0, XYZW));
+ e32(VP_IN(IN, 0));
+ e32(VP_ZERO());
+ e32(0x0);
+
+ e32(VP_OUT(ADD, OUT, 1, XYZW));
+ e32(VP_IN(IN, 1));
+ e32(VP_ZERO());
+ e32(0x0);
+ }
+}
+
+/**
+ * Buffer clear
+ */
+static void r300Clear(GLcontext * ctx, GLbitfield mask)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+ int flags = 0;
+ int bits = 0;
+ int swapped;
+
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "r300Clear\n");
+
+ {
+ LOCK_HARDWARE(&r300->radeon);
+ UNLOCK_HARDWARE(&r300->radeon);
+ if (dPriv->numClipRects == 0)
+ return;
+ }
+
+ if (mask & BUFFER_BIT_FRONT_LEFT) {
+ flags |= BUFFER_BIT_FRONT_LEFT;
+ mask &= ~BUFFER_BIT_FRONT_LEFT;
+ }
+
+ if (mask & BUFFER_BIT_BACK_LEFT) {
+ flags |= BUFFER_BIT_BACK_LEFT;
+ mask &= ~BUFFER_BIT_BACK_LEFT;
+ }
+
+ if (mask & BUFFER_BIT_DEPTH) {
+ bits |= CLEARBUFFER_DEPTH;
+ mask &= ~BUFFER_BIT_DEPTH;
+ }
+
+ if ((mask & BUFFER_BIT_STENCIL) && r300->state.stencil.hw_stencil) {
+ bits |= CLEARBUFFER_STENCIL;
+ mask &= ~BUFFER_BIT_STENCIL;
+ }
+
+ if (mask) {
+ if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "%s: swrast clear, mask: %x\n",
+ __FUNCTION__, mask);
+ _swrast_Clear(ctx, mask);
+ }
+
+ swapped = r300->radeon.sarea->pfCurrentPage == 1;
+
+	/* Make sure the clear state and clear packets fit in the command buffer. */
+ r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__);
+ if (flags || bits)
+ r300EmitClearState(ctx);
+
+ if (flags & BUFFER_BIT_FRONT_LEFT) {
+ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
+ bits = 0;
+ }
+
+ if (flags & BUFFER_BIT_BACK_LEFT) {
+ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1);
+ bits = 0;
+ }
+
+ if (bits)
+ r300ClearBuffer(r300, bits, 0);
+
+}
+
+void r300Flush(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ if (r300->cmdbuf.count_used > r300->cmdbuf.count_reemit)
+ r300FlushCmdBuf(r300, __FUNCTION__);
+}
+
+#ifdef USER_BUFFERS
+#include "r300_mem.h"
+
+static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
+{
+ struct r300_dma_buffer *dmabuf;
+ size = MAX2(size, RADEON_BUFFER_SIZE * 16);
+
+ if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ if (rmesa->dma.flush) {
+ rmesa->dma.flush(rmesa);
+ }
+
+ if (rmesa->dma.current.buf)
+ r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
+
+ if (rmesa->dma.nr_released_bufs > 4)
+ r300FlushCmdBuf(rmesa, __FUNCTION__);
+
+ dmabuf = CALLOC_STRUCT(r300_dma_buffer);
+ dmabuf->buf = (void *)1; /* hack */
+ dmabuf->refcount = 1;
+
+ dmabuf->id = r300_mem_alloc(rmesa, 4, size);
+ if (dmabuf->id == 0) {
+ LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */
+
+ r300FlushCmdBufLocked(rmesa, __FUNCTION__);
+ radeonWaitForIdleLocked(&rmesa->radeon);
+
+ dmabuf->id = r300_mem_alloc(rmesa, 4, size);
+
+ UNLOCK_HARDWARE(&rmesa->radeon);
+
+ if (dmabuf->id == 0) {
+ fprintf(stderr,
+ "Error: Could not get dma buffer... exiting\n");
+ _mesa_exit(-1);
+ }
+ }
+
+ rmesa->dma.current.buf = dmabuf;
+ rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id);
+ rmesa->dma.current.end = size;
+ rmesa->dma.current.start = 0;
+ rmesa->dma.current.ptr = 0;
+}
+
+void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+ struct r300_dma_region *region, const char *caller)
+{
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+
+ if (!region->buf)
+ return;
+
+ if (rmesa->dma.flush)
+ rmesa->dma.flush(rmesa);
+
+ if (--region->buf->refcount == 0) {
+ r300_mem_free(rmesa, region->buf->id);
+ FREE(region->buf);
+ rmesa->dma.nr_released_bufs++;
+ }
+
+ region->buf = 0;
+ region->start = 0;
+}
+
+/* Allocates a region from rmesa->dma.current. If there isn't enough
+ * space in current, grab a new buffer (and discard what was left of current)
+ */
+void r300AllocDmaRegion(r300ContextPtr rmesa,
+ struct r300_dma_region *region,
+ int bytes, int alignment)
+{
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+
+ if (rmesa->dma.flush)
+ rmesa->dma.flush(rmesa);
+
+ if (region->buf)
+ r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
+
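+	/* Round the current write pointer up to the requested alignment;
+	 * this rounding trick assumes the alignment is a power of two.
+	 */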
+ alignment--;
+ rmesa->dma.current.start = rmesa->dma.current.ptr =
+ (rmesa->dma.current.ptr + alignment) & ~alignment;
+
+ if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
+ r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7);
+
+ region->start = rmesa->dma.current.start;
+ region->ptr = rmesa->dma.current.start;
+ region->end = rmesa->dma.current.start + bytes;
+ region->address = rmesa->dma.current.address;
+ region->buf = rmesa->dma.current.buf;
+ region->buf->refcount++;
+
+ rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+ rmesa->dma.current.start =
+ rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
+
+ assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
+}
+
+#else
+static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
+{
+ struct r300_dma_buffer *dmabuf;
+ int fd = rmesa->radeon.dri.fd;
+ int index = 0;
+ int size = 0;
+ drmDMAReq dma;
+ int ret;
+
+ if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ if (rmesa->dma.flush) {
+ rmesa->dma.flush(rmesa);
+ }
+
+ if (rmesa->dma.current.buf)
+ r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
+
+ if (rmesa->dma.nr_released_bufs > 4)
+ r300FlushCmdBuf(rmesa, __FUNCTION__);
+
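+	/* Request a single buffer of RADEON_BUFFER_SIZE bytes from the
+	 * kernel DMA buffer pool; the granted buffer index and size are
+	 * returned through 'index' and 'size'.
+	 */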
+ dma.context = rmesa->radeon.dri.hwContext;
+ dma.send_count = 0;
+ dma.send_list = NULL;
+ dma.send_sizes = NULL;
+ dma.flags = 0;
+ dma.request_count = 1;
+ dma.request_size = RADEON_BUFFER_SIZE;
+ dma.request_list = &index;
+ dma.request_sizes = &size;
+ dma.granted_count = 0;
+
+ LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */
+
+ ret = drmDMA(fd, &dma);
+
+ if (ret != 0) {
+		/* Try to release some buffers and wait until we can get a new one */
+ if (rmesa->dma.nr_released_bufs) {
+ r300FlushCmdBufLocked(rmesa, __FUNCTION__);
+ }
+
+ if (RADEON_DEBUG & DEBUG_DMA)
+ fprintf(stderr, "Waiting for buffers\n");
+
+ radeonWaitForIdleLocked(&rmesa->radeon);
+ ret = drmDMA(fd, &dma);
+
+ if (ret != 0) {
+ UNLOCK_HARDWARE(&rmesa->radeon);
+ fprintf(stderr,
+ "Error: Could not get dma buffer... exiting\n");
+ _mesa_exit(-1);
+ }
+ }
+
+ UNLOCK_HARDWARE(&rmesa->radeon);
+
+ if (RADEON_DEBUG & DEBUG_DMA)
+ fprintf(stderr, "Allocated buffer %d\n", index);
+
+ dmabuf = CALLOC_STRUCT(r300_dma_buffer);
+ dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index];
+ dmabuf->refcount = 1;
+
+ rmesa->dma.current.buf = dmabuf;
+ rmesa->dma.current.address = dmabuf->buf->address;
+ rmesa->dma.current.end = dmabuf->buf->total;
+ rmesa->dma.current.start = 0;
+ rmesa->dma.current.ptr = 0;
+}
+
+void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+ struct r300_dma_region *region, const char *caller)
+{
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+
+ if (!region->buf)
+ return;
+
+ if (rmesa->dma.flush)
+ rmesa->dma.flush(rmesa);
+
+ if (--region->buf->refcount == 0) {
+ drm_radeon_cmd_header_t *cmd;
+
+ if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+ fprintf(stderr, "%s -- DISCARD BUF %d\n",
+ __FUNCTION__, region->buf->buf->idx);
+ cmd =
+ (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
+ sizeof
+ (*cmd) / 4,
+ __FUNCTION__);
+ cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
+ cmd->dma.buf_idx = region->buf->buf->idx;
+
+ FREE(region->buf);
+ rmesa->dma.nr_released_bufs++;
+ }
+
+ region->buf = 0;
+ region->start = 0;
+}
+
+/* Allocates a region from rmesa->dma.current. If there isn't enough
+ * space in current, grab a new buffer (and discard what was left of current)
+ */
+void r300AllocDmaRegion(r300ContextPtr rmesa,
+ struct r300_dma_region *region,
+ int bytes, int alignment)
+{
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+
+ if (rmesa->dma.flush)
+ rmesa->dma.flush(rmesa);
+
+ if (region->buf)
+ r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
+
+ alignment--;
+ rmesa->dma.current.start = rmesa->dma.current.ptr =
+ (rmesa->dma.current.ptr + alignment) & ~alignment;
+
+ if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
+ r300RefillCurrentDmaRegion(rmesa);
+
+ region->start = rmesa->dma.current.start;
+ region->ptr = rmesa->dma.current.start;
+ region->end = rmesa->dma.current.start + bytes;
+ region->address = rmesa->dma.current.address;
+ region->buf = rmesa->dma.current.buf;
+ region->buf->refcount++;
+
+ rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+ rmesa->dma.current.start =
+ rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
+
+ assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
+}
+
+#endif
+
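+/* Check whether the [pointer, pointer + size) range lies entirely within
+ * the CPU mapping of the GART texture area.
+ */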
+GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer,
+ GLint size)
+{
+ int offset =
+ (char *)pointer -
+ (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+ int valid = (size >= 0 && offset >= 0
+ && offset + size <
+ rmesa->radeon.radeonScreen->gartTextures.size);
+
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer,
+ valid);
+
+ return valid;
+}
+
+GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer)
+{
+ int offset =
+ (char *)pointer -
+ (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+
+ //fprintf(stderr, "offset=%08x\n", offset);
+
+ if (offset < 0
+ || offset > rmesa->radeon.radeonScreen->gartTextures.size)
+ return ~0;
+ else
+ return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
+}
+
+void r300InitIoctlFuncs(struct dd_function_table *functions)
+{
+ functions->Clear = r300Clear;
+ functions->Finish = radeonFinish;
+ functions->Flush = r300Flush;
+}
diff --git a/r300/r300_ioctl.h b/r300/r300_ioctl.h
new file mode 100644
index 0000000..7a19a2c
--- /dev/null
+++ b/r300/r300_ioctl.h
@@ -0,0 +1,59 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_IOCTL_H__
+#define __R300_IOCTL_H__
+
+#include "r300_context.h"
+#include "radeon_drm.h"
+
+extern GLboolean r300IsGartMemory(r300ContextPtr rmesa,
+ const GLvoid * pointer, GLint size);
+
+extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa,
+ const GLvoid * pointer);
+
+extern void r300Flush(GLcontext * ctx);
+
+extern void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+ struct r300_dma_region *region,
+ const char *caller);
+extern void r300AllocDmaRegion(r300ContextPtr rmesa,
+ struct r300_dma_region *region, int bytes,
+ int alignment);
+
+extern void r300InitIoctlFuncs(struct dd_function_table *functions);
+
+#endif /* __R300_IOCTL_H__ */
diff --git a/r300/r300_mem.c b/r300/r300_mem.c
new file mode 100644
index 0000000..f8f9d4f
--- /dev/null
+++ b/r300/r300_mem.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright (C) 2005 Aapo Tahkola.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Aapo Tahkola <aet@rasterburn.org>
+ */
+
+#include <unistd.h>
+
+#include "r300_context.h"
+#include "r300_cmdbuf.h"
+#include "r300_ioctl.h"
+#include "r300_mem.h"
+#include "radeon_ioctl.h"
+
+#ifdef USER_BUFFERS
+
+static void resize_u_list(r300ContextPtr rmesa)
+{
+ void *temp;
+ int nsize;
+
+ temp = rmesa->rmm->u_list;
+ nsize = rmesa->rmm->u_size * 2;
+
+ rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list));
+ _mesa_memset(rmesa->rmm->u_list, 0,
+ nsize * sizeof(*rmesa->rmm->u_list));
+
+ if (temp) {
+ r300FlushCmdBuf(rmesa, __FUNCTION__);
+
+ _mesa_memcpy(rmesa->rmm->u_list, temp,
+ rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list));
+ _mesa_free(temp);
+ }
+
+ rmesa->rmm->u_size = nsize;
+}
+
+void r300_mem_init(r300ContextPtr rmesa)
+{
+ rmesa->rmm = malloc(sizeof(struct r300_memory_manager));
+ memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager));
+
+ rmesa->rmm->u_size = 128;
+ resize_u_list(rmesa);
+}
+
+void r300_mem_destroy(r300ContextPtr rmesa)
+{
+ _mesa_free(rmesa->rmm->u_list);
+ rmesa->rmm->u_list = NULL;
+
+ _mesa_free(rmesa->rmm);
+ rmesa->rmm = NULL;
+}
+
+void *r300_mem_ptr(r300ContextPtr rmesa, int id)
+{
+ assert(id <= rmesa->rmm->u_last);
+ return rmesa->rmm->u_list[id].ptr;
+}
+
+int r300_mem_find(r300ContextPtr rmesa, void *ptr)
+{
+ int i;
+
+ for (i = 1; i < rmesa->rmm->u_size + 1; i++)
+ if (rmesa->rmm->u_list[i].ptr &&
+ ptr >= rmesa->rmm->u_list[i].ptr &&
+ ptr <
+ rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size)
+ break;
+
+ if (i < rmesa->rmm->u_size + 1)
+ return i;
+
+ fprintf(stderr, "%p failed\n", ptr);
+ return 0;
+}
+
+//#define MM_DEBUG
+int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size)
+{
+ drm_radeon_mem_alloc_t alloc;
+ int offset = 0, ret;
+ int i, free = -1;
+ int done_age;
+ drm_radeon_mem_free_t memfree;
+ int tries = 0;
+ static int bytes_wasted = 0, allocated = 0;
+
+ if (size < 4096)
+ bytes_wasted += 4096 - size;
+
+ allocated += size;
+
+#if 0
+ static int t = 0;
+ if (t != time(NULL)) {
+ t = time(NULL);
+ fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n",
+ rmesa->rmm->u_last, bytes_wasted / 1024,
+ allocated / 1024);
+ }
+#endif
+
+ memfree.region = RADEON_MEM_REGION_GART;
+
+ again:
+
+ done_age = radeonGetAge((radeonContextPtr) rmesa);
+
+ if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size)
+ resize_u_list(rmesa);
+
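+	/* Walk the slot list from the top: remember a free slot for the new
+	 * allocation, and lazily free any pending allocation whose fence age
+	 * the hardware has already passed.
+	 */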
+ for (i = rmesa->rmm->u_last + 1; i > 0; i--) {
+ if (rmesa->rmm->u_list[i].ptr == NULL) {
+ free = i;
+ continue;
+ }
+
+ if (rmesa->rmm->u_list[i].h_pending == 0 &&
+ rmesa->rmm->u_list[i].pending
+ && rmesa->rmm->u_list[i].age <= done_age) {
+ memfree.region_offset =
+ (char *)rmesa->rmm->u_list[i].ptr -
+ (char *)rmesa->radeon.radeonScreen->gartTextures.
+ map;
+
+ ret =
+ drmCommandWrite(rmesa->radeon.radeonScreen->
+ driScreen->fd, DRM_RADEON_FREE,
+ &memfree, sizeof(memfree));
+
+ if (ret) {
+ fprintf(stderr, "Failed to free at %p\n",
+ rmesa->rmm->u_list[i].ptr);
+ fprintf(stderr, "ret = %s\n", strerror(-ret));
+ exit(1);
+ } else {
+#ifdef MM_DEBUG
+ fprintf(stderr, "really freed %d at age %x\n",
+ i,
+ radeonGetAge((radeonContextPtr) rmesa));
+#endif
+ if (i == rmesa->rmm->u_last)
+ rmesa->rmm->u_last--;
+
+ if (rmesa->rmm->u_list[i].size < 4096)
+ bytes_wasted -=
+ 4096 - rmesa->rmm->u_list[i].size;
+
+ allocated -= rmesa->rmm->u_list[i].size;
+ rmesa->rmm->u_list[i].pending = 0;
+ rmesa->rmm->u_list[i].ptr = NULL;
+ free = i;
+ }
+ }
+ }
+ rmesa->rmm->u_head = i;
+
+ if (free == -1) {
+ WARN_ONCE("Ran out of slots!\n");
+ //usleep(100);
+ r300FlushCmdBuf(rmesa, __FUNCTION__);
+ tries++;
+ if (tries > 100) {
+ WARN_ONCE("Ran out of slots!\n");
+ exit(1);
+ }
+ goto again;
+ }
+
+ alloc.region = RADEON_MEM_REGION_GART;
+ alloc.alignment = alignment;
+ alloc.size = size;
+ alloc.region_offset = &offset;
+
+ ret =
+ drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc,
+ sizeof(alloc));
+ if (ret) {
+#if 0
+ WARN_ONCE("Ran out of mem!\n");
+ r300FlushCmdBuf(rmesa, __FUNCTION__);
+ //usleep(100);
+ tries2++;
+ tries = 0;
+ if (tries2 > 100) {
+ WARN_ONCE("Ran out of GART memory!\n");
+ exit(1);
+ }
+ goto again;
+#else
+ WARN_ONCE
+ ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n",
+ size);
+ return 0;
+#endif
+ }
+
+ i = free;
+
+ if (i > rmesa->rmm->u_last)
+ rmesa->rmm->u_last = i;
+
+ rmesa->rmm->u_list[i].ptr =
+ ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset;
+ rmesa->rmm->u_list[i].size = size;
+ rmesa->rmm->u_list[i].age = 0;
+ //fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i);
+
+#ifdef MM_DEBUG
+ fprintf(stderr, "allocated %d at age %x\n", i,
+ radeonGetAge((radeonContextPtr) rmesa));
+#endif
+
+ return i;
+}
+
+void r300_mem_use(r300ContextPtr rmesa, int id)
+{
+ uint64_t ull;
+#ifdef MM_DEBUG
+ fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+ radeonGetAge((radeonContextPtr) rmesa));
+#endif
+ drm_r300_cmd_header_t *cmd;
+
+ assert(id <= rmesa->rmm->u_last);
+
+ if (id == 0)
+ return;
+
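+	/* Emit a scratch command that asks the DRM to write the hardware age
+	 * back through the 64-bit pointer embedded below (the address of
+	 * u_list[id].age); h_pending counts scratch writes still in flight.
+	 */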
+ cmd =
+ (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa,
+ 2 + sizeof(ull) / 4,
+ __FUNCTION__);
+ cmd[0].scratch.cmd_type = R300_CMD_SCRATCH;
+ cmd[0].scratch.reg = R300_MEM_SCRATCH;
+ cmd[0].scratch.n_bufs = 1;
+ cmd[0].scratch.flags = 0;
+ cmd++;
+
+ ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age;
+ _mesa_memcpy(cmd, &ull, sizeof(ull));
+ cmd += sizeof(ull) / 4;
+
+ cmd[0].u = /*id */ 0;
+
+ LOCK_HARDWARE(&rmesa->radeon); /* Protect from DRM. */
+ rmesa->rmm->u_list[id].h_pending++;
+ UNLOCK_HARDWARE(&rmesa->radeon);
+}
+
+unsigned long r300_mem_offset(r300ContextPtr rmesa, int id)
+{
+ unsigned long offset;
+
+ assert(id <= rmesa->rmm->u_last);
+
+ offset = (char *)rmesa->rmm->u_list[id].ptr -
+ (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+ offset += rmesa->radeon.radeonScreen->gart_texture_offset;
+
+ return offset;
+}
+
+void *r300_mem_map(r300ContextPtr rmesa, int id, int access)
+{
+#ifdef MM_DEBUG
+ fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+ radeonGetAge((radeonContextPtr) rmesa));
+#endif
+ void *ptr;
+ int tries = 0;
+
+ assert(id <= rmesa->rmm->u_last);
+
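+	/* Read-only mappings can return immediately; for writable mappings,
+	 * flush any outstanding scratch write and then poll until the
+	 * hardware age catches up with the buffer's age.
+	 */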
+ if (access == R300_MEM_R) {
+
+ if (rmesa->rmm->u_list[id].mapped == 1)
+ WARN_ONCE("buffer %d already mapped\n", id);
+
+ rmesa->rmm->u_list[id].mapped = 1;
+ ptr = r300_mem_ptr(rmesa, id);
+
+ return ptr;
+ }
+
+ if (rmesa->rmm->u_list[id].h_pending)
+ r300FlushCmdBuf(rmesa, __FUNCTION__);
+
+ if (rmesa->rmm->u_list[id].h_pending) {
+ return NULL;
+ }
+
+ while (rmesa->rmm->u_list[id].age >
+ radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000)
+ usleep(10);
+
+ if (tries >= 1000) {
+ fprintf(stderr, "Idling failed (%x vs %x)\n",
+ rmesa->rmm->u_list[id].age,
+ radeonGetAge((radeonContextPtr) rmesa));
+ return NULL;
+ }
+
+ if (rmesa->rmm->u_list[id].mapped == 1)
+ WARN_ONCE("buffer %d already mapped\n", id);
+
+ rmesa->rmm->u_list[id].mapped = 1;
+ ptr = r300_mem_ptr(rmesa, id);
+
+ return ptr;
+}
+
+void r300_mem_unmap(r300ContextPtr rmesa, int id)
+{
+#ifdef MM_DEBUG
+ fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+ radeonGetAge((radeonContextPtr) rmesa));
+#endif
+
+ assert(id <= rmesa->rmm->u_last);
+
+ if (rmesa->rmm->u_list[id].mapped == 0)
+ WARN_ONCE("buffer %d not mapped\n", id);
+
+ rmesa->rmm->u_list[id].mapped = 0;
+}
+
+void r300_mem_free(r300ContextPtr rmesa, int id)
+{
+#ifdef MM_DEBUG
+ fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+ radeonGetAge((radeonContextPtr) rmesa));
+#endif
+
+ assert(id <= rmesa->rmm->u_last);
+
+ if (id == 0)
+ return;
+
+ if (rmesa->rmm->u_list[id].ptr == NULL) {
+ WARN_ONCE("Not allocated!\n");
+ return;
+ }
+
+ if (rmesa->rmm->u_list[id].pending) {
+ WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr);
+ return;
+ }
+
+ rmesa->rmm->u_list[id].pending = 1;
+}
+#endif
diff --git a/r300/r300_mem.h b/r300/r300_mem.h
new file mode 100644
index 0000000..625a7f6
--- /dev/null
+++ b/r300/r300_mem.h
@@ -0,0 +1,37 @@
+#ifndef __R300_MEM_H__
+#define __R300_MEM_H__
+
+//#define R300_MEM_PDL 0
+#define R300_MEM_UL 1
+
+#define R300_MEM_R 1
+#define R300_MEM_W 2
+#define R300_MEM_RW (R300_MEM_R | R300_MEM_W)
+
+#define R300_MEM_SCRATCH 2
+
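+/* Per-allocation bookkeeping for the user-space GART memory manager
+ * (summary of how r300_mem.c uses these fields): ptr is the CPU mapping,
+ * age is the value the DRM scratch write stores when the GPU is done with
+ * the buffer, h_pending counts scratch writes still in flight, pending
+ * marks a buffer waiting to be freed, and mapped tracks CPU mappings.
+ */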
+struct r300_memory_manager {
+ struct {
+ void *ptr;
+ uint32_t size;
+ uint32_t age;
+ uint32_t h_pending;
+ int pending;
+ int mapped;
+ } *u_list;
+ int u_head, u_size, u_last;
+
+};
+
+extern void r300_mem_init(r300ContextPtr rmesa);
+extern void r300_mem_destroy(r300ContextPtr rmesa);
+extern void *r300_mem_ptr(r300ContextPtr rmesa, int id);
+extern int r300_mem_find(r300ContextPtr rmesa, void *ptr);
+extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size);
+extern void r300_mem_use(r300ContextPtr rmesa, int id);
+extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id);
+extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access);
+extern void r300_mem_unmap(r300ContextPtr rmesa, int id);
+extern void r300_mem_free(r300ContextPtr rmesa, int id);
+
+#endif
diff --git a/r300/r300_program.h b/r300/r300_program.h
new file mode 100644
index 0000000..eddd783
--- /dev/null
+++ b/r300/r300_program.h
@@ -0,0 +1,150 @@
+/*
+Copyright (C) 2004 Nicolai Haehnle. All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_PROGRAM_H__
+#define __R300_PROGRAM_H__
+
+#include "r300_reg.h"
+
+/**
+ * Vertex program helper macros
+ */
+
+/* Produce out dword */
+#define VP_OUTCLASS_TMP R300_VPI_OUT_REG_CLASS_TEMPORARY
+#define VP_OUTCLASS_OUT R300_VPI_OUT_REG_CLASS_RESULT
+
+#define VP_OUTMASK_X R300_VPI_OUT_WRITE_X
+#define VP_OUTMASK_Y R300_VPI_OUT_WRITE_Y
+#define VP_OUTMASK_Z R300_VPI_OUT_WRITE_Z
+#define VP_OUTMASK_W R300_VPI_OUT_WRITE_W
+#define VP_OUTMASK_XY (VP_OUTMASK_X|VP_OUTMASK_Y)
+#define VP_OUTMASK_XZ (VP_OUTMASK_X|VP_OUTMASK_Z)
+#define VP_OUTMASK_XW (VP_OUTMASK_X|VP_OUTMASK_W)
+#define VP_OUTMASK_XYZ (VP_OUTMASK_XY|VP_OUTMASK_Z)
+#define VP_OUTMASK_XYW (VP_OUTMASK_XY|VP_OUTMASK_W)
+#define VP_OUTMASK_XZW (VP_OUTMASK_XZ|VP_OUTMASK_W)
+#define VP_OUTMASK_XYZW (VP_OUTMASK_XYZ|VP_OUTMASK_W)
+#define VP_OUTMASK_YZ (VP_OUTMASK_Y|VP_OUTMASK_Z)
+#define VP_OUTMASK_YW (VP_OUTMASK_Y|VP_OUTMASK_W)
+#define VP_OUTMASK_YZW (VP_OUTMASK_YZ|VP_OUTMASK_W)
+#define VP_OUTMASK_ZW (VP_OUTMASK_Z|VP_OUTMASK_W)
+
+#define VP_OUT(instr,outclass,outidx,outmask) \
+ (R300_VPI_OUT_OP_##instr | \
+ ((outidx) << R300_VPI_OUT_REG_INDEX_SHIFT) | \
+ VP_OUTCLASS_##outclass | \
+ VP_OUTMASK_##outmask)
+
+/* Produce in dword */
+#define VP_INCLASS_TMP R300_VPI_IN_REG_CLASS_TEMPORARY
+#define VP_INCLASS_IN R300_VPI_IN_REG_CLASS_ATTRIBUTE
+#define VP_INCLASS_CONST R300_VPI_IN_REG_CLASS_PARAMETER
+
+#define VP_IN(class,idx) \
+ (((idx) << R300_VPI_IN_REG_INDEX_SHIFT) | \
+ VP_INCLASS_##class | \
+ (R300_VPI_IN_SELECT_X << R300_VPI_IN_X_SHIFT) | \
+ (R300_VPI_IN_SELECT_Y << R300_VPI_IN_Y_SHIFT) | \
+ (R300_VPI_IN_SELECT_Z << R300_VPI_IN_Z_SHIFT) | \
+ (R300_VPI_IN_SELECT_W << R300_VPI_IN_W_SHIFT))
+#define VP_ZERO() \
+ ((R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_X_SHIFT) | \
+ (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_Y_SHIFT) | \
+ (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_Z_SHIFT) | \
+ (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_W_SHIFT))
+#define VP_ONE() \
+ ((R300_VPI_IN_SELECT_ONE << R300_VPI_IN_X_SHIFT) | \
+ (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_Y_SHIFT) | \
+ (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_Z_SHIFT) | \
+ (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_W_SHIFT))
+
+#define VP_NEG(in,comp) ((in) ^ (R300_VPI_IN_NEG_##comp))
+#define VP_NEGALL(in,comp) VP_NEG(VP_NEG(VP_NEG(VP_NEG((in),X),Y),Z),W)
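+
+/* Example (see the clear state in r300_ioctl.c): a passthrough instruction
+ * that copies input register 0 to output register 0 is emitted as four
+ * dwords:
+ *   VP_OUT(ADD, OUT, 0, XYZW), VP_IN(IN, 0), VP_ZERO(), 0
+ */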
+
+/**
+ * Fragment program helper macros
+ */
+
+/* Produce unshifted source selectors */
+#define FP_TMP(idx) (idx)
+#define FP_CONST(idx) ((idx) | (1 << 5))
+
+/* Produce source/dest selector dword */
+#define FP_SELC_MASK_NO 0
+#define FP_SELC_MASK_X 1
+#define FP_SELC_MASK_Y 2
+#define FP_SELC_MASK_XY 3
+#define FP_SELC_MASK_Z 4
+#define FP_SELC_MASK_XZ 5
+#define FP_SELC_MASK_YZ 6
+#define FP_SELC_MASK_XYZ 7
+
+#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \
+ (((destidx) << R300_FPI1_DSTC_SHIFT) | \
+ (FP_SELC_MASK_##regmask << 23) | \
+ (FP_SELC_MASK_##outmask << 26) | \
+ ((src0) << R300_FPI1_SRC0C_SHIFT) | \
+ ((src1) << R300_FPI1_SRC1C_SHIFT) | \
+ ((src2) << R300_FPI1_SRC2C_SHIFT))
+
+#define FP_SELA_MASK_NO 0
+#define FP_SELA_MASK_W 1
+
+#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \
+ (((destidx) << R300_FPI3_DSTA_SHIFT) | \
+ (FP_SELA_MASK_##regmask << 23) | \
+ (FP_SELA_MASK_##outmask << 24) | \
+ ((src0) << R300_FPI3_SRC0A_SHIFT) | \
+ ((src1) << R300_FPI3_SRC1A_SHIFT) | \
+ ((src2) << R300_FPI3_SRC2A_SHIFT))
+
+/* Produce unshifted argument selectors */
+#define FP_ARGC(source) R300_FPI0_ARGC_##source
+#define FP_ARGA(source) R300_FPI2_ARGA_##source
+#define FP_ABS(arg) ((arg) | (1 << 6))
+#define FP_NEG(arg) ((arg) ^ (1 << 5))
+
+/* Produce instruction dword */
+#define FP_INSTRC(opcode,arg0,arg1,arg2) \
+ (R300_FPI0_OUTC_##opcode | \
+ ((arg0) << R300_FPI0_ARG0C_SHIFT) | \
+ ((arg1) << R300_FPI0_ARG1C_SHIFT) | \
+ ((arg2) << R300_FPI0_ARG2C_SHIFT))
+
+#define FP_INSTRA(opcode,arg0,arg1,arg2) \
+ (R300_FPI2_OUTA_##opcode | \
+ ((arg0) << R300_FPI2_ARG0A_SHIFT) | \
+ ((arg1) << R300_FPI2_ARG1A_SHIFT) | \
+ ((arg2) << R300_FPI2_ARG2A_SHIFT))
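+
+/* Example (see the clear state in r300_ioctl.c): a color-pipe passthrough,
+ * result = src0.xyz * 1 + 0, is encoded as
+ *   FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))
+ * with the matching source selector FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0).
+ */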
+
+extern void debug_vp(GLcontext * ctx, struct gl_vertex_program *vp);
+
+#endif /* __R300_PROGRAM_H__ */
diff --git a/r300/r300_reg.h b/r300/r300_reg.h
new file mode 100644
index 0000000..e5501b6
--- /dev/null
+++ b/r300/r300_reg.h
@@ -0,0 +1,1635 @@
+/**************************************************************************
+
+Copyright (C) 2004-2005 Nicolai Haehnle et al.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/* *INDENT-OFF* */
+
+#ifndef _R300_REG_H
+#define _R300_REG_H
+
+#define R300_MC_INIT_MISC_LAT_TIMER 0x180
+# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0
+# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4
+# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8
+# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12
+# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16
+# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20
+# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24
+# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28
+
+
+#define R300_MC_INIT_GFX_LAT_TIMER 0x154
+# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0
+# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4
+# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8
+# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12
+# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16
+# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20
+# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24
+# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28
+
+/*
+ * This file contains registers and constants for the R300. They have been
+ * found mostly by examining command buffers captured using glxtest, as well
+ * as by extrapolating some known registers and constants from the R200.
+ * I am fairly certain that they are correct unless stated otherwise
+ * in comments.
+ */
+
+#define R300_SE_VPORT_XSCALE 0x1D98
+#define R300_SE_VPORT_XOFFSET 0x1D9C
+#define R300_SE_VPORT_YSCALE 0x1DA0
+#define R300_SE_VPORT_YOFFSET 0x1DA4
+#define R300_SE_VPORT_ZSCALE 0x1DA8
+#define R300_SE_VPORT_ZOFFSET 0x1DAC
+
+
+/*
+ * Vertex Array Processing (VAP) Control
+ * Stolen from r200 code by Christoph Brill (It's a guess!)
+ */
+#define R300_VAP_CNTL 0x2080
+
+/* This register is written directly and also starts data section
+ * in many 3d CP_PACKET3's
+ */
+#define R300_VAP_VF_CNTL 0x2084
+# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0
+# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0)
+# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0)
+# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0)
+# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0)
+# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0)
+
+# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4
+ /* State based - direct writes to registers trigger vertex
+ generation */
+# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4)
+
+ /* I don't think I saw these three used.. */
+# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6
+# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9
+# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10
+
+ /* index size - when not set the indices are assumed to be 16 bit */
+# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11)
+ /* number of vertices */
+# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16
+
+/* BEGIN: Wild guesses */
+#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090
+# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT (1<<1)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) /* GUESS */
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) /* GUESS */
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) /* GUESS */
+# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) /* GUESS */
+
+#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
+/* END: Wild guesses */
+
+#define R300_SE_VTE_CNTL 0x20b0
+# define R300_VPORT_X_SCALE_ENA 0x00000001
+# define R300_VPORT_X_OFFSET_ENA 0x00000002
+# define R300_VPORT_Y_SCALE_ENA 0x00000004
+# define R300_VPORT_Y_OFFSET_ENA 0x00000008
+# define R300_VPORT_Z_SCALE_ENA 0x00000010
+# define R300_VPORT_Z_OFFSET_ENA 0x00000020
+# define R300_VTX_XY_FMT 0x00000100
+# define R300_VTX_Z_FMT 0x00000200
+# define R300_VTX_W0_FMT 0x00000400
+# define R300_VTX_W0_NORMALIZE 0x00000800
+# define R300_VTX_ST_DENORMALIZED 0x00001000
+
+/* BEGIN: Vertex data assembly - lots of uncertainties */
+
+/* gap */
+
+#define R300_VAP_CNTL_STATUS 0x2140
+# define R300_VC_NO_SWAP (0 << 0)
+# define R300_VC_16BIT_SWAP (1 << 0)
+# define R300_VC_32BIT_SWAP (2 << 0)
+# define R300_VAP_TCL_BYPASS (1 << 8)
+
+/* gap */
+
+/* Where do we get our vertex data?
+ *
+ * Vertex data comes either from immediate mode registers or from
+ * vertex arrays.
+ * There appears to be no mixed mode (though we can force the pitch of
+ * vertex arrays to 0, effectively reusing the same element over and over
+ * again).
+ *
+ * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure
+ * if these registers influence vertex array processing.
+ *
+ * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3.
+ *
+ * In both cases, vertex attributes are then passed through INPUT_ROUTE.
+ *
+ * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data
+ * into the vertex processor's input registers.
+ * The first word routes the first input, the second word the second, etc.
+ * The corresponding input is routed into the register with the given index.
+ * The list is ended by a word with INPUT_ROUTE_END set.
+ *
+ * Always set COMPONENTS_4 in immediate mode.
+ */
+
+#define R300_VAP_INPUT_ROUTE_0_0 0x2150
+# define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0)
+# define R300_INPUT_ROUTE_COMPONENTS_2 (1 << 0)
+# define R300_INPUT_ROUTE_COMPONENTS_3 (2 << 0)
+# define R300_INPUT_ROUTE_COMPONENTS_4 (3 << 0)
+# define R300_INPUT_ROUTE_COMPONENTS_RGBA (4 << 0) /* GUESS */
+# define R300_VAP_INPUT_ROUTE_IDX_SHIFT 8
+# define R300_VAP_INPUT_ROUTE_IDX_MASK (31 << 8) /* GUESS */
+# define R300_VAP_INPUT_ROUTE_END (1 << 13)
+# define R300_INPUT_ROUTE_IMMEDIATE_MODE (0 << 14) /* GUESS */
+# define R300_INPUT_ROUTE_FLOAT (1 << 14) /* GUESS */
+# define R300_INPUT_ROUTE_UNSIGNED_BYTE (2 << 14) /* GUESS */
+# define R300_INPUT_ROUTE_FLOAT_COLOR (3 << 14) /* GUESS */
+#define R300_VAP_INPUT_ROUTE_0_1 0x2154
+#define R300_VAP_INPUT_ROUTE_0_2 0x2158
+#define R300_VAP_INPUT_ROUTE_0_3 0x215C
+#define R300_VAP_INPUT_ROUTE_0_4 0x2160
+#define R300_VAP_INPUT_ROUTE_0_5 0x2164
+#define R300_VAP_INPUT_ROUTE_0_6 0x2168
+#define R300_VAP_INPUT_ROUTE_0_7 0x216C
+
+/* gap */
+
+/* Notes:
+ * - always set up to produce at least two attributes:
+ * if vertex program uses only position, fglrx will set normal, too
+ * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal.
+ */
+#define R300_VAP_INPUT_CNTL_0 0x2180
+# define R300_INPUT_CNTL_0_COLOR 0x00000001
+#define R300_VAP_INPUT_CNTL_1 0x2184
+# define R300_INPUT_CNTL_POS 0x00000001
+# define R300_INPUT_CNTL_NORMAL 0x00000002
+# define R300_INPUT_CNTL_COLOR 0x00000004
+# define R300_INPUT_CNTL_TC0 0x00000400
+# define R300_INPUT_CNTL_TC1 0x00000800
+# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */
+# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */
+# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */
+# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */
+# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */
+# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */
+
+/* gap */
+
+/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0
+ * are set to a swizzling bit pattern, other words are 0.
+ *
+ * In immediate mode, the pattern is always set to xyzw. In vertex array
+ * mode, the swizzling pattern is e.g. used to set zw components in texture
+ * coordinates with only two components.
+ */
+#define R300_VAP_INPUT_ROUTE_1_0 0x21E0
+# define R300_INPUT_ROUTE_SELECT_X 0
+# define R300_INPUT_ROUTE_SELECT_Y 1
+# define R300_INPUT_ROUTE_SELECT_Z 2
+# define R300_INPUT_ROUTE_SELECT_W 3
+# define R300_INPUT_ROUTE_SELECT_ZERO 4
+# define R300_INPUT_ROUTE_SELECT_ONE 5
+# define R300_INPUT_ROUTE_SELECT_MASK 7
+# define R300_INPUT_ROUTE_X_SHIFT 0
+# define R300_INPUT_ROUTE_Y_SHIFT 3
+# define R300_INPUT_ROUTE_Z_SHIFT 6
+# define R300_INPUT_ROUTE_W_SHIFT 9
+# define R300_INPUT_ROUTE_ENABLE (15 << 12)
+#define R300_VAP_INPUT_ROUTE_1_1 0x21E4
+#define R300_VAP_INPUT_ROUTE_1_2 0x21E8
+#define R300_VAP_INPUT_ROUTE_1_3 0x21EC
+#define R300_VAP_INPUT_ROUTE_1_4 0x21F0
+#define R300_VAP_INPUT_ROUTE_1_5 0x21F4
+#define R300_VAP_INPUT_ROUTE_1_6 0x21F8
+#define R300_VAP_INPUT_ROUTE_1_7 0x21FC
+
+/* END: Vertex data assembly */
+
+/* gap */
+
+/* BEGIN: Upload vertex program and data */
+
+/*
+ * The programmable vertex shader unit has a memory bank of unknown size
+ * that can be written to in 16 byte units by writing the address into
+ * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs).
+ *
+ * Pointers into the memory bank are always in multiples of 16 bytes.
+ *
+ * The memory bank is divided into areas with fixed meaning.
+ *
+ * Starting at address UPLOAD_PROGRAM: Vertex program instructions.
+ * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB),
+ * whereas the difference between known addresses suggests size 512.
+ *
+ * Starting at address UPLOAD_PARAMETERS: Vertex program parameters.
+ * Native reported limits and the VPI layout suggest size 256, whereas
+ * difference between known addresses suggests size 512.
+ *
+ * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the
+ * floating point pointsize. The exact purpose of this state is uncertain,
+ * as there is also the R300_RE_POINTSIZE register.
+ *
+ * Multiple vertex programs and parameter sets can be loaded at once,
+ * which could explain the size discrepancy.
+ */
+#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200
+# define R300_PVS_UPLOAD_PROGRAM 0x00000000
+# define R300_PVS_UPLOAD_PARAMETERS 0x00000200
+# define R300_PVS_UPLOAD_CLIP_PLANE0 0x00000400
+# define R300_PVS_UPLOAD_CLIP_PLANE1 0x00000401
+# define R300_PVS_UPLOAD_CLIP_PLANE2 0x00000402
+# define R300_PVS_UPLOAD_CLIP_PLANE3 0x00000403
+# define R300_PVS_UPLOAD_CLIP_PLANE4 0x00000404
+# define R300_PVS_UPLOAD_CLIP_PLANE5 0x00000405
+# define R300_PVS_UPLOAD_POINTSIZE 0x00000406
+
+# define R500_PVS_UPLOAD_CLIP_PLANE0 0x00000600
+# define R500_PVS_UPLOAD_CLIP_PLANE1 0x00000601
+# define R500_PVS_UPLOAD_CLIP_PLANE2 0x00000602
+# define R500_PVS_UPLOAD_CLIP_PLANE3 0x00000603
+# define R500_PVS_UPLOAD_CLIP_PLANE4 0x00000604
+# define R500_PVS_UPLOAD_CLIP_PLANE5 0x00000605
+
+/* gap */
+
+#define R300_VAP_PVS_UPLOAD_DATA 0x2208
+
+/* END: Upload vertex program and data */
+
+/* gap */
+
+/* I do not know the purpose of this register. However, I do know that
+ * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL
+ * for normal rendering.
+ *
+ * 2007-11-05: This register is the user clip plane control register, but there
+ * also seems to be a rendering mode control; the NORMAL/CLEAR defines.
+ *
+ * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view
+ */
+#define R300_VAP_CLIP_CNTL 0x221C
+# define R300_221C_NORMAL 0x00000000
+# define R300_221C_CLEAR 0x0001C000
+#define R300_VAP_UCP_ENABLE_0 (1 << 0)
+
+/* gap */
+
+/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
+ * rendering commands and overwriting vertex program parameters.
+ * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and
+ * avoids bugs caused by still running shaders reading bad data from memory.
+ */
+#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */
+
+/* Absolutely no clue what this register is about. */
+#define R300_VAP_UNKNOWN_2288 0x2288
+# define R300_2288_R300 0x00750000 /* -- nh */
+# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+
+/* gap */
+
+/* Addresses are relative to the vertex program instruction area of the
+ * memory bank. PROGRAM_END points to the last instruction of the active
+ * program
+ *
+ * The meaning of the two UNKNOWN fields is obviously not known. However,
+ * experiments so far have shown that both *must* point to an instruction
+ * inside the vertex program, otherwise the GPU locks up.
+ * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and
+ * CNTL_1_UNKNOWN points to the instruction where the last write to the
+ * position takes place.
+ * Most likely this is used to skip the rest of the program in cases
+ * where a group of vertices isn't visible. For some reason this "section"
+ * sometimes also accepts other instructions that have no relationship with
+ * position calculations.
+ */
+#define R300_VAP_PVS_CNTL_1 0x22D0
+# define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0
+# define R300_PVS_CNTL_1_POS_END_SHIFT 10
+# define R300_PVS_CNTL_1_PROGRAM_END_SHIFT 20
+/* Addresses are relative to the vertex program parameters area. */
+#define R300_VAP_PVS_CNTL_2 0x22D4
+# define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0
+# define R300_PVS_CNTL_2_PARAM_COUNT_SHIFT 16
+#define R300_VAP_PVS_CNTL_3 0x22D8
+# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT 10
+# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0
+
+/* The entire range from 0x2300 to 0x24AC inclusive seems to be used for
+ * immediate vertices
+ */
+#define R300_VAP_VTX_COLOR_R 0x2464
+#define R300_VAP_VTX_COLOR_G 0x2468
+#define R300_VAP_VTX_COLOR_B 0x246C
+#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */
+#define R300_VAP_VTX_POS_0_Y_1 0x2494
+#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */
+#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */
+#define R300_VAP_VTX_POS_0_Y_2 0x24A4
+#define R300_VAP_VTX_POS_0_Z_2 0x24A8
+/* write 0 to indicate end of packet? */
+#define R300_VAP_VTX_END_OF_PKT 0x24AC
+
+/* gap */
+
+/* These are values from r300_reg/r300_reg.h - they are known to be correct
+ * and are here so we can use one register file instead of several
+ * - Vladimir
+ */
+#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000
+# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16)
+
+#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004
+ /* each of the following is 3 bits wide, specifies number
+ of components */
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
+
+/* UNK30 seems to enable point-to-quad transformation on textures
+ * (or something closely related to that).
+ * This bit is rather fatal for the time being due to shortcomings on the
+ * pixel shader side
+ */
+#define R300_GB_ENABLE 0x4008
+# define R300_GB_POINT_STUFF_ENABLE (1<<0)
+# define R300_GB_LINE_STUFF_ENABLE (1<<1)
+# define R300_GB_TRIANGLE_STUFF_ENABLE (1<<2)
+# define R300_GB_STENCIL_AUTO_ENABLE (1<<4)
+# define R300_GB_UNK31 (1<<31)
+ /* each of the following is 2 bits wide */
+#define R300_GB_TEX_REPLICATE 0
+#define R300_GB_TEX_ST 1
+#define R300_GB_TEX_STR 2
+# define R300_GB_TEX0_SOURCE_SHIFT 16
+# define R300_GB_TEX1_SOURCE_SHIFT 18
+# define R300_GB_TEX2_SOURCE_SHIFT 20
+# define R300_GB_TEX3_SOURCE_SHIFT 22
+# define R300_GB_TEX4_SOURCE_SHIFT 24
+# define R300_GB_TEX5_SOURCE_SHIFT 26
+# define R300_GB_TEX6_SOURCE_SHIFT 28
+# define R300_GB_TEX7_SOURCE_SHIFT 30
+
+/* MSPOS - positions for multisample antialiasing (?) */
+#define R300_GB_MSPOS0 0x4010
+ /* shifts - each of the fields is 4 bits */
+# define R300_GB_MSPOS0__MS_X0_SHIFT 0
+# define R300_GB_MSPOS0__MS_Y0_SHIFT 4
+# define R300_GB_MSPOS0__MS_X1_SHIFT 8
+# define R300_GB_MSPOS0__MS_Y1_SHIFT 12
+# define R300_GB_MSPOS0__MS_X2_SHIFT 16
+# define R300_GB_MSPOS0__MS_Y2_SHIFT 20
+# define R300_GB_MSPOS0__MSBD0_Y 24
+# define R300_GB_MSPOS0__MSBD0_X 28
+
+#define R300_GB_MSPOS1 0x4014
+# define R300_GB_MSPOS1__MS_X3_SHIFT 0
+# define R300_GB_MSPOS1__MS_Y3_SHIFT 4
+# define R300_GB_MSPOS1__MS_X4_SHIFT 8
+# define R300_GB_MSPOS1__MS_Y4_SHIFT 12
+# define R300_GB_MSPOS1__MS_X5_SHIFT 16
+# define R300_GB_MSPOS1__MS_Y5_SHIFT 20
+# define R300_GB_MSPOS1__MSBD1 24
+
+
+#define R300_GB_TILE_CONFIG 0x4018
+# define R300_GB_TILE_ENABLE (1<<0)
+# define R300_GB_TILE_PIPE_COUNT_RV300 0
+# define R300_GB_TILE_PIPE_COUNT_R300 (3<<1)
+# define R300_GB_TILE_PIPE_COUNT_R420 (7<<1)
+# define R300_GB_TILE_PIPE_COUNT_RV410 (3<<1)
+# define R300_GB_TILE_SIZE_8 0
+# define R300_GB_TILE_SIZE_16 (1<<4)
+# define R300_GB_TILE_SIZE_32 (2<<4)
+# define R300_GB_SUPER_SIZE_1 (0<<6)
+# define R300_GB_SUPER_SIZE_2 (1<<6)
+# define R300_GB_SUPER_SIZE_4 (2<<6)
+# define R300_GB_SUPER_SIZE_8 (3<<6)
+# define R300_GB_SUPER_SIZE_16 (4<<6)
+# define R300_GB_SUPER_SIZE_32 (5<<6)
+# define R300_GB_SUPER_SIZE_64 (6<<6)
+# define R300_GB_SUPER_SIZE_128 (7<<6)
+# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */
+# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */
+# define R300_GB_SUPER_TILE_A 0
+# define R300_GB_SUPER_TILE_B (1<<15)
+# define R300_GB_SUBPIXEL_1_12 0
+# define R300_GB_SUBPIXEL_1_16 (1<<16)
+
+#define R300_GB_FIFO_SIZE 0x4024
+ /* each of the following is 2 bits wide */
+#define R300_GB_FIFO_SIZE_32 0
+#define R300_GB_FIFO_SIZE_64 1
+#define R300_GB_FIFO_SIZE_128 2
+#define R300_GB_FIFO_SIZE_256 3
+# define R300_SC_IFIFO_SIZE_SHIFT 0
+# define R300_SC_TZFIFO_SIZE_SHIFT 2
+# define R300_SC_BFIFO_SIZE_SHIFT 4
+
+# define R300_US_OFIFO_SIZE_SHIFT 12
+# define R300_US_WFIFO_SIZE_SHIFT 14
+	/* the following use the same constants as above, but the meaning
+	   is times 2 (i.e. instead of 32 words it means 64) */
+# define R300_RS_TFIFO_SIZE_SHIFT 6
+# define R300_RS_CFIFO_SIZE_SHIFT 8
+# define R300_US_RAM_SIZE_SHIFT 10
+ /* watermarks, 3 bits wide */
+# define R300_RS_HIGHWATER_COL_SHIFT 16
+# define R300_RS_HIGHWATER_TEX_SHIFT 19
+# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */
+# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24
+
+#define R300_GB_SELECT 0x401C
+# define R300_GB_FOG_SELECT_C0A 0
+# define R300_GB_FOG_SELECT_C1A 1
+# define R300_GB_FOG_SELECT_C2A 2
+# define R300_GB_FOG_SELECT_C3A 3
+# define R300_GB_FOG_SELECT_1_1_W 4
+# define R300_GB_FOG_SELECT_Z 5
+# define R300_GB_DEPTH_SELECT_Z 0
+# define R300_GB_DEPTH_SELECT_1_1_W (1<<3)
+# define R300_GB_W_SELECT_1_W 0
+# define R300_GB_W_SELECT_1 (1<<4)
+
+#define R300_GB_AA_CONFIG 0x4020
+# define R300_AA_DISABLE 0x00
+# define R300_AA_ENABLE 0x01
+# define R300_AA_SUBSAMPLES_2 0
+# define R300_AA_SUBSAMPLES_3 (1<<1)
+# define R300_AA_SUBSAMPLES_4 (2<<1)
+# define R300_AA_SUBSAMPLES_6 (3<<1)
+
+/* gap */
+
+/* Zero to flush caches. */
+#define R300_TX_CNTL 0x4100
+#define R300_TX_FLUSH 0x0
+
+/* The upper enable bits are guessed, based on fglrx reported limits. */
+#define R300_TX_ENABLE 0x4104
+# define R300_TX_ENABLE_0 (1 << 0)
+# define R300_TX_ENABLE_1 (1 << 1)
+# define R300_TX_ENABLE_2 (1 << 2)
+# define R300_TX_ENABLE_3 (1 << 3)
+# define R300_TX_ENABLE_4 (1 << 4)
+# define R300_TX_ENABLE_5 (1 << 5)
+# define R300_TX_ENABLE_6 (1 << 6)
+# define R300_TX_ENABLE_7 (1 << 7)
+# define R300_TX_ENABLE_8 (1 << 8)
+# define R300_TX_ENABLE_9 (1 << 9)
+# define R300_TX_ENABLE_10 (1 << 10)
+# define R300_TX_ENABLE_11 (1 << 11)
+# define R300_TX_ENABLE_12 (1 << 12)
+# define R300_TX_ENABLE_13 (1 << 13)
+# define R300_TX_ENABLE_14 (1 << 14)
+# define R300_TX_ENABLE_15 (1 << 15)
+
+/* The pointsize is given in multiples of 6. The pointsize can be
+ * enormous: Clear() renders a single point that fills the entire
+ * framebuffer.
+ */
+#define R300_RE_POINTSIZE 0x421C
+# define R300_POINTSIZE_Y_SHIFT 0
+# define R300_POINTSIZE_Y_MASK (0xFFFF << 0) /* GUESS */
+# define R300_POINTSIZE_X_SHIFT 16
+# define R300_POINTSIZE_X_MASK (0xFFFF << 16) /* GUESS */
+# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6)
+
+/* The line width is given in multiples of 6.
+ * In default mode lines are classified as vertical lines.
+ * HO: horizontal
+ * VE: vertical or horizontal
+ * HO & VE: no classification
+ */
+#define R300_RE_LINE_CNT 0x4234
+# define R300_LINESIZE_SHIFT 0
+# define R300_LINESIZE_MASK (0xFFFF << 0) /* GUESS */
+# define R300_LINESIZE_MAX (R300_LINESIZE_MASK / 6)
+# define R300_LINE_CNT_HO (1 << 16)
+# define R300_LINE_CNT_VE (1 << 17)
+
+/* Some sort of scale or clamp value for texcoordless textures. */
+#define R300_RE_UNK4238 0x4238
+
+/* Something shade related */
+#define R300_RE_SHADE 0x4274
+
+#define R300_RE_SHADE_MODEL 0x4278
+# define R300_RE_SHADE_MODEL_SMOOTH 0x3aaaa
+# define R300_RE_SHADE_MODEL_FLAT 0x39595
+
+/* Dangerous */
+#define R300_RE_POLYGON_MODE 0x4288
+# define R300_PM_ENABLED (1 << 0)
+# define R300_PM_FRONT_POINT (0 << 0)
+# define R300_PM_BACK_POINT (0 << 0)
+# define R300_PM_FRONT_LINE (1 << 4)
+# define R300_PM_FRONT_FILL (1 << 5)
+# define R300_PM_BACK_LINE (1 << 7)
+# define R300_PM_BACK_FILL (1 << 8)
+
+/* Fog parameters */
+#define R300_RE_FOG_SCALE 0x4294
+#define R300_RE_FOG_START 0x4298
+
+/* Not sure why there are duplicates of the factor and constant values.
+ * My best guess so far is that there are separate zbiases for test and write.
+ * Ordering might be wrong.
+ * Some of the tests indicate that fgl has a fallback implementation of zbias
+ * via pixel shaders.
+ */
+#define R300_RE_ZBIAS_CNTL 0x42A0 /* GUESS */
+#define R300_RE_ZBIAS_T_FACTOR 0x42A4
+#define R300_RE_ZBIAS_T_CONSTANT 0x42A8
+#define R300_RE_ZBIAS_W_FACTOR 0x42AC
+#define R300_RE_ZBIAS_W_CONSTANT 0x42B0
+
+/* This register needs to be set to (1<<1) for RV350 to correctly
+ * perform the depth test (see --vb-triangles in r300_demo).
+ * Don't know about other chips. - Vladimir
+ * This is set to 3 when GL_POLYGON_OFFSET_FILL is on.
+ * My guess is that there are two bits for each zbias primitive
+ * (FILL, LINE, POINT).
+ * One to enable the depth test and one for depth writes.
+ * Yet this doesn't explain why depth writes work ...
+ */
+#define R300_RE_OCCLUSION_CNTL 0x42B4
+# define R300_OCCLUSION_ON (1<<1)
+
+#define R300_RE_CULL_CNTL 0x42B8
+# define R300_CULL_FRONT (1 << 0)
+# define R300_CULL_BACK (1 << 1)
+# define R300_FRONT_FACE_CCW (0 << 2)
+# define R300_FRONT_FACE_CW (1 << 2)
+
+
+/* BEGIN: Rasterization / Interpolators - many guesses */
+
+/* 0_UNKNOWN_18 has always been set except for clear operations.
+ * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends
+ * on the vertex program, *not* the fragment program)
+ */
+#define R300_RS_CNTL_0 0x4300
+# define R300_RS_CNTL_TC_CNT_SHIFT 2
+# define R300_RS_CNTL_TC_CNT_MASK (7 << 2)
+ /* number of color interpolators used */
+# define R300_RS_CNTL_CI_CNT_SHIFT 7
+# define R300_RS_CNTL_0_UNKNOWN_18 (1 << 18)
+ /* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n
+ register. */
+#define R300_RS_CNTL_1 0x4304
+
+/* gap */
+
+/* Only used for texture coordinates.
+ * Use the source field to route texture coordinate input from the
+ * vertex program to the desired interpolator. Note that the source
+ * field is relative to the outputs the vertex program *actually*
+ * writes. If a vertex program only writes texcoord[1], this will
+ * be source index 0.
+ * Set INTERP_USED on all interpolators that produce data used by
+ * the fragment program. INTERP_USED looks like a swizzling mask,
+ * but I haven't seen it used that way.
+ *
+ * Note: The _UNKNOWN constants are always set in their respective
+ * register. I don't know if this is necessary.
+ */
+#define R300_RS_INTERP_0 0x4310
+#define R300_RS_INTERP_1 0x4314
+# define R300_RS_INTERP_1_UNKNOWN 0x40
+#define R300_RS_INTERP_2 0x4318
+# define R300_RS_INTERP_2_UNKNOWN 0x80
+#define R300_RS_INTERP_3 0x431C
+# define R300_RS_INTERP_3_UNKNOWN 0xC0
+#define R300_RS_INTERP_4 0x4320
+#define R300_RS_INTERP_5 0x4324
+#define R300_RS_INTERP_6 0x4328
+#define R300_RS_INTERP_7 0x432C
+# define R300_RS_INTERP_SRC_SHIFT 2
+# define R300_RS_INTERP_SRC_MASK (7 << 2)
+# define R300_RS_INTERP_USED 0x00D10000
+
+/* These DWORDs control how vertex data is routed into fragment program
+ * registers, after interpolators.
+ */
+#define R300_RS_ROUTE_0 0x4330
+#define R300_RS_ROUTE_1 0x4334
+#define R300_RS_ROUTE_2 0x4338
+#define R300_RS_ROUTE_3 0x433C /* GUESS */
+#define R300_RS_ROUTE_4 0x4340 /* GUESS */
+#define R300_RS_ROUTE_5 0x4344 /* GUESS */
+#define R300_RS_ROUTE_6 0x4348 /* GUESS */
+#define R300_RS_ROUTE_7 0x434C /* GUESS */
+# define R300_RS_ROUTE_SOURCE_INTERP_0 0
+# define R300_RS_ROUTE_SOURCE_INTERP_1 1
+# define R300_RS_ROUTE_SOURCE_INTERP_2 2
+# define R300_RS_ROUTE_SOURCE_INTERP_3 3
+# define R300_RS_ROUTE_SOURCE_INTERP_4 4
+# define R300_RS_ROUTE_SOURCE_INTERP_5 5 /* GUESS */
+# define R300_RS_ROUTE_SOURCE_INTERP_6 6 /* GUESS */
+# define R300_RS_ROUTE_SOURCE_INTERP_7 7 /* GUESS */
+# define R300_RS_ROUTE_ENABLE (1 << 3) /* GUESS */
+# define R300_RS_ROUTE_DEST_SHIFT 6
+# define R300_RS_ROUTE_DEST_MASK (31 << 6) /* GUESS */
+
+/* Special handling for color: When the fragment program uses color,
+ * the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the
+ * color register index.
+ *
+ * Apparently you may set the R300_RS_ROUTE_0_COLOR bit, but not provide any
+ * R300_RS_ROUTE_0_COLOR_DEST value; this setup is used for clearing the state.
+ * See r300_ioctl.c:r300EmitClearState. I'm not sure if this setup is strictly
+ * correct or not. - Oliver.
+ */
+# define R300_RS_ROUTE_0_COLOR (1 << 14)
+# define R300_RS_ROUTE_0_COLOR_DEST_SHIFT 17
+# define R300_RS_ROUTE_0_COLOR_DEST_MASK (31 << 17) /* GUESS */
+/* As above, but for secondary color */
+# define R300_RS_ROUTE_1_COLOR1 (1 << 14)
+# define R300_RS_ROUTE_1_COLOR1_DEST_SHIFT 17
+# define R300_RS_ROUTE_1_COLOR1_DEST_MASK (31 << 17)
+# define R300_RS_ROUTE_1_UNKNOWN11 (1 << 11)
+/* END: Rasterization / Interpolators - many guesses */
+
+/* BEGIN: Scissors and cliprects */
+
+/* There are four clipping rectangles. Their corner coordinates are inclusive.
+ * Every pixel is assigned a number from 0 to 15 by setting bits 0-3 depending
+ * on whether the pixel is inside cliprects 0-3, respectively. For example,
+ * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
+ * the number 3 (binary 0011).
+ * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set,
+ * the pixel is rasterized.
+ *
+ * In addition to this, there is a scissors rectangle. Only pixels inside the
+ * scissors rectangle are drawn. (coordinates are inclusive)
+ *
+ * For some reason, the top-left corner of the framebuffer is at (1440, 1440)
+ * for the purpose of clipping and scissors.
+ */
+#define R300_RE_CLIPRECT_TL_0 0x43B0
+#define R300_RE_CLIPRECT_BR_0 0x43B4
+#define R300_RE_CLIPRECT_TL_1 0x43B8
+#define R300_RE_CLIPRECT_BR_1 0x43BC
+#define R300_RE_CLIPRECT_TL_2 0x43C0
+#define R300_RE_CLIPRECT_BR_2 0x43C4
+#define R300_RE_CLIPRECT_TL_3 0x43C8
+#define R300_RE_CLIPRECT_BR_3 0x43CC
+# define R300_CLIPRECT_OFFSET 1440
+# define R300_CLIPRECT_MASK 0x1FFF
+# define R300_CLIPRECT_X_SHIFT 0
+# define R300_CLIPRECT_X_MASK (0x1FFF << 0)
+# define R300_CLIPRECT_Y_SHIFT 13
+# define R300_CLIPRECT_Y_MASK (0x1FFF << 13)
+#define R300_RE_CLIPRECT_CNTL 0x43D0
+# define R300_CLIP_OUT (1 << 0)
+# define R300_CLIP_0 (1 << 1)
+# define R300_CLIP_1 (1 << 2)
+# define R300_CLIP_10 (1 << 3)
+# define R300_CLIP_2 (1 << 4)
+# define R300_CLIP_20 (1 << 5)
+# define R300_CLIP_21 (1 << 6)
+# define R300_CLIP_210 (1 << 7)
+# define R300_CLIP_3 (1 << 8)
+# define R300_CLIP_30 (1 << 9)
+# define R300_CLIP_31 (1 << 10)
+# define R300_CLIP_310 (1 << 11)
+# define R300_CLIP_32 (1 << 12)
+# define R300_CLIP_320 (1 << 13)
+# define R300_CLIP_321 (1 << 14)
+# define R300_CLIP_3210 (1 << 15)
+
+/* gap */
+
+#define R300_RE_SCISSORS_TL 0x43E0
+#define R300_RE_SCISSORS_BR 0x43E4
+# define R300_SCISSORS_OFFSET 1440
+# define R300_SCISSORS_X_SHIFT 0
+# define R300_SCISSORS_X_MASK (0x1FFF << 0)
+# define R300_SCISSORS_Y_SHIFT 13
+# define R300_SCISSORS_Y_MASK (0x1FFF << 13)
+/* END: Scissors and cliprects */
+
+/* BEGIN: Texture specification */
+
+/*
+ * The texture specification dwords are grouped by meaning and not by texture
+ * unit. This means that e.g. the offset for texture image unit N is found in
+ * register TX_OFFSET_0 + (4*N)
+ */
+#define R300_TX_FILTER_0 0x4400
+# define R300_TX_REPEAT 0
+# define R300_TX_MIRRORED 1
+# define R300_TX_CLAMP 4
+# define R300_TX_CLAMP_TO_EDGE 2
+# define R300_TX_CLAMP_TO_BORDER 6
+# define R300_TX_WRAP_S_SHIFT 0
+# define R300_TX_WRAP_S_MASK (7 << 0)
+# define R300_TX_WRAP_T_SHIFT 3
+# define R300_TX_WRAP_T_MASK (7 << 3)
+# define R300_TX_WRAP_Q_SHIFT 6
+# define R300_TX_WRAP_Q_MASK (7 << 6)
+# define R300_TX_MAG_FILTER_NEAREST (1 << 9)
+# define R300_TX_MAG_FILTER_LINEAR (2 << 9)
+# define R300_TX_MAG_FILTER_MASK (3 << 9)
+# define R300_TX_MIN_FILTER_NEAREST (1 << 11)
+# define R300_TX_MIN_FILTER_LINEAR (2 << 11)
+# define R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST (5 << 11)
+# define R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR (9 << 11)
+# define R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 11)
+# define R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR (10 << 11)
+
+/* NOTE: NEAREST doesn't seem to exist.
+ * I'm not setting MAG_FILTER_MASK and (3 << 11) for all
+ * anisotropy modes because that would override the selected mag filter.
+ */
+# define R300_TX_MIN_FILTER_ANISO_NEAREST (0 << 13)
+# define R300_TX_MIN_FILTER_ANISO_LINEAR (0 << 13)
+# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (1 << 13)
+# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (2 << 13)
+# define R300_TX_MIN_FILTER_MASK ( (15 << 11) | (3 << 13) )
+# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21)
+# define R300_TX_MAX_ANISO_2_TO_1 (2 << 21)
+# define R300_TX_MAX_ANISO_4_TO_1 (4 << 21)
+# define R300_TX_MAX_ANISO_8_TO_1 (6 << 21)
+# define R300_TX_MAX_ANISO_16_TO_1 (8 << 21)
+# define R300_TX_MAX_ANISO_MASK (14 << 21)
+
+#define R300_TX_FILTER1_0 0x4440
+# define R300_CHROMA_KEY_MODE_DISABLE 0
+# define R300_CHROMA_KEY_FORCE 1
+# define R300_CHROMA_KEY_BLEND 2
+# define R300_MC_ROUND_NORMAL (0<<2)
+# define R300_MC_ROUND_MPEG4 (1<<2)
+# define R300_LOD_BIAS_MASK 0x1fff
+# define R300_EDGE_ANISO_EDGE_DIAG (0<<13)
+# define R300_EDGE_ANISO_EDGE_ONLY (1<<13)
+# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14)
+# define R300_MC_COORD_TRUNCATE_MPEG (1<<14)
+# define R300_TX_TRI_PERF_0_8 (0<<15)
+# define R300_TX_TRI_PERF_1_8 (1<<15)
+# define R300_TX_TRI_PERF_1_4 (2<<15)
+# define R300_TX_TRI_PERF_3_8 (3<<15)
+# define R300_ANISO_THRESHOLD_MASK (7<<17)
+
+#define R300_TX_SIZE_0 0x4480
+# define R300_TX_WIDTHMASK_SHIFT 0
+# define R300_TX_WIDTHMASK_MASK (2047 << 0)
+# define R300_TX_HEIGHTMASK_SHIFT 11
+# define R300_TX_HEIGHTMASK_MASK (2047 << 11)
+# define R300_TX_UNK23 (1 << 23)
+# define R300_TX_MAX_MIP_LEVEL_SHIFT 26
+# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26)
+# define R300_TX_SIZE_PROJECTED (1<<30)
+# define R300_TX_SIZE_TXPITCH_EN (1<<31)
+#define R300_TX_FORMAT_0 0x44C0
+ /* The interpretation of the format word by Wladimir van der Laan */
+ /* The X, Y, Z and W refer to the layout of the components.
+ They are given meanings as R, G, B and Alpha by the swizzle
+ specification */
+# define R300_TX_FORMAT_X8 0x0
+# define R300_TX_FORMAT_X16 0x1
+# define R300_TX_FORMAT_Y4X4 0x2
+# define R300_TX_FORMAT_Y8X8 0x3
+# define R300_TX_FORMAT_Y16X16 0x4
+# define R300_TX_FORMAT_Z3Y3X2 0x5
+# define R300_TX_FORMAT_Z5Y6X5 0x6
+# define R300_TX_FORMAT_Z6Y5X5 0x7
+# define R300_TX_FORMAT_Z11Y11X10 0x8
+# define R300_TX_FORMAT_Z10Y11X11 0x9
+# define R300_TX_FORMAT_W4Z4Y4X4 0xA
+# define R300_TX_FORMAT_W1Z5Y5X5 0xB
+# define R300_TX_FORMAT_W8Z8Y8X8 0xC
+# define R300_TX_FORMAT_W2Z10Y10X10 0xD
+# define R300_TX_FORMAT_W16Z16Y16X16 0xE
+# define R300_TX_FORMAT_DXT1 0xF
+# define R300_TX_FORMAT_DXT3 0x10
+# define R300_TX_FORMAT_DXT5 0x11
+# define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */
+# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */
+# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */
+# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */
+ /* 0x16 - some 16 bit green format.. ?? */
+# define R300_TX_FORMAT_UNK25 (1 << 25) /* no swizzle */
+# define R300_TX_FORMAT_CUBIC_MAP (1 << 26)
+
+ /* gap */
+ /* Floating point formats */
+ /* Note - hardware supports both 16 and 32 bit floating point */
+# define R300_TX_FORMAT_FL_I16 0x18
+# define R300_TX_FORMAT_FL_I16A16 0x19
+# define R300_TX_FORMAT_FL_R16G16B16A16 0x1A
+# define R300_TX_FORMAT_FL_I32 0x1B
+# define R300_TX_FORMAT_FL_I32A32 0x1C
+# define R300_TX_FORMAT_FL_R32G32B32A32 0x1D
+ /* alpha modes, convenience mostly */
+ /* if you have alpha, pick constant appropriate to the
+	   number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc.) */
+# define R300_TX_FORMAT_ALPHA_1CH 0x000
+# define R300_TX_FORMAT_ALPHA_2CH 0x200
+# define R300_TX_FORMAT_ALPHA_4CH 0x600
+# define R300_TX_FORMAT_ALPHA_NONE 0xA00
+ /* Swizzling */
+ /* constants */
+# define R300_TX_FORMAT_X 0
+# define R300_TX_FORMAT_Y 1
+# define R300_TX_FORMAT_Z 2
+# define R300_TX_FORMAT_W 3
+# define R300_TX_FORMAT_ZERO 4
+# define R300_TX_FORMAT_ONE 5
+ /* 2.0*Z, everything above 1.0 is set to 0.0 */
+# define R300_TX_FORMAT_CUT_Z 6
+ /* 2.0*W, everything above 1.0 is set to 0.0 */
+# define R300_TX_FORMAT_CUT_W 7
+
+# define R300_TX_FORMAT_B_SHIFT 18
+# define R300_TX_FORMAT_G_SHIFT 15
+# define R300_TX_FORMAT_R_SHIFT 12
+# define R300_TX_FORMAT_A_SHIFT 9
+ /* Convenience macro to take care of layout and swizzling */
+# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \
+ ((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \
+ | ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \
+ | ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \
+ | ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \
+ | (R300_TX_FORMAT_##FMT) \
+ )
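+	/* Usage sketch (illustrative only; the right swizzle depends on the
+	   byte order of the source image): an intensity texture that
+	   replicates the single X component into all four channels, and a
+	   four-component texture passed through in X, Y, Z, W order:
+
+	     fmt_i8   = R300_EASY_TX_FORMAT(X, X, X, X, X8);
+	     fmt_8888 = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+	 */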
+	/* These can be ORed with the result of R300_EASY_TX_FORMAT().
+	   We don't really know what they do. Take values from a
+	   constant color? */
+# define R300_TX_FORMAT_CONST_X (1<<5)
+# define R300_TX_FORMAT_CONST_Y (2<<5)
+# define R300_TX_FORMAT_CONST_Z (4<<5)
+# define R300_TX_FORMAT_CONST_W (8<<5)
+
+# define R300_TX_FORMAT_YUV_MODE 0x00800000
+
+#define R300_TX_PITCH_0 0x4500 /* obvious missing in gap */
+#define R300_TX_OFFSET_0 0x4540
+ /* BEGIN: Guess from R200 */
+# define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
+# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0)
+# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0)
+# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
+# define R300_TXO_MACRO_TILE (1 << 2)
+# define R300_TXO_MICRO_TILE (1 << 3)
+# define R300_TXO_OFFSET_MASK 0xffffffe0
+# define R300_TXO_OFFSET_SHIFT 5
+ /* END: Guess from R200 */
+
+/* 32 bit chroma key */
+#define R300_TX_CHROMA_KEY_0 0x4580
+/* ff00ff00 == { 0, 1.0, 0, 1.0 } */
+#define R300_TX_BORDER_COLOR_0 0x45C0
+
+/* END: Texture specification */
+
+/* BEGIN: Fragment program instruction set */
+
+/* Fragment programs are written directly into register space.
+ * There are separate instruction streams for texture instructions and ALU
+ * instructions.
+ * In order to synchronize these streams, the program is divided into up
+ * to 4 nodes. Each node begins with a number of TEX operations, followed
+ * by a number of ALU operations.
+ * The first node can have zero TEX ops; all subsequent nodes must have at
+ * least one TEX op.
+ * All nodes must have at least one ALU op.
+ *
+ * The index of the last node is stored in PFS_CNTL_0: A value of 0 means
+ * 1 node, a value of 3 means 4 nodes.
+ * The total amount of instructions is defined in PFS_CNTL_2. The offsets are
+ * offsets into the respective instruction streams, while *_END points to the
+ * last instruction relative to this offset.
+ */
+#define R300_PFS_CNTL_0 0x4600
+# define R300_PFS_CNTL_LAST_NODES_SHIFT 0
+# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0)
+# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3)
+#define R300_PFS_CNTL_1 0x4604
+/* There is an unshifted value here which has so far always been equal to the
+ * index of the highest used temporary register.
+ */
+#define R300_PFS_CNTL_2 0x4608
+# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0
+# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0)
+# define R300_PFS_CNTL_ALU_END_SHIFT 6
+# define R300_PFS_CNTL_ALU_END_MASK (63 << 6)
+# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 12
+# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 12) /* GUESS */
+# define R300_PFS_CNTL_TEX_END_SHIFT 18
+# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* GUESS */
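+
+/* Example (sketch, not from the driver): a single-node program with two TEX
+ * and four ALU instructions, with both instruction streams starting at
+ * offset 0:
+ *
+ *   cntl_0 = (0 << R300_PFS_CNTL_LAST_NODES_SHIFT)   // one node
+ *          | R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
+ *   cntl_2 = (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+ *          | (3 << R300_PFS_CNTL_ALU_END_SHIFT)      // 4 ALU ops, last index 3
+ *          | (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+ *          | (1 << R300_PFS_CNTL_TEX_END_SHIFT);     // 2 TEX ops, last index 1
+ */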
+
+/* gap */
+
+/* Nodes are stored backwards. The last active node is always stored in
+ * PFS_NODE_3.
+ * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The
+ * first node is stored in NODE_2, the second node is stored in NODE_3.
+ *
+ * Offsets are relative to the master offset from PFS_CNTL_2.
+ * LAST_NODE is set for the last node, and only for the last node.
+ */
+#define R300_PFS_NODE_0 0x4610
+#define R300_PFS_NODE_1 0x4614
+#define R300_PFS_NODE_2 0x4618
+#define R300_PFS_NODE_3 0x461C
+# define R300_PFS_NODE_ALU_OFFSET_SHIFT 0
+# define R300_PFS_NODE_ALU_OFFSET_MASK (63 << 0)
+# define R300_PFS_NODE_ALU_END_SHIFT 6
+# define R300_PFS_NODE_ALU_END_MASK (63 << 6)
+# define R300_PFS_NODE_TEX_OFFSET_SHIFT 12
+# define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12)
+# define R300_PFS_NODE_TEX_END_SHIFT 17
+# define R300_PFS_NODE_TEX_END_MASK (31 << 17)
+/*# define R300_PFS_NODE_LAST_NODE (1 << 22) */
+# define R300_PFS_NODE_OUTPUT_COLOR (1 << 22)
+# define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23)
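+
+/* Example (sketch): for the single-node program sketched above, NODE_0..2 are
+ * left at zero and the only node is written to NODE_3:
+ *
+ *   node_3 = (0 << R300_PFS_NODE_ALU_OFFSET_SHIFT)
+ *          | (3 << R300_PFS_NODE_ALU_END_SHIFT)
+ *          | (0 << R300_PFS_NODE_TEX_OFFSET_SHIFT)
+ *          | (1 << R300_PFS_NODE_TEX_END_SHIFT)
+ *          | R300_PFS_NODE_OUTPUT_COLOR;   // last node writes the color output
+ */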
+
+/* TEX
+ * As far as I can tell, texture instructions cannot write into output
+ * registers directly. A subsequent ALU instruction is always necessary,
+ * even if it's just MAD o0, r0, 1, 0
+ */
+#define R300_PFS_TEXI_0 0x4620
+# define R300_FPITX_SRC_SHIFT 0
+# define R300_FPITX_SRC_MASK (31 << 0)
+ /* GUESS */
+# define R300_FPITX_SRC_CONST (1 << 5)
+# define R300_FPITX_DST_SHIFT 6
+# define R300_FPITX_DST_MASK (31 << 6)
+# define R300_FPITX_IMAGE_SHIFT 11
+ /* GUESS based on layout and native limits */
+# define R300_FPITX_IMAGE_MASK (15 << 11)
+/* Unsure if these are opcodes, or some kind of bitfield, but this is how
+ * they were set when I checked
+ */
+# define R300_FPITX_OPCODE_SHIFT 15
+# define R300_FPITX_OP_TEX 1
+# define R300_FPITX_OP_KIL 2
+# define R300_FPITX_OP_TXP 3
+# define R300_FPITX_OP_TXB 4
+# define R300_FPITX_OPCODE_MASK (7 << 15)
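+
+/* Example (sketch): a plain "TEX temp0, temp0, texture[0]" lookup - sample
+ * texture image 0 with the coordinate in temporary 0 and write the result
+ * back into temporary 0:
+ *
+ *   texi = (0 << R300_FPITX_SRC_SHIFT)
+ *        | (0 << R300_FPITX_DST_SHIFT)
+ *        | (0 << R300_FPITX_IMAGE_SHIFT)
+ *        | (R300_FPITX_OP_TEX << R300_FPITX_OPCODE_SHIFT);
+ */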
+
+/* ALU
+ * The ALU instruction register blocks are enumerated according to the order
+ * in which fglrx emits them. I assume there is space for 64 instructions, since
+ * each block has space for a maximum of 64 DWORDs, and this matches reported
+ * native limits.
+ *
+ * The basic functional block seems to be one MAD for each color and alpha,
+ * and an adder that adds all components after the MUL.
+ * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands
+ * - DP4: Use OUTC_DP4, OUTA_DP4
+ * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands
+ * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands
+ * - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1
+ * - CMP: If ARG2 < 0, return ARG1, else return ARG0
+ * - FLR: use FRC+MAD
+ * - XPD: use MAD+MAD
+ * - SGE, SLT: use MAD+CMP
+ * - RSQ: use ABS modifier for argument
+ * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation
+ * (e.g. RCP) into color register
+ * - apparently, there's no quick DST operation
+ * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2"
+ * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0"
+ * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1"
+ *
+ * Operand selection
+ * First stage selects three sources from the available registers and
+ * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha).
+ * fglrx sorts the three source fields: Registers before constants,
+ * lower indices before higher indices; I do not know whether this is
+ * necessary.
+ *
+ * fglrx fills unused sources with "read constant 0"
+ * According to specs, you cannot select more than two different constants.
+ *
+ * Second stage selects the operands from the sources. This is defined in
+ * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants
+ * zero and one.
+ * Swizzling and negation happens in this stage, as well.
+ *
+ * Important: Color and alpha seem to be mostly separate, i.e. their sources
+ * selection appears to be fully independent (the register storage is probably
+ * physically split into a color and an alpha section).
+ * However (because of the apparent physical split), there is some interaction
+ * WRT swizzling. If, for example, you want to load an R component into an
+ * Alpha operand, this R component is taken from a *color* source, not from
+ * an alpha source. The corresponding register doesn't even have to appear in
+ * the alpha sources list. (I hope this all makes sense to you)
+ *
+ * Destination selection
+ * The destination register index is in FPI1 (color) and FPI3 (alpha)
+ * together with enable bits.
+ * There are separate enable bits for writing into temporary registers
+ * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_*
+ * /DSTA_OUTPUT). You can write to both at once, or not write at all (the
+ * same index must be used for both).
+ *
+ * Note: There is a special form for LRP
+ * - Argument order is the same as in ARB_fragment_program.
+ * - Operation is MAD
+ * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP
+ * - Set FPI0/FPI2_SPECIAL_LRP
+ * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD
+ */
+#define R300_PFS_INSTR1_0 0x46C0
+# define R300_FPI1_SRC0C_SHIFT 0
+# define R300_FPI1_SRC0C_MASK (31 << 0)
+# define R300_FPI1_SRC0C_CONST (1 << 5)
+# define R300_FPI1_SRC1C_SHIFT 6
+# define R300_FPI1_SRC1C_MASK (31 << 6)
+# define R300_FPI1_SRC1C_CONST (1 << 11)
+# define R300_FPI1_SRC2C_SHIFT 12
+# define R300_FPI1_SRC2C_MASK (31 << 12)
+# define R300_FPI1_SRC2C_CONST (1 << 17)
+# define R300_FPI1_SRC_MASK 0x0003ffff
+# define R300_FPI1_DSTC_SHIFT 18
+# define R300_FPI1_DSTC_MASK (31 << 18)
+# define R300_FPI1_DSTC_REG_MASK_SHIFT 23
+# define R300_FPI1_DSTC_REG_X (1 << 23)
+# define R300_FPI1_DSTC_REG_Y (1 << 24)
+# define R300_FPI1_DSTC_REG_Z (1 << 25)
+# define R300_FPI1_DSTC_OUTPUT_MASK_SHIFT 26
+# define R300_FPI1_DSTC_OUTPUT_X (1 << 26)
+# define R300_FPI1_DSTC_OUTPUT_Y (1 << 27)
+# define R300_FPI1_DSTC_OUTPUT_Z (1 << 28)
+
+#define R300_PFS_INSTR3_0 0x47C0
+# define R300_FPI3_SRC0A_SHIFT 0
+# define R300_FPI3_SRC0A_MASK (31 << 0)
+# define R300_FPI3_SRC0A_CONST (1 << 5)
+# define R300_FPI3_SRC1A_SHIFT 6
+# define R300_FPI3_SRC1A_MASK (31 << 6)
+# define R300_FPI3_SRC1A_CONST (1 << 11)
+# define R300_FPI3_SRC2A_SHIFT 12
+# define R300_FPI3_SRC2A_MASK (31 << 12)
+# define R300_FPI3_SRC2A_CONST (1 << 17)
+# define R300_FPI3_SRC_MASK 0x0003ffff
+# define R300_FPI3_DSTA_SHIFT 18
+# define R300_FPI3_DSTA_MASK (31 << 18)
+# define R300_FPI3_DSTA_REG (1 << 23)
+# define R300_FPI3_DSTA_OUTPUT (1 << 24)
+# define R300_FPI3_DSTA_DEPTH (1 << 27)
+
+#define R300_PFS_INSTR0_0 0x48C0
+# define R300_FPI0_ARGC_SRC0C_XYZ 0
+# define R300_FPI0_ARGC_SRC0C_XXX 1
+# define R300_FPI0_ARGC_SRC0C_YYY 2
+# define R300_FPI0_ARGC_SRC0C_ZZZ 3
+# define R300_FPI0_ARGC_SRC1C_XYZ 4
+# define R300_FPI0_ARGC_SRC1C_XXX 5
+# define R300_FPI0_ARGC_SRC1C_YYY 6
+# define R300_FPI0_ARGC_SRC1C_ZZZ 7
+# define R300_FPI0_ARGC_SRC2C_XYZ 8
+# define R300_FPI0_ARGC_SRC2C_XXX 9
+# define R300_FPI0_ARGC_SRC2C_YYY 10
+# define R300_FPI0_ARGC_SRC2C_ZZZ 11
+# define R300_FPI0_ARGC_SRC0A 12
+# define R300_FPI0_ARGC_SRC1A 13
+# define R300_FPI0_ARGC_SRC2A 14
+# define R300_FPI0_ARGC_SRC1C_LRP 15
+# define R300_FPI0_ARGC_ZERO 20
+# define R300_FPI0_ARGC_ONE 21
+ /* GUESS */
+# define R300_FPI0_ARGC_HALF 22
+# define R300_FPI0_ARGC_SRC0C_YZX 23
+# define R300_FPI0_ARGC_SRC1C_YZX 24
+# define R300_FPI0_ARGC_SRC2C_YZX 25
+# define R300_FPI0_ARGC_SRC0C_ZXY 26
+# define R300_FPI0_ARGC_SRC1C_ZXY 27
+# define R300_FPI0_ARGC_SRC2C_ZXY 28
+# define R300_FPI0_ARGC_SRC0CA_WZY 29
+# define R300_FPI0_ARGC_SRC1CA_WZY 30
+# define R300_FPI0_ARGC_SRC2CA_WZY 31
+
+# define R300_FPI0_ARG0C_SHIFT 0
+# define R300_FPI0_ARG0C_MASK (31 << 0)
+# define R300_FPI0_ARG0C_NEG (1 << 5)
+# define R300_FPI0_ARG0C_ABS (1 << 6)
+# define R300_FPI0_ARG1C_SHIFT 7
+# define R300_FPI0_ARG1C_MASK (31 << 7)
+# define R300_FPI0_ARG1C_NEG (1 << 12)
+# define R300_FPI0_ARG1C_ABS (1 << 13)
+# define R300_FPI0_ARG2C_SHIFT 14
+# define R300_FPI0_ARG2C_MASK (31 << 14)
+# define R300_FPI0_ARG2C_NEG (1 << 19)
+# define R300_FPI0_ARG2C_ABS (1 << 20)
+# define R300_FPI0_SPECIAL_LRP (1 << 21)
+# define R300_FPI0_OUTC_MAD (0 << 23)
+# define R300_FPI0_OUTC_DP3 (1 << 23)
+# define R300_FPI0_OUTC_DP4 (2 << 23)
+# define R300_FPI0_OUTC_MIN (4 << 23)
+# define R300_FPI0_OUTC_MAX (5 << 23)
+# define R300_FPI0_OUTC_CMPH (7 << 23)
+# define R300_FPI0_OUTC_CMP (8 << 23)
+# define R300_FPI0_OUTC_FRC (9 << 23)
+# define R300_FPI0_OUTC_REPL_ALPHA (10 << 23)
+# define R300_FPI0_OUTC_SAT (1 << 30)
+# define R300_FPI0_INSERT_NOP (1 << 31)
+
+#define R300_PFS_INSTR2_0 0x49C0
+# define R300_FPI2_ARGA_SRC0C_X 0
+# define R300_FPI2_ARGA_SRC0C_Y 1
+# define R300_FPI2_ARGA_SRC0C_Z 2
+# define R300_FPI2_ARGA_SRC1C_X 3
+# define R300_FPI2_ARGA_SRC1C_Y 4
+# define R300_FPI2_ARGA_SRC1C_Z 5
+# define R300_FPI2_ARGA_SRC2C_X 6
+# define R300_FPI2_ARGA_SRC2C_Y 7
+# define R300_FPI2_ARGA_SRC2C_Z 8
+# define R300_FPI2_ARGA_SRC0A 9
+# define R300_FPI2_ARGA_SRC1A 10
+# define R300_FPI2_ARGA_SRC2A 11
+# define R300_FPI2_ARGA_SRC1A_LRP 15
+# define R300_FPI2_ARGA_ZERO 16
+# define R300_FPI2_ARGA_ONE 17
+ /* GUESS */
+# define R300_FPI2_ARGA_HALF 18
+# define R300_FPI2_ARG0A_SHIFT 0
+# define R300_FPI2_ARG0A_MASK (31 << 0)
+# define R300_FPI2_ARG0A_NEG (1 << 5)
+ /* GUESS */
+# define R300_FPI2_ARG0A_ABS (1 << 6)
+# define R300_FPI2_ARG1A_SHIFT 7
+# define R300_FPI2_ARG1A_MASK (31 << 7)
+# define R300_FPI2_ARG1A_NEG (1 << 12)
+ /* GUESS */
+# define R300_FPI2_ARG1A_ABS (1 << 13)
+# define R300_FPI2_ARG2A_SHIFT 14
+# define R300_FPI2_ARG2A_MASK (31 << 14)
+# define R300_FPI2_ARG2A_NEG (1 << 19)
+ /* GUESS */
+# define R300_FPI2_ARG2A_ABS (1 << 20)
+# define R300_FPI2_SPECIAL_LRP (1 << 21)
+# define R300_FPI2_OUTA_MAD (0 << 23)
+# define R300_FPI2_OUTA_DP4 (1 << 23)
+# define R300_FPI2_OUTA_MIN (2 << 23)
+# define R300_FPI2_OUTA_MAX (3 << 23)
+# define R300_FPI2_OUTA_CMP (6 << 23)
+# define R300_FPI2_OUTA_FRC (7 << 23)
+# define R300_FPI2_OUTA_EX2 (8 << 23)
+# define R300_FPI2_OUTA_LG2 (9 << 23)
+# define R300_FPI2_OUTA_RCP (10 << 23)
+# define R300_FPI2_OUTA_RSQ (11 << 23)
+# define R300_FPI2_OUTA_SAT (1 << 30)
+# define R300_FPI2_UNKNOWN_31 (1 << 31)
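+
+/* Example (illustrative sketch): the "MAD o0, r0, 1, 0" move mentioned in the
+ * TEX comment above, copying temporary 0 to output 0. Source selection reads
+ * register 0; operand selection computes r0 * 1 + 0 for both the color and
+ * the alpha half of the instruction:
+ *
+ *   instr1 = (0 << R300_FPI1_SRC0C_SHIFT)
+ *          | (0 << R300_FPI1_DSTC_SHIFT)
+ *          | R300_FPI1_DSTC_OUTPUT_X | R300_FPI1_DSTC_OUTPUT_Y
+ *          | R300_FPI1_DSTC_OUTPUT_Z;
+ *   instr0 = (R300_FPI0_ARGC_SRC0C_XYZ << R300_FPI0_ARG0C_SHIFT)
+ *          | (R300_FPI0_ARGC_ONE << R300_FPI0_ARG1C_SHIFT)
+ *          | (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT)
+ *          | R300_FPI0_OUTC_MAD;
+ *   instr3 = (0 << R300_FPI3_SRC0A_SHIFT)
+ *          | (0 << R300_FPI3_DSTA_SHIFT)
+ *          | R300_FPI3_DSTA_OUTPUT;
+ *   instr2 = (R300_FPI2_ARGA_SRC0A << R300_FPI2_ARG0A_SHIFT)
+ *          | (R300_FPI2_ARGA_ONE << R300_FPI2_ARG1A_SHIFT)
+ *          | (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT)
+ *          | R300_FPI2_OUTA_MAD;
+ */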
+/* END: Fragment program instruction set */
+
+/* Fog state and color */
+#define R300_RE_FOG_STATE 0x4BC0
+# define R300_FOG_ENABLE (1 << 0)
+# define R300_FOG_MODE_LINEAR (0 << 1)
+# define R300_FOG_MODE_EXP (1 << 1)
+# define R300_FOG_MODE_EXP2 (2 << 1)
+# define R300_FOG_MODE_MASK (3 << 1)
+#define R300_FOG_COLOR_R 0x4BC8
+#define R300_FOG_COLOR_G 0x4BCC
+#define R300_FOG_COLOR_B 0x4BD0
+
+#define R300_PP_ALPHA_TEST 0x4BD4
+# define R300_REF_ALPHA_MASK 0x000000ff
+# define R300_ALPHA_TEST_FAIL (0 << 8)
+# define R300_ALPHA_TEST_LESS (1 << 8)
+# define R300_ALPHA_TEST_LEQUAL (3 << 8)
+# define R300_ALPHA_TEST_EQUAL (2 << 8)
+# define R300_ALPHA_TEST_GEQUAL (6 << 8)
+# define R300_ALPHA_TEST_GREATER (4 << 8)
+# define R300_ALPHA_TEST_NEQUAL (5 << 8)
+# define R300_ALPHA_TEST_PASS (7 << 8)
+# define R300_ALPHA_TEST_OP_MASK (7 << 8)
+# define R300_ALPHA_TEST_ENABLE (1 << 11)
+
+/* gap */
+
+/* Fragment program parameters in 7.16 floating point */
+#define R300_PFS_PARAM_0_X 0x4C00
+#define R300_PFS_PARAM_0_Y 0x4C04
+#define R300_PFS_PARAM_0_Z 0x4C08
+#define R300_PFS_PARAM_0_W 0x4C0C
+/* GUESS: PARAM_31 is last, based on native limits reported by fglrx */
+#define R300_PFS_PARAM_31_X 0x4DF0
+#define R300_PFS_PARAM_31_Y 0x4DF4
+#define R300_PFS_PARAM_31_Z 0x4DF8
+#define R300_PFS_PARAM_31_W 0x4DFC
+
+/* Notes:
+ * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in
+ * the application
+ * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND
+ * are set to the same
+ * function (both registers are always set up completely in any case)
+ * - Most blend flags are simply copied from R200 and not tested yet
+ */
+#define R300_RB3D_CBLEND 0x4E04
+#define R300_RB3D_ABLEND 0x4E08
+/* the following only appear in CBLEND */
+# define R300_BLEND_ENABLE (1 << 0)
+# define R300_BLEND_UNKNOWN (3 << 1)
+# define R300_BLEND_NO_SEPARATE (1 << 3)
+/* the following are shared between CBLEND and ABLEND */
+# define R300_FCN_MASK (3 << 12)
+# define R300_COMB_FCN_ADD_CLAMP (0 << 12)
+# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12)
+# define R300_COMB_FCN_SUB_CLAMP (2 << 12)
+# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12)
+# define R300_COMB_FCN_MIN (4 << 12)
+# define R300_COMB_FCN_MAX (5 << 12)
+# define R300_COMB_FCN_RSUB_CLAMP (6 << 12)
+# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12)
+# define R300_BLEND_GL_ZERO (32)
+# define R300_BLEND_GL_ONE (33)
+# define R300_BLEND_GL_SRC_COLOR (34)
+# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35)
+# define R300_BLEND_GL_DST_COLOR (36)
+# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37)
+# define R300_BLEND_GL_SRC_ALPHA (38)
+# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39)
+# define R300_BLEND_GL_DST_ALPHA (40)
+# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41)
+# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42)
+# define R300_BLEND_GL_CONST_COLOR (43)
+# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44)
+# define R300_BLEND_GL_CONST_ALPHA (45)
+# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46)
+# define R300_BLEND_MASK (63)
+# define R300_SRC_BLEND_SHIFT (16)
+# define R300_DST_BLEND_SHIFT (24)
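+
+/* Example (sketch): a standard GL_SRC_ALPHA / GL_ONE_MINUS_SRC_ALPHA blend
+ * with additive combination. The same function/factor value is written to
+ * both registers; the enable bits only exist in CBLEND:
+ *
+ *   ablend = R300_COMB_FCN_ADD_CLAMP
+ *          | (R300_BLEND_GL_SRC_ALPHA << R300_SRC_BLEND_SHIFT)
+ *          | (R300_BLEND_GL_ONE_MINUS_SRC_ALPHA << R300_DST_BLEND_SHIFT);
+ *   cblend = ablend | R300_BLEND_ENABLE | R300_BLEND_UNKNOWN;
+ */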
+#define R300_RB3D_BLEND_COLOR 0x4E10
+#define R300_RB3D_COLORMASK 0x4E0C
+# define R300_COLORMASK0_B (1<<0)
+# define R300_COLORMASK0_G (1<<1)
+# define R300_COLORMASK0_R (1<<2)
+# define R300_COLORMASK0_A (1<<3)
+
+/* gap */
+
+#define R300_RB3D_COLOROFFSET0 0x4E28
+# define R300_COLOROFFSET_MASK 0xFFFFFFF0 /* GUESS */
+#define R300_RB3D_COLOROFFSET1 0x4E2C /* GUESS */
+#define R300_RB3D_COLOROFFSET2 0x4E30 /* GUESS */
+#define R300_RB3D_COLOROFFSET3 0x4E34 /* GUESS */
+
+/* gap */
+
+/* Bit 16: Larger tiles
+ * Bit 17: 4x2 tiles
+ * Bit 18: Extremely weird tile like, but some pixels duplicated?
+ */
+#define R300_RB3D_COLORPITCH0 0x4E38
+# define R300_COLORPITCH_MASK 0x00001FF8 /* GUESS */
+# define R300_COLOR_TILE_ENABLE (1 << 16) /* GUESS */
+# define R300_COLOR_MICROTILE_ENABLE (1 << 17) /* GUESS */
+# define R300_COLOR_ENDIAN_NO_SWAP (0 << 18) /* GUESS */
+# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */
+# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */
+# define R300_COLOR_FORMAT_RGB565 (2 << 22)
+# define R300_COLOR_FORMAT_ARGB8888 (3 << 22)
+#define R300_RB3D_COLORPITCH1 0x4E3C /* GUESS */
+#define R300_RB3D_COLORPITCH2 0x4E40 /* GUESS */
+#define R300_RB3D_COLORPITCH3 0x4E44 /* GUESS */
+
+/* gap */
+
+/* Guess by Vladimir.
+ * Set to 0A before 3D operations, set to 02 afterwards.
+ */
+#define R300_RB3D_DSTCACHE_CTLSTAT 0x4E4C
+# define R300_RB3D_DSTCACHE_UNKNOWN_02 0x00000002
+# define R300_RB3D_DSTCACHE_UNKNOWN_0A 0x0000000A
+
+/* gap */
+/* There seems to be no "write only" setting, so use Z-test = ALWAYS
+ * for this.
+ * Bit (1 << 8) is the "test" bit, so a plain write is 6. - vd
+ */
+#define R300_RB3D_ZSTENCIL_CNTL_0 0x4F00
+# define R300_RB3D_Z_DISABLED_1 0x00000010
+# define R300_RB3D_Z_DISABLED_2 0x00000014
+# define R300_RB3D_Z_TEST 0x00000012
+# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016
+# define R300_RB3D_Z_WRITE_ONLY 0x00000006
+# define R300_RB3D_STENCIL_ENABLE 0x00000001
+
+#define R300_RB3D_ZSTENCIL_CNTL_1 0x4F04
+ /* functions */
+# define R300_ZS_NEVER 0
+# define R300_ZS_LESS 1
+# define R300_ZS_LEQUAL 2
+# define R300_ZS_EQUAL 3
+# define R300_ZS_GEQUAL 4
+# define R300_ZS_GREATER 5
+# define R300_ZS_NOTEQUAL 6
+# define R300_ZS_ALWAYS 7
+# define R300_ZS_MASK 7
+ /* operations */
+# define R300_ZS_KEEP 0
+# define R300_ZS_ZERO 1
+# define R300_ZS_REPLACE 2
+# define R300_ZS_INCR 3
+# define R300_ZS_DECR 4
+# define R300_ZS_INVERT 5
+# define R300_ZS_INCR_WRAP 6
+# define R300_ZS_DECR_WRAP 7
+ /* front and back refer to operations done for front
+ and back faces, i.e. separate stencil function support */
+# define R300_RB3D_ZS1_DEPTH_FUNC_SHIFT 0
+# define R300_RB3D_ZS1_FRONT_FUNC_SHIFT 3
+# define R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT 6
+# define R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT 9
+# define R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT 12
+# define R300_RB3D_ZS1_BACK_FUNC_SHIFT 15
+# define R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT 18
+# define R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT 21
+# define R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT 24
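+
+/* Example (sketch): LESS depth test with a stencil setup that never modifies
+ * the stencil buffer (function ALWAYS, all operations KEEP) on both faces:
+ *
+ *   zs1 = (R300_ZS_LESS   << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT)
+ *       | (R300_ZS_ALWAYS << R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
+ *       | (R300_ZS_ALWAYS << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
+ *   // the fail/zpass/zfail fields stay at R300_ZS_KEEP, which happens to be 0
+ */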
+
+#define R300_RB3D_ZSTENCIL_CNTL_2 0x4F08
+# define R300_RB3D_ZS2_STENCIL_REF_SHIFT 0
+# define R300_RB3D_ZS2_STENCIL_MASK 0xFF
+# define R300_RB3D_ZS2_STENCIL_MASK_SHIFT 8
+# define R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT 16
+
+/* gap */
+
+#define R300_RB3D_ZSTENCIL_FORMAT 0x4F10
+# define R300_DEPTH_FORMAT_16BIT_INT_Z (0 << 0)
+# define R300_DEPTH_FORMAT_24BIT_INT_Z (2 << 0)
+	/* 16 bit format or some additional bit? */
+# define R300_DEPTH_FORMAT_UNK32 (32 << 0)
+
+#define R300_RB3D_EARLY_Z 0x4F14
+# define R300_EARLY_Z_DISABLE (0 << 0)
+# define R300_EARLY_Z_ENABLE (1 << 0)
+
+/* gap */
+
+#define R300_RB3D_ZCACHE_CTLSTAT 0x4F18 /* GUESS */
+# define R300_RB3D_ZCACHE_UNKNOWN_01 0x1
+# define R300_RB3D_ZCACHE_UNKNOWN_03 0x3
+
+/* gap */
+
+#define R300_RB3D_DEPTHOFFSET 0x4F20
+#define R300_RB3D_DEPTHPITCH 0x4F24
+# define R300_DEPTHPITCH_MASK 0x00001FF8 /* GUESS */
+# define R300_DEPTH_TILE_ENABLE (1 << 16) /* GUESS */
+# define R300_DEPTH_MICROTILE_ENABLE (1 << 17) /* GUESS */
+# define R300_DEPTH_ENDIAN_NO_SWAP (0 << 18) /* GUESS */
+# define R300_DEPTH_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */
+# define R300_DEPTH_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */
+
+/* BEGIN: Vertex program instruction set */
+
+/* Every instruction is four dwords long:
+ * DWORD 0: output and opcode
+ * DWORD 1: first argument
+ * DWORD 2: second argument
+ * DWORD 3: third argument
+ *
+ * Notes:
+ * - ABS r, a is implemented as MAX r, a, -a
+ * - MOV is implemented as ADD to zero
+ * - XPD is implemented as MUL + MAD
+ * - FLR is implemented as FRC + ADD
+ * - apparently, fglrx tries to schedule instructions so that there is at
+ * least one instruction between the write to a temporary and the first
+ * read from said temporary; however, violations of this scheduling are
+ * allowed
+ * - register indices seem to be unrelated to OpenGL aliasing to
+ * conventional state
+ * - only one attribute and one parameter can be loaded at a time; however,
+ * the same attribute/parameter can be used for more than one argument
+ * - the second software argument for POW is the third hardware argument
+ * (no idea why)
+ * - MAD with only temporaries as input seems to use VPI_OUT_SELECT_MAD_2
+ *
+ * There is some magic surrounding LIT:
+ * The single argument is replicated across all three inputs, but swizzled:
+ * First argument: xyzy
+ * Second argument: xyzx
+ * Third argument: xyzw
+ * Whenever the result is used later in the fragment program, fglrx forces
+ * x and w to be 1.0 in the input selection; I don't know whether this is
+ * strictly necessary
+ */
+#define R300_VPI_OUT_OP_DOT (1 << 0)
+#define R300_VPI_OUT_OP_MUL (2 << 0)
+#define R300_VPI_OUT_OP_ADD (3 << 0)
+#define R300_VPI_OUT_OP_MAD (4 << 0)
+#define R300_VPI_OUT_OP_DST (5 << 0)
+#define R300_VPI_OUT_OP_FRC (6 << 0)
+#define R300_VPI_OUT_OP_MAX (7 << 0)
+#define R300_VPI_OUT_OP_MIN (8 << 0)
+#define R300_VPI_OUT_OP_SGE (9 << 0)
+#define R300_VPI_OUT_OP_SLT (10 << 0)
+ /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, vector(scalar, vector) */
+#define R300_VPI_OUT_OP_UNK12 (12 << 0)
+#define R300_VPI_OUT_OP_ARL (13 << 0)
+#define R300_VPI_OUT_OP_EXP (65 << 0)
+#define R300_VPI_OUT_OP_LOG (66 << 0)
+ /* Used in fog computations, scalar(scalar) */
+#define R300_VPI_OUT_OP_UNK67 (67 << 0)
+#define R300_VPI_OUT_OP_LIT (68 << 0)
+#define R300_VPI_OUT_OP_POW (69 << 0)
+#define R300_VPI_OUT_OP_RCP (70 << 0)
+#define R300_VPI_OUT_OP_RSQ (72 << 0)
+ /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, scalar(scalar) */
+#define R300_VPI_OUT_OP_UNK73 (73 << 0)
+#define R300_VPI_OUT_OP_EX2 (75 << 0)
+#define R300_VPI_OUT_OP_LG2 (76 << 0)
+#define R300_VPI_OUT_OP_MAD_2 (128 << 0)
+ /* all temps, vector(scalar, vector, vector) */
+#define R300_VPI_OUT_OP_UNK129 (129 << 0)
+
+#define R300_VPI_OUT_REG_CLASS_TEMPORARY (0 << 8)
+#define R300_VPI_OUT_REG_CLASS_ADDR (1 << 8)
+#define R300_VPI_OUT_REG_CLASS_RESULT (2 << 8)
+#define R300_VPI_OUT_REG_CLASS_MASK (31 << 8)
+
+#define R300_VPI_OUT_REG_INDEX_SHIFT 13
+ /* GUESS based on fglrx native limits */
+#define R300_VPI_OUT_REG_INDEX_MASK (31 << 13)
+
+#define R300_VPI_OUT_WRITE_X (1 << 20)
+#define R300_VPI_OUT_WRITE_Y (1 << 21)
+#define R300_VPI_OUT_WRITE_Z (1 << 22)
+#define R300_VPI_OUT_WRITE_W (1 << 23)
+
+#define R300_VPI_IN_REG_CLASS_TEMPORARY (0 << 0)
+#define R300_VPI_IN_REG_CLASS_ATTRIBUTE (1 << 0)
+#define R300_VPI_IN_REG_CLASS_PARAMETER (2 << 0)
+#define R300_VPI_IN_REG_CLASS_NONE (9 << 0)
+#define R300_VPI_IN_REG_CLASS_MASK (31 << 0)
+
+#define R300_VPI_IN_REG_INDEX_SHIFT 5
+ /* GUESS based on fglrx native limits */
+#define R300_VPI_IN_REG_INDEX_MASK (255 << 5)
+
+/* The R300 can select components from the input register arbitrarily.
+ * Use the following constants, shifted left by the shift of the component
+ * slot you want to fill (R300_VPI_IN_{X,Y,Z,W}_SHIFT below).
+ */
+#define R300_VPI_IN_SELECT_X 0
+#define R300_VPI_IN_SELECT_Y 1
+#define R300_VPI_IN_SELECT_Z 2
+#define R300_VPI_IN_SELECT_W 3
+#define R300_VPI_IN_SELECT_ZERO 4
+#define R300_VPI_IN_SELECT_ONE 5
+#define R300_VPI_IN_SELECT_MASK 7
+
+#define R300_VPI_IN_X_SHIFT 13
+#define R300_VPI_IN_Y_SHIFT 16
+#define R300_VPI_IN_Z_SHIFT 19
+#define R300_VPI_IN_W_SHIFT 22
+
+#define R300_VPI_IN_NEG_X (1 << 25)
+#define R300_VPI_IN_NEG_Y (1 << 26)
+#define R300_VPI_IN_NEG_Z (1 << 27)
+#define R300_VPI_IN_NEG_W (1 << 28)
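+
+/* Example (sketch; the handling of the unused third argument is a guess):
+ * "MOV result[0], attribute[0]", implemented as an ADD with a zero second
+ * operand as described in the notes above:
+ *
+ *   dw0 = R300_VPI_OUT_OP_ADD
+ *       | R300_VPI_OUT_REG_CLASS_RESULT
+ *       | (0 << R300_VPI_OUT_REG_INDEX_SHIFT)
+ *       | R300_VPI_OUT_WRITE_X | R300_VPI_OUT_WRITE_Y
+ *       | R300_VPI_OUT_WRITE_Z | R300_VPI_OUT_WRITE_W;
+ *   dw1 = R300_VPI_IN_REG_CLASS_ATTRIBUTE
+ *       | (0 << R300_VPI_IN_REG_INDEX_SHIFT)
+ *       | (R300_VPI_IN_SELECT_X << R300_VPI_IN_X_SHIFT)
+ *       | (R300_VPI_IN_SELECT_Y << R300_VPI_IN_Y_SHIFT)
+ *       | (R300_VPI_IN_SELECT_Z << R300_VPI_IN_Z_SHIFT)
+ *       | (R300_VPI_IN_SELECT_W << R300_VPI_IN_W_SHIFT);
+ *   dw2 = R300_VPI_IN_REG_CLASS_ATTRIBUTE       // second operand: all zeros
+ *       | (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_X_SHIFT)
+ *       | (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_Y_SHIFT)
+ *       | (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_Z_SHIFT)
+ *       | (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_W_SHIFT);
+ *   dw3 = dw2;                                  // unused third argument
+ */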
+/* END: Vertex program instruction set */
+
+/* BEGIN: Packet 3 commands */
+
+/* A primitive emission dword. */
+#define R300_PRIM_TYPE_NONE (0 << 0)
+#define R300_PRIM_TYPE_POINT (1 << 0)
+#define R300_PRIM_TYPE_LINE (2 << 0)
+#define R300_PRIM_TYPE_LINE_STRIP (3 << 0)
+#define R300_PRIM_TYPE_TRI_LIST (4 << 0)
+#define R300_PRIM_TYPE_TRI_FAN (5 << 0)
+#define R300_PRIM_TYPE_TRI_STRIP (6 << 0)
+#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0)
+#define R300_PRIM_TYPE_RECT_LIST (8 << 0)
+#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0)
+#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0)
+ /* GUESS (based on r200) */
+#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0)
+#define R300_PRIM_TYPE_LINE_LOOP (12 << 0)
+#define R300_PRIM_TYPE_QUADS (13 << 0)
+#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0)
+#define R300_PRIM_TYPE_POLYGON (15 << 0)
+#define R300_PRIM_TYPE_MASK 0xF
+#define R300_PRIM_WALK_IND (1 << 4)
+#define R300_PRIM_WALK_LIST (2 << 4)
+#define R300_PRIM_WALK_RING (3 << 4)
+#define R300_PRIM_WALK_MASK (3 << 4)
+ /* GUESS (based on r200) */
+#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6)
+#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6)
+#define R300_PRIM_NUM_VERTICES_SHIFT 16
+#define R300_PRIM_NUM_VERTICES_MASK 0xffff
+
+/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
+ * Two parameter dwords:
+ * 0. The first parameter appears to be always 0
+ * 1. The second parameter is a standard primitive emission dword.
+ */
+#define R300_PACKET3_3D_DRAW_VBUF 0x00002800
+
+/* Specify the full set of vertex arrays as (address, stride).
+ * The first parameter is the number of vertex arrays specified.
+ * The rest of the command is a variable length list of blocks, where
+ * each block is three dwords long and specifies two arrays.
+ * The first dword of a block is split into two words: the less significant
+ * word refers to the first array, the more significant word to the second
+ * array in the block.
+ * The low byte of each word contains the size of an array entry in dwords,
+ * the high byte contains the stride of the array.
+ * The second dword of a block contains the pointer to the first array,
+ * the third dword of a block contains the pointer to the second array.
+ * Note that if the total number of arrays is odd, the third dword of
+ * the last block is omitted.
+ */
+#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00
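+
+/* Example (sketch) for three arrays a0, a1, a2, where size_N is the entry
+ * size in dwords and stride_N the stride in dwords:
+ *
+ *   dword 0: 3                                         (number of arrays)
+ *   dword 1: size_0 | (stride_0 << 8)
+ *          | (size_1 << 16) | (stride_1 << 24)
+ *   dword 2: address of a0
+ *   dword 3: address of a1
+ *   dword 4: size_2 | (stride_2 << 8)                  (upper word unused)
+ *   dword 5: address of a2; the third dword of the last block is omitted
+ */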
+
+#define R300_PACKET3_INDX_BUFFER 0x00003300
+# define R300_EB_UNK1_SHIFT 24
+# define R300_EB_UNK1 (0x80<<24)
+# define R300_EB_UNK2 0x0810
+#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600
+
+/* END: Packet 3 commands */
+
+
+/* Color formats for 2d packets
+ */
+#define R300_CP_COLOR_FORMAT_CI8 2
+#define R300_CP_COLOR_FORMAT_ARGB1555 3
+#define R300_CP_COLOR_FORMAT_RGB565 4
+#define R300_CP_COLOR_FORMAT_ARGB8888 6
+#define R300_CP_COLOR_FORMAT_RGB332 7
+#define R300_CP_COLOR_FORMAT_RGB8 9
+#define R300_CP_COLOR_FORMAT_ARGB4444 15
+
+/*
+ * CP type-3 packets
+ */
+#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00
+
+#endif /* _R300_REG_H */
+
+/* *INDENT-ON* */
diff --git a/r300/r300_render.c b/r300/r300_render.c
new file mode 100644
index 0000000..cc13e9a
--- /dev/null
+++ b/r300/r300_render.c
@@ -0,0 +1,536 @@
+/**************************************************************************
+
+Copyright (C) 2004 Nicolai Haehnle.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \brief R300 Render (Vertex Buffer Implementation)
+ *
+ * The immediate implementation has been removed from CVS in favor of the vertex
+ * buffer implementation.
+ *
+ * The render functions are called by the pipeline manager to render a batch of
+ * primitives. They return TRUE to pass on to the next stage (i.e. software
+ * rasterization) or FALSE to indicate that the pipeline has finished after
+ * rendering something.
+ *
+ * When falling back to software TCL, we still attempt to use hardware
+ * rasterization.
+ *
+ * I am not sure that the cache-related registers are set up correctly, but
+ * obviously this does work... Further investigation is needed.
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "glheader.h"
+#include "state.h"
+#include "imports.h"
+#include "enums.h"
+#include "macros.h"
+#include "context.h"
+#include "dd.h"
+#include "simple_list.h"
+#include "api_arrayelt.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_vp_build.h"
+#include "radeon_reg.h"
+#include "radeon_macros.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "r300_context.h"
+#include "r300_ioctl.h"
+#include "r300_state.h"
+#include "r300_reg.h"
+#include "r300_tex.h"
+#include "r300_emit.h"
+extern int future_hw_tcl_on;
+
+/**
+ * \brief Convert an OpenGL primitive type into an R300 primitive type.
+ */
+static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim)
+{
+ switch (prim & PRIM_MODE_MASK) {
+ case GL_POINTS:
+ return R300_VAP_VF_CNTL__PRIM_POINTS;
+ break;
+ case GL_LINES:
+ return R300_VAP_VF_CNTL__PRIM_LINES;
+ break;
+ case GL_LINE_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
+ break;
+ case GL_LINE_LOOP:
+ return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
+ break;
+ case GL_TRIANGLES:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
+ break;
+ case GL_TRIANGLE_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
+ break;
+ case GL_TRIANGLE_FAN:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
+ break;
+ case GL_QUADS:
+ return R300_VAP_VF_CNTL__PRIM_QUADS;
+ break;
+ case GL_QUAD_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
+ break;
+ case GL_POLYGON:
+ return R300_VAP_VF_CNTL__PRIM_POLYGON;
+ break;
+ default:
+ assert(0);
+ return -1;
+ break;
+ }
+}
+
+static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
+{
+ int verts_off = 0;
+
+ switch (prim & PRIM_MODE_MASK) {
+ case GL_POINTS:
+ verts_off = 0;
+ break;
+ case GL_LINES:
+ verts_off = num_verts % 2;
+ break;
+ case GL_LINE_STRIP:
+ if (num_verts < 2)
+ verts_off = num_verts;
+ break;
+ case GL_LINE_LOOP:
+ if (num_verts < 2)
+ verts_off = num_verts;
+ break;
+ case GL_TRIANGLES:
+ verts_off = num_verts % 3;
+ break;
+ case GL_TRIANGLE_STRIP:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ case GL_TRIANGLE_FAN:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ case GL_QUADS:
+ verts_off = num_verts % 4;
+ break;
+ case GL_QUAD_STRIP:
+ if (num_verts < 4)
+ verts_off = num_verts;
+ else
+ verts_off = num_verts % 2;
+ break;
+ case GL_POLYGON:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ default:
+ assert(0);
+ return -1;
+ break;
+ }
+
+ return num_verts - verts_off;
+}
+
+static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts,
+ int elt_size)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct r300_dma_region *rvb = &rmesa->state.elt_dma;
+ void *out;
+
+ assert(elt_size == 2 || elt_size == 4);
+
+ if (r300IsGartMemory(rmesa, elts, n_elts * elt_size)) {
+ rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
+ rvb->start = ((char *)elts) - rvb->address;
+ rvb->aos_offset =
+ rmesa->radeon.radeonScreen->gart_texture_offset +
+ rvb->start;
+ return;
+ } else if (r300IsGartMemory(rmesa, elts, 1)) {
+ WARN_ONCE("Pointer not within GART memory!\n");
+ _mesa_exit(-1);
+ }
+
+ r300AllocDmaRegion(rmesa, rvb, n_elts * elt_size, elt_size);
+ rvb->aos_offset = GET_START(rvb);
+
+ out = rvb->address + rvb->start;
+ memcpy(out, elts, n_elts * elt_size);
+}
+
+static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
+ int vertex_count, int type, int elt_size)
+{
+ int cmd_reserved = 0;
+ int cmd_written = 0;
+ drm_radeon_cmd_header_t *cmd = NULL;
+ unsigned long t_addr;
+ unsigned long magic_1, magic_2;
+
+ assert(elt_size == 2 || elt_size == 4);
+
+ if (addr & (elt_size - 1)) {
+ WARN_ONCE("Badly aligned buffer\n");
+ return;
+ }
+
+ magic_1 = (addr % 32) / 4;
+ t_addr = addr & ~0x1d;
+ magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1;
+
+ start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
+ if (elt_size == 4) {
+ e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+ (vertex_count << 16) | type |
+ R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+ } else {
+ e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+ (vertex_count << 16) | type);
+ }
+
+ start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
+#ifdef OPTIMIZE_ELTS
+ if (elt_size == 4) {
+ e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
+ e32(addr);
+ } else {
+ e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2);
+ e32(t_addr);
+ }
+#else
+ e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
+ e32(addr);
+#endif
+
+ if (elt_size == 4) {
+ e32(vertex_count);
+ } else {
+#ifdef OPTIMIZE_ELTS
+ e32(magic_2);
+#else
+ e32((vertex_count + 1) / 2);
+#endif
+ }
+}
+
+static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
+{
+ int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+ int i;
+ int cmd_reserved = 0;
+ int cmd_written = 0;
+ drm_radeon_cmd_header_t *cmd = NULL;
+
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
+ offset);
+
+ start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+ e32(nr);
+ for (i = 0; i + 1 < nr; i += 2) {
+ e32((rmesa->state.aos[i].aos_size << 0)
+ | (rmesa->state.aos[i].aos_stride << 8)
+ | (rmesa->state.aos[i + 1].aos_size << 16)
+ | (rmesa->state.aos[i + 1].aos_stride << 24)
+ );
+ e32(rmesa->state.aos[i].aos_offset +
+ offset * 4 * rmesa->state.aos[i].aos_stride);
+ e32(rmesa->state.aos[i + 1].aos_offset +
+ offset * 4 * rmesa->state.aos[i + 1].aos_stride);
+ }
+
+ if (nr & 1) {
+ e32((rmesa->state.aos[nr - 1].aos_size << 0)
+ | (rmesa->state.aos[nr - 1].aos_stride << 8)
+ );
+ e32(rmesa->state.aos[nr - 1].aos_offset +
+ offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
+ }
+}
+
+static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
+{
+ int cmd_reserved = 0;
+ int cmd_written = 0;
+ drm_radeon_cmd_header_t *cmd = NULL;
+
+ start_packet3(RADEON_CP_PACKET3_3D_DRAW_VBUF_2, 0);
+ e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16)
+ | type);
+}
+
+static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
+ int start, int end, int prim)
+{
+ int type, num_verts;
+
+ type = r300PrimitiveType(rmesa, ctx, prim);
+ num_verts = r300NumVerts(rmesa, end - start, prim);
+
+ if (type < 0 || num_verts <= 0)
+ return;
+
+ if (rmesa->state.VB.Elts) {
+ r300EmitAOS(rmesa, rmesa->state.aos_count, start);
+ if (num_verts > 65535) {
+ /* not implemented yet */
+ WARN_ONCE("Too many elts\n");
+ return;
+ }
+ r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts,
+ rmesa->state.VB.elt_size);
+ r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset,
+ num_verts, type, rmesa->state.VB.elt_size);
+ } else {
+ r300EmitAOS(rmesa, rmesa->state.aos_count, start);
+ r300FireAOS(rmesa, num_verts, type);
+ }
+}
+
+#define CONV_VB(a, b) rvb->AttribPtr[(a)].size = vb->b->size, \
+ rvb->AttribPtr[(a)].type = GL_FLOAT, \
+ rvb->AttribPtr[(a)].stride = vb->b->stride, \
+ rvb->AttribPtr[(a)].data = vb->b->data
+
+static void radeon_vb_to_rvb(r300ContextPtr rmesa,
+ struct radeon_vertex_buffer *rvb,
+ struct vertex_buffer *vb)
+{
+ int i;
+ GLcontext *ctx;
+ ctx = rmesa->radeon.glCtx;
+
+ memset(rvb, 0, sizeof(*rvb));
+
+ rvb->Elts = vb->Elts;
+ rvb->elt_size = 4;
+ rvb->elt_min = 0;
+ rvb->elt_max = vb->Count;
+
+ rvb->Count = vb->Count;
+
+ if (hw_tcl_on) {
+ CONV_VB(VERT_ATTRIB_POS, ObjPtr);
+ } else {
+ assert(vb->ClipPtr);
+ CONV_VB(VERT_ATTRIB_POS, ClipPtr);
+ }
+
+ CONV_VB(VERT_ATTRIB_NORMAL, NormalPtr);
+ CONV_VB(VERT_ATTRIB_COLOR0, ColorPtr[0]);
+ CONV_VB(VERT_ATTRIB_COLOR1, SecondaryColorPtr[0]);
+ CONV_VB(VERT_ATTRIB_FOG, FogCoordPtr);
+
+ for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++)
+ CONV_VB(VERT_ATTRIB_TEX0 + i, TexCoordPtr[i]);
+
+ for (i = 0; i < MAX_VERTEX_PROGRAM_ATTRIBS; i++)
+ CONV_VB(VERT_ATTRIB_GENERIC0 + i,
+ AttribPtr[VERT_ATTRIB_GENERIC0 + i]);
+
+ rvb->Primitive = vb->Primitive;
+ rvb->PrimitiveCount = vb->PrimitiveCount;
+ rvb->LockFirst = rvb->LockCount = 0;
+ rvb->lock_uptodate = GL_FALSE;
+}
+
+static GLboolean r300RunRender(GLcontext * ctx,
+ struct tnl_pipeline_stage *stage)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct radeon_vertex_buffer *VB = &rmesa->state.VB;
+ int i;
+ int cmd_reserved = 0;
+ int cmd_written = 0;
+ drm_radeon_cmd_header_t *cmd = NULL;
+
+ if (RADEON_DEBUG & DEBUG_PRIMS)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ if (stage) {
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ radeon_vb_to_rvb(rmesa, VB, &tnl->vb);
+ }
+
+ r300UpdateShaders(rmesa);
+ if (r300EmitArrays(ctx))
+ return GL_TRUE;
+
+ r300UpdateShaderStates(rmesa);
+
+ reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
+ e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
+
+ reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
+ e32(R300_RB3D_ZCACHE_UNKNOWN_03);
+
+ r300EmitState(rmesa);
+
+ for (i = 0; i < VB->PrimitiveCount; i++) {
+ GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
+ GLuint start = VB->Primitive[i].start;
+ GLuint end = VB->Primitive[i].start + VB->Primitive[i].count;
+ r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
+ }
+
+ reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
+ e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
+
+ reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
+ e32(R300_RB3D_ZCACHE_UNKNOWN_03);
+
+#ifdef USER_BUFFERS
+ r300UseArrays(ctx);
+#endif
+
+ r300ReleaseArrays(ctx);
+
+ return GL_FALSE;
+}
+
+#define FALLBACK_IF(expr) \
+ do { \
+ if (expr) { \
+ if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
+ WARN_ONCE("Software fallback:%s\n", \
+ #expr); \
+ return R300_FALLBACK_RAST; \
+ } \
+ } while(0)
+
+static int r300Fallback(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_fragment_program *fp = (struct r300_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+
+ if (fp) {
+ if (!fp->translated)
+ r300TranslateFragmentShader(r300, fp);
+ FALLBACK_IF(!fp->translated);
+ }
+
+ FALLBACK_IF(ctx->RenderMode != GL_RENDER);
+
+ FALLBACK_IF(ctx->Stencil._TestTwoSide
+ && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
+ || ctx->Stencil.ValueMask[0] !=
+ ctx->Stencil.ValueMask[1]
+ || ctx->Stencil.WriteMask[0] !=
+ ctx->Stencil.WriteMask[1]));
+
+ FALLBACK_IF(ctx->Color.ColorLogicOpEnabled);
+
+ if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
+ FALLBACK_IF(ctx->Point.PointSprite);
+
+ if (!r300->disable_lowimpact_fallback) {
+ FALLBACK_IF(ctx->Polygon.OffsetPoint);
+ FALLBACK_IF(ctx->Polygon.OffsetLine);
+ FALLBACK_IF(ctx->Polygon.StippleFlag);
+ FALLBACK_IF(ctx->Multisample.Enabled);
+ FALLBACK_IF(ctx->Line.StippleFlag);
+ FALLBACK_IF(ctx->Line.SmoothFlag);
+ FALLBACK_IF(ctx->Point.SmoothFlag);
+ }
+
+ return R300_FALLBACK_NONE;
+}
+
+static GLboolean r300RunNonTCLRender(GLcontext * ctx,
+ struct tnl_pipeline_stage *stage)
+{
+ if (RADEON_DEBUG & DEBUG_PRIMS)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
+ return GL_TRUE;
+
+ return r300RunRender(ctx, stage);
+}
+
+static GLboolean r300RunTCLRender(GLcontext * ctx,
+ struct tnl_pipeline_stage *stage)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct r300_vertex_program *vp;
+
+ hw_tcl_on = future_hw_tcl_on;
+
+ if (RADEON_DEBUG & DEBUG_PRIMS)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ if (hw_tcl_on == GL_FALSE)
+ return GL_TRUE;
+
+ if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
+ hw_tcl_on = GL_FALSE;
+ return GL_TRUE;
+ }
+
+ r300UpdateShaders(rmesa);
+
+ vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
+ if (vp->native == GL_FALSE) {
+ hw_tcl_on = GL_FALSE;
+ return GL_TRUE;
+ }
+
+ return r300RunRender(ctx, stage);
+}
+
+const struct tnl_pipeline_stage _r300_render_stage = {
+ "r300 Hardware Rasterization",
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ r300RunNonTCLRender
+};
+
+const struct tnl_pipeline_stage _r300_tcl_stage = {
+ "r300 Hardware Transform, Clipping and Lighting",
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ r300RunTCLRender
+};
diff --git a/r300/r300_shader.c b/r300/r300_shader.c
new file mode 100644
index 0000000..59fe17b
--- /dev/null
+++ b/r300/r300_shader.c
@@ -0,0 +1,73 @@
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "program.h"
+#include "tnl/tnl.h"
+#include "r300_context.h"
+#include "r300_fragprog.h"
+
+static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
+ GLuint id)
+{
+ struct r300_vertex_program_cont *vp;
+ struct r300_fragment_program *fp;
+
+ switch (target) {
+ case GL_VERTEX_STATE_PROGRAM_NV:
+ case GL_VERTEX_PROGRAM_ARB:
+ vp = CALLOC_STRUCT(r300_vertex_program_cont);
+ return _mesa_init_vertex_program(ctx, &vp->mesa_program,
+ target, id);
+ case GL_FRAGMENT_PROGRAM_ARB:
+ fp = CALLOC_STRUCT(r300_fragment_program);
+ fp->ctx = ctx;
+ return _mesa_init_fragment_program(ctx, &fp->mesa_program,
+ target, id);
+ case GL_FRAGMENT_PROGRAM_NV:
+ fp = CALLOC_STRUCT(r300_fragment_program);
+ return _mesa_init_fragment_program(ctx, &fp->mesa_program,
+ target, id);
+ default:
+ _mesa_problem(ctx, "Bad target in r300NewProgram");
+ }
+
+ return NULL;
+}
+
+static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog)
+{
+ _mesa_delete_program(ctx, prog);
+}
+
+static void
+r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+ struct r300_vertex_program_cont *vp = (void *)prog;
+ struct r300_fragment_program *fp = (struct r300_fragment_program *)prog;
+
+ switch (target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ vp->progs = NULL;
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ fp->translated = GL_FALSE;
+ break;
+ }
+ /* need this for tcl fallbacks */
+ _tnl_program_string(ctx, target, prog);
+}
+
+static GLboolean
+r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+ return 1;
+}
+
+void r300InitShaderFuncs(struct dd_function_table *functions)
+{
+ functions->NewProgram = r300NewProgram;
+ functions->DeleteProgram = r300DeleteProgram;
+ functions->ProgramStringNotify = r300ProgramStringNotify;
+ functions->IsProgramNative = r300IsProgramNative;
+}
diff --git a/r300/r300_state.c b/r300/r300_state.c
new file mode 100644
index 0000000..2aaf041
--- /dev/null
+++ b/r300/r300_state.c
@@ -0,0 +1,2375 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002.
+Copyright (C) 2004 Nicolai Haehnle.
+All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "glheader.h"
+#include "state.h"
+#include "imports.h"
+#include "enums.h"
+#include "macros.h"
+#include "context.h"
+#include "dd.h"
+#include "simple_list.h"
+
+#include "api_arrayelt.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "texformat.h"
+
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "r300_context.h"
+#include "r300_ioctl.h"
+#include "r300_state.h"
+#include "r300_reg.h"
+#include "r300_emit.h"
+#include "r300_fragprog.h"
+#include "r300_tex.h"
+
+#include "drirenderbuffer.h"
+
+static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4])
+{
+ GLubyte color[4];
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(rmesa, blend_color);
+
+ CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]);
+ CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
+ CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
+ CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
+
+ rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0],
+ color[1], color[2]);
+}
+
+/**
+ * Calculate the hardware blend factor setting. This same function is used
+ * for source and destination of both alpha and RGB.
+ *
+ * \returns
+ * The hardware register value for the specified blend factor. This value
+ * will need to be shifted into the correct position for either source or
+ * destination factor.
+ *
+ * \todo
+ * Since the two cases where source and destination are handled differently
+ * are essentially error cases, they should never happen. Determine if these
+ * cases can be removed.
+ */
+static int blend_factor(GLenum factor, GLboolean is_src)
+{
+ switch (factor) {
+ case GL_ZERO:
+ return R300_BLEND_GL_ZERO;
+ break;
+ case GL_ONE:
+ return R300_BLEND_GL_ONE;
+ break;
+ case GL_DST_COLOR:
+ return R300_BLEND_GL_DST_COLOR;
+ break;
+ case GL_ONE_MINUS_DST_COLOR:
+ return R300_BLEND_GL_ONE_MINUS_DST_COLOR;
+ break;
+ case GL_SRC_COLOR:
+ return R300_BLEND_GL_SRC_COLOR;
+ break;
+ case GL_ONE_MINUS_SRC_COLOR:
+ return R300_BLEND_GL_ONE_MINUS_SRC_COLOR;
+ break;
+ case GL_SRC_ALPHA:
+ return R300_BLEND_GL_SRC_ALPHA;
+ break;
+ case GL_ONE_MINUS_SRC_ALPHA:
+ return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+ break;
+ case GL_DST_ALPHA:
+ return R300_BLEND_GL_DST_ALPHA;
+ break;
+ case GL_ONE_MINUS_DST_ALPHA:
+ return R300_BLEND_GL_ONE_MINUS_DST_ALPHA;
+ break;
+ case GL_SRC_ALPHA_SATURATE:
+ return (is_src) ? R300_BLEND_GL_SRC_ALPHA_SATURATE :
+ R300_BLEND_GL_ZERO;
+ break;
+ case GL_CONSTANT_COLOR:
+ return R300_BLEND_GL_CONST_COLOR;
+ break;
+ case GL_ONE_MINUS_CONSTANT_COLOR:
+ return R300_BLEND_GL_ONE_MINUS_CONST_COLOR;
+ break;
+ case GL_CONSTANT_ALPHA:
+ return R300_BLEND_GL_CONST_ALPHA;
+ break;
+ case GL_ONE_MINUS_CONSTANT_ALPHA:
+ return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+ break;
+ default:
+ fprintf(stderr, "unknown blend factor %x\n", factor);
+ return (is_src) ? R300_BLEND_GL_ONE : R300_BLEND_GL_ZERO;
+ break;
+ }
+}
+
+/**
+ * Sets both the blend equation and the blend function.
+ * This is done in a single
+ * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX)
+ * change the interpretation of the blend function.
+ * Also, make sure that blend function and blend equation are set to their
+ * default value if color blending is not enabled, since at least blend
+ * equations GL_MIN and GL_FUNC_REVERSE_SUBTRACT will cause wrong results
+ * otherwise for unknown reasons.
+ */
+
+/* helper function */
+static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn,
+ int cbits, int funcA, int eqnA)
+{
+ GLuint new_ablend, new_cblend;
+
+#if 0
+ fprintf(stderr,
+ "eqnA=%08x funcA=%08x eqn=%08x func=%08x cbits=%08x\n",
+ eqnA, funcA, eqn, func, cbits);
+#endif
+ new_ablend = eqnA | funcA;
+ new_cblend = eqn | func;
+
+ /* Some blend factor combinations don't seem to work when the
+ * BLEND_NO_SEPARATE bit is set.
+ *
+ * Especially problematic candidates are the ONE_MINUS_* flags,
+ * but I can't see a real pattern.
+ */
+#if 0
+ if (new_ablend == new_cblend) {
+ new_cblend |= R300_BLEND_NO_SEPARATE;
+ }
+#endif
+ new_cblend |= cbits;
+
+ if ((new_ablend != r300->hw.bld.cmd[R300_BLD_ABLEND]) ||
+ (new_cblend != r300->hw.bld.cmd[R300_BLD_CBLEND])) {
+ R300_STATECHANGE(r300, bld);
+ r300->hw.bld.cmd[R300_BLD_ABLEND] = new_ablend;
+ r300->hw.bld.cmd[R300_BLD_CBLEND] = new_cblend;
+ }
+}
+
+static void r300SetBlendState(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
+ int eqn = R300_COMB_FCN_ADD_CLAMP;
+ int funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
+ int eqnA = R300_COMB_FCN_ADD_CLAMP;
+
+ if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) {
+ r300SetBlendCntl(r300, func, eqn, 0, func, eqn);
+ return;
+ }
+
+ func =
+ (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) <<
+ R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstRGB,
+ GL_FALSE) <<
+ R300_DST_BLEND_SHIFT);
+
+ switch (ctx->Color.BlendEquationRGB) {
+ case GL_FUNC_ADD:
+ eqn = R300_COMB_FCN_ADD_CLAMP;
+ break;
+
+ case GL_FUNC_SUBTRACT:
+ eqn = R300_COMB_FCN_SUB_CLAMP;
+ break;
+
+ case GL_FUNC_REVERSE_SUBTRACT:
+ eqn = R300_COMB_FCN_RSUB_CLAMP;
+ break;
+
+ case GL_MIN:
+ eqn = R300_COMB_FCN_MIN;
+ func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
+ break;
+
+ case GL_MAX:
+ eqn = R300_COMB_FCN_MAX;
+ func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
+ break;
+
+ default:
+ fprintf(stderr,
+ "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+ __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB);
+ return;
+ }
+
+ funcA =
+ (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) <<
+ R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstA,
+ GL_FALSE) <<
+ R300_DST_BLEND_SHIFT);
+
+ switch (ctx->Color.BlendEquationA) {
+ case GL_FUNC_ADD:
+ eqnA = R300_COMB_FCN_ADD_CLAMP;
+ break;
+
+ case GL_FUNC_SUBTRACT:
+ eqnA = R300_COMB_FCN_SUB_CLAMP;
+ break;
+
+ case GL_FUNC_REVERSE_SUBTRACT:
+ eqnA = R300_COMB_FCN_RSUB_CLAMP;
+ break;
+
+ case GL_MIN:
+ eqnA = R300_COMB_FCN_MIN;
+ funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
+ break;
+
+ case GL_MAX:
+ eqnA = R300_COMB_FCN_MAX;
+ funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
+ break;
+
+ default:
+ fprintf(stderr,
+ "[%s:%u] Invalid A blend equation (0x%04x).\n",
+ __FUNCTION__, __LINE__, ctx->Color.BlendEquationA);
+ return;
+ }
+
+ r300SetBlendCntl(r300,
+ func, eqn,
+ R300_BLEND_UNKNOWN | R300_BLEND_ENABLE, funcA, eqnA);
+}
+
+static void r300BlendEquationSeparate(GLcontext * ctx,
+ GLenum modeRGB, GLenum modeA)
+{
+ r300SetBlendState(ctx);
+}
+
+static void r300BlendFuncSeparate(GLcontext * ctx,
+ GLenum sfactorRGB, GLenum dfactorRGB,
+ GLenum sfactorA, GLenum dfactorA)
+{
+ r300SetBlendState(ctx);
+}
+
+/**
+ * Update our tracked culling state based on Mesa's state.
+ */
+static void r300UpdateCulling(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ uint32_t val = 0;
+
+ R300_STATECHANGE(r300, cul);
+ if (ctx->Polygon.CullFlag) {
+ if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+ val = R300_CULL_FRONT | R300_CULL_BACK;
+ else if (ctx->Polygon.CullFaceMode == GL_FRONT)
+ val = R300_CULL_FRONT;
+ else
+ val = R300_CULL_BACK;
+
+ if (ctx->Polygon.FrontFace == GL_CW)
+ val |= R300_FRONT_FACE_CW;
+ else
+ val |= R300_FRONT_FACE_CCW;
+ }
+ r300->hw.cul.cmd[R300_CUL_CULL] = val;
+}
+
+static void r300SetEarlyZState(GLcontext * ctx)
+{
+	/* Updates register R300_RB3D_EARLY_Z (0x4F14):
+	 *  - if the depth test is disabled, use R300_EARLY_Z_DISABLE;
+	 *  - if the depth test is enabled and the alpha test is not, use
+	 *    R300_EARLY_Z_ENABLE;
+	 *  - if both the depth and alpha tests are enabled, use
+	 *    R300_EARLY_Z_DISABLE.
+	 */
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(r300, zstencil_format);
+ if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS)
+ /* disable early Z */
+ r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE;
+ else {
+ if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER)
+ /* enable early Z */
+ r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE;
+ else
+ /* disable early Z */
+ r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE;
+ }
+}
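+
+/* Example (a sketch of the rule above): with GL_DEPTH_TEST enabled (and a
+ * depth func other than GL_NEVER) and the alpha test either disabled or set
+ * to GL_ALWAYS, cmd[2] stays at R300_EARLY_Z_ENABLE; enabling GL_ALPHA_TEST
+ * with any other compare function flips it back to R300_EARLY_Z_DISABLE.
+ */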
+
+static void r300SetAlphaState(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLubyte refByte;
+ uint32_t pp_misc = 0x0;
+ GLboolean really_enabled = ctx->Color.AlphaEnabled;
+
+ CLAMPED_FLOAT_TO_UBYTE(refByte, ctx->Color.AlphaRef);
+
+ switch (ctx->Color.AlphaFunc) {
+ case GL_NEVER:
+ pp_misc |= R300_ALPHA_TEST_FAIL;
+ break;
+ case GL_LESS:
+ pp_misc |= R300_ALPHA_TEST_LESS;
+ break;
+ case GL_EQUAL:
+ pp_misc |= R300_ALPHA_TEST_EQUAL;
+ break;
+ case GL_LEQUAL:
+ pp_misc |= R300_ALPHA_TEST_LEQUAL;
+ break;
+ case GL_GREATER:
+ pp_misc |= R300_ALPHA_TEST_GREATER;
+ break;
+ case GL_NOTEQUAL:
+ pp_misc |= R300_ALPHA_TEST_NEQUAL;
+ break;
+ case GL_GEQUAL:
+ pp_misc |= R300_ALPHA_TEST_GEQUAL;
+ break;
+ case GL_ALWAYS:
+ /*pp_misc |= R300_ALPHA_TEST_PASS; */
+ really_enabled = GL_FALSE;
+ break;
+ }
+
+ if (really_enabled) {
+ pp_misc |= R300_ALPHA_TEST_ENABLE;
+ pp_misc |= (refByte & R300_REF_ALPHA_MASK);
+ } else {
+ pp_misc = 0x0;
+ }
+
+ R300_STATECHANGE(r300, at);
+ r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc;
+
+ r300SetEarlyZState(ctx);
+}
+
+static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
+{
+ (void)func;
+ (void)ref;
+ r300SetAlphaState(ctx);
+}
+
+static int translate_func(int func)
+{
+ switch (func) {
+ case GL_NEVER:
+ return R300_ZS_NEVER;
+ case GL_LESS:
+ return R300_ZS_LESS;
+ case GL_EQUAL:
+ return R300_ZS_EQUAL;
+ case GL_LEQUAL:
+ return R300_ZS_LEQUAL;
+ case GL_GREATER:
+ return R300_ZS_GREATER;
+ case GL_NOTEQUAL:
+ return R300_ZS_NOTEQUAL;
+ case GL_GEQUAL:
+ return R300_ZS_GEQUAL;
+ case GL_ALWAYS:
+ return R300_ZS_ALWAYS;
+ }
+ return 0;
+}
+
+static void r300SetDepthState(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(r300, zs);
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE;
+ r300->hw.zs.cmd[R300_ZS_CNTL_1] &=
+ ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT);
+
+ if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) {
+ if (ctx->Depth.Mask)
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
+ R300_RB3D_Z_TEST_AND_WRITE;
+ else
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST;
+
+		r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
+		    translate_func(ctx->Depth.Func) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
+ } else {
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1;
+ r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ translate_func(GL_NEVER) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
+ }
+
+ r300SetEarlyZState(ctx);
+}
+
+static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq );
+
+/**
+ * Handle glEnable()/glDisable().
+ *
+ * \note Mesa already filters redundant calls to glEnable/glDisable.
+ */
+static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLuint p;
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr(cap),
+ state ? "GL_TRUE" : "GL_FALSE");
+
+ switch (cap) {
+ /* Fast track this one...
+ */
+ case GL_TEXTURE_1D:
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_3D:
+ break;
+
+ case GL_FOG:
+ R300_STATECHANGE(r300, fogs);
+ if (state) {
+ r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FOG_ENABLE;
+
+ ctx->Driver.Fogfv(ctx, GL_FOG_MODE, NULL);
+ ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY,
+ &ctx->Fog.Density);
+ ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
+ ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
+ ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
+ } else {
+ r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FOG_ENABLE;
+ }
+
+ break;
+
+ case GL_ALPHA_TEST:
+ r300SetAlphaState(ctx);
+ break;
+
+ case GL_BLEND:
+ case GL_COLOR_LOGIC_OP:
+ r300SetBlendState(ctx);
+ break;
+
+
+ case GL_CLIP_PLANE0:
+ case GL_CLIP_PLANE1:
+ case GL_CLIP_PLANE2:
+ case GL_CLIP_PLANE3:
+ case GL_CLIP_PLANE4:
+ case GL_CLIP_PLANE5:
+ /* no VAP UCP on non-TCL chipsets */
+ if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ return;
+
+ p = cap-GL_CLIP_PLANE0;
+ R300_STATECHANGE( r300, vap_clip_cntl );
+ if (state) {
+ r300->hw.vap_clip_cntl.cmd[1] |= (R300_VAP_UCP_ENABLE_0<<p);
+ r300ClipPlane( ctx, cap, NULL );
+ }
+ else {
+ r300->hw.vap_clip_cntl.cmd[1] &= ~(R300_VAP_UCP_ENABLE_0<<p);
+ }
+ break;
+ case GL_DEPTH_TEST:
+ r300SetDepthState(ctx);
+ break;
+
+ case GL_STENCIL_TEST:
+ if (r300->state.stencil.hw_stencil) {
+ R300_STATECHANGE(r300, zs);
+ if (state) {
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
+ R300_RB3D_STENCIL_ENABLE;
+ } else {
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] &=
+ ~R300_RB3D_STENCIL_ENABLE;
+ }
+ } else {
+#if R200_MERGED
+ FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state);
+#endif
+ }
+ break;
+
+ case GL_CULL_FACE:
+ r300UpdateCulling(ctx);
+ break;
+
+ case GL_POLYGON_OFFSET_POINT:
+ case GL_POLYGON_OFFSET_LINE:
+ break;
+
+ case GL_POLYGON_OFFSET_FILL:
+ R300_STATECHANGE(r300, occlusion_cntl);
+ if (state) {
+ r300->hw.occlusion_cntl.cmd[1] |= (3 << 0);
+ } else {
+ r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0);
+ }
+ break;
+ default:
+ radeonEnable(ctx, cap, state);
+ return;
+ }
+}
+
+static void r300UpdatePolygonMode(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ uint32_t hw_mode = 0;
+
+ if (ctx->Polygon.FrontMode != GL_FILL ||
+ ctx->Polygon.BackMode != GL_FILL) {
+ GLenum f, b;
+
+ if (ctx->Polygon.FrontFace == GL_CCW) {
+ f = ctx->Polygon.FrontMode;
+ b = ctx->Polygon.BackMode;
+ } else {
+ f = ctx->Polygon.BackMode;
+ b = ctx->Polygon.FrontMode;
+ }
+
+ hw_mode |= R300_PM_ENABLED;
+
+ switch (f) {
+ case GL_LINE:
+ hw_mode |= R300_PM_FRONT_LINE;
+ break;
+ case GL_POINT: /* noop */
+ hw_mode |= R300_PM_FRONT_POINT;
+ break;
+ case GL_FILL:
+ hw_mode |= R300_PM_FRONT_FILL;
+ break;
+ }
+
+ switch (b) {
+ case GL_LINE:
+ hw_mode |= R300_PM_BACK_LINE;
+ break;
+ case GL_POINT: /* noop */
+ hw_mode |= R300_PM_BACK_POINT;
+ break;
+ case GL_FILL:
+ hw_mode |= R300_PM_BACK_FILL;
+ break;
+ }
+ }
+
+ if (r300->hw.polygon_mode.cmd[1] != hw_mode) {
+ R300_STATECHANGE(r300, polygon_mode);
+ r300->hw.polygon_mode.cmd[1] = hw_mode;
+ }
+}
+
+/**
+ * Change the culling mode.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300CullFace(GLcontext * ctx, GLenum mode)
+{
+ (void)mode;
+
+ r300UpdateCulling(ctx);
+}
+
+/**
+ * Change the polygon orientation.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300FrontFace(GLcontext * ctx, GLenum mode)
+{
+ (void)mode;
+
+ r300UpdateCulling(ctx);
+ r300UpdatePolygonMode(ctx);
+}
+
+/**
+ * Change the depth testing function.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300DepthFunc(GLcontext * ctx, GLenum func)
+{
+ (void)func;
+ r300SetDepthState(ctx);
+}
+
+/**
+ * Enable/Disable depth writing.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300DepthMask(GLcontext * ctx, GLboolean mask)
+{
+ (void)mask;
+ r300SetDepthState(ctx);
+}
+
+/**
+ * Handle glColorMask()
+ */
+static void r300ColorMask(GLcontext * ctx,
+ GLboolean r, GLboolean g, GLboolean b, GLboolean a)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int mask = (r ? R300_COLORMASK0_R : 0) |
+ (g ? R300_COLORMASK0_G : 0) |
+ (b ? R300_COLORMASK0_B : 0) | (a ? R300_COLORMASK0_A : 0);
+
+ if (mask != r300->hw.cmk.cmd[R300_CMK_COLORMASK]) {
+ R300_STATECHANGE(r300, cmk);
+ r300->hw.cmk.cmd[R300_CMK_COLORMASK] = mask;
+ }
+}
+
+/* =============================================================
+ * Fog
+ */
+static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ union {
+ int i;
+ float f;
+ } fogScale, fogStart;
+
+ (void)param;
+
+ fogScale.i = r300->hw.fogp.cmd[R300_FOGP_SCALE];
+ fogStart.i = r300->hw.fogp.cmd[R300_FOGP_START];
+
+ switch (pname) {
+ case GL_FOG_MODE:
+ if (!ctx->Fog.Enabled)
+ return;
+		switch (ctx->Fog.Mode) {
+		case GL_LINEAR:
+			R300_STATECHANGE(r300, fogs);
+			r300->hw.fogs.cmd[R300_FOGS_STATE] =
+			    (r300->hw.fogs.cmd[R300_FOGS_STATE] &
+			     ~R300_FOG_MODE_MASK) | R300_FOG_MODE_LINEAR;
+
+			if (ctx->Fog.Start == ctx->Fog.End) {
+				fogScale.f = -1.0;
+				fogStart.f = 1.0;
+			} else {
+				fogScale.f =
+				    1.0 / (ctx->Fog.End - ctx->Fog.Start);
+				fogStart.f =
+				    -ctx->Fog.Start / (ctx->Fog.End -
+						       ctx->Fog.Start);
+			}
+			break;
+		case GL_EXP:
+			R300_STATECHANGE(r300, fogs);
+			r300->hw.fogs.cmd[R300_FOGS_STATE] =
+			    (r300->hw.fogs.cmd[R300_FOGS_STATE] &
+			     ~R300_FOG_MODE_MASK) | R300_FOG_MODE_EXP;
+			fogScale.f = 0.0933 * ctx->Fog.Density;
+			fogStart.f = 0.0;
+			break;
+		case GL_EXP2:
+			R300_STATECHANGE(r300, fogs);
+			r300->hw.fogs.cmd[R300_FOGS_STATE] =
+			    (r300->hw.fogs.cmd[R300_FOGS_STATE] &
+			     ~R300_FOG_MODE_MASK) | R300_FOG_MODE_EXP2;
+			fogScale.f = 0.3 * ctx->Fog.Density;
+			fogStart.f = 0.0;
+			break;
+		default:
+			return;
+		}
+ break;
+ case GL_FOG_DENSITY:
+ switch (ctx->Fog.Mode) {
+ case GL_EXP:
+ fogScale.f = 0.0933 * ctx->Fog.Density;
+ fogStart.f = 0.0;
+ break;
+		case GL_EXP2:
+			fogScale.f = 0.3 * ctx->Fog.Density;
+			fogStart.f = 0.0;
+			break;
+		default:
+			break;
+ }
+ break;
+ case GL_FOG_START:
+ case GL_FOG_END:
+ if (ctx->Fog.Mode == GL_LINEAR) {
+ if (ctx->Fog.Start == ctx->Fog.End) {
+ fogScale.f = -1.0;
+ fogStart.f = 1.0;
+ } else {
+ fogScale.f =
+ 1.0 / (ctx->Fog.End - ctx->Fog.Start);
+ fogStart.f =
+ -ctx->Fog.Start / (ctx->Fog.End -
+ ctx->Fog.Start);
+ }
+ }
+ break;
+ case GL_FOG_COLOR:
+ R300_STATECHANGE(r300, fogc);
+ r300->hw.fogc.cmd[R300_FOGC_R] =
+ (GLuint) (ctx->Fog.Color[0] * 1023.0F) & 0x3FF;
+ r300->hw.fogc.cmd[R300_FOGC_G] =
+ (GLuint) (ctx->Fog.Color[1] * 1023.0F) & 0x3FF;
+ r300->hw.fogc.cmd[R300_FOGC_B] =
+ (GLuint) (ctx->Fog.Color[2] * 1023.0F) & 0x3FF;
+ break;
+ case GL_FOG_COORD_SRC:
+ break;
+ default:
+ return;
+ }
+
+ if (fogScale.i != r300->hw.fogp.cmd[R300_FOGP_SCALE] ||
+ fogStart.i != r300->hw.fogp.cmd[R300_FOGP_START]) {
+ R300_STATECHANGE(r300, fogp);
+ r300->hw.fogp.cmd[R300_FOGP_SCALE] = fogScale.i;
+ r300->hw.fogp.cmd[R300_FOGP_START] = fogStart.i;
+ }
+}
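+
+/* Worked example (sketch): for GL_LINEAR fog with glFogf(GL_FOG_START, 10.0)
+ * and glFogf(GL_FOG_END, 50.0), the code above computes
+ *   fogScale = 1 / (50 - 10)   = 0.025
+ *   fogStart = -10 / (50 - 10) = -0.25
+ * so the hardware ramp fogScale * z + fogStart runs from 0.0 at z == Start to
+ * 1.0 at z == End; the degenerate Start == End case is forced to
+ * scale = -1, start = 1.
+ */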
+
+/* =============================================================
+ * Point state
+ */
+static void r300PointSize(GLcontext * ctx, GLfloat size)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ size = ctx->Point._Size;
+
+ R300_STATECHANGE(r300, ps);
+ r300->hw.ps.cmd[R300_PS_POINTSIZE] =
+ ((int)(size * 6) << R300_POINTSIZE_X_SHIFT) |
+ ((int)(size * 6) << R300_POINTSIZE_Y_SHIFT);
+}
+
+/* =============================================================
+ * Line state
+ */
+static void r300LineWidth(GLcontext * ctx, GLfloat widthf)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ widthf = ctx->Line._Width;
+
+ R300_STATECHANGE(r300, lcntl);
+ r300->hw.lcntl.cmd[1] = (int)(widthf * 6.0);
+ r300->hw.lcntl.cmd[1] |= R300_LINE_CNT_VE;
+}
+
+static void r300PolygonMode(GLcontext * ctx, GLenum face, GLenum mode)
+{
+ (void)face;
+ (void)mode;
+
+ r300UpdatePolygonMode(ctx);
+}
+
+/* =============================================================
+ * Stencil
+ */
+
+static int translate_stencil_op(int op)
+{
+ switch (op) {
+ case GL_KEEP:
+ return R300_ZS_KEEP;
+ case GL_ZERO:
+ return R300_ZS_ZERO;
+ case GL_REPLACE:
+ return R300_ZS_REPLACE;
+ case GL_INCR:
+ return R300_ZS_INCR;
+ case GL_DECR:
+ return R300_ZS_DECR;
+ case GL_INCR_WRAP_EXT:
+ return R300_ZS_INCR_WRAP;
+ case GL_DECR_WRAP_EXT:
+ return R300_ZS_DECR_WRAP;
+ case GL_INVERT:
+ return R300_ZS_INVERT;
+ default:
+ WARN_ONCE("Do not know how to translate stencil op");
+ return R300_ZS_KEEP;
+ }
+ return 0;
+}
+
+static void r300ShadeModel(GLcontext * ctx, GLenum mode)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(rmesa, shade);
+ switch (mode) {
+ case GL_FLAT:
+ rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT;
+ break;
+ case GL_SMOOTH:
+ rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH;
+ break;
+ default:
+ return;
+ }
+}
+
+static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
+ GLenum func, GLint ref, GLuint mask)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	GLuint refmask =
+	    ((ctx->Stencil.Ref[0] & 0xff) << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
+	    ((ctx->Stencil.ValueMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_MASK_SHIFT);
+
+ GLuint flag;
+
+ R300_STATECHANGE(rmesa, zs);
+
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK <<
+ R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
+ | (R300_ZS_MASK <<
+ R300_RB3D_ZS1_BACK_FUNC_SHIFT));
+
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
+ ~((R300_RB3D_ZS2_STENCIL_MASK <<
+ R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
+ (R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
+
+ flag = translate_func(ctx->Stencil.Function[0]);
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT);
+
+ if (ctx->Stencil._TestTwoSide)
+ flag = translate_func(ctx->Stencil.Function[1]);
+
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask;
+}
+
+static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(rmesa, zs);
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
+ ~(R300_RB3D_ZS2_STENCIL_MASK <<
+ R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT);
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |=
+	    (ctx->Stencil.WriteMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT;
+}
+
+static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
+ GLenum fail, GLenum zfail, GLenum zpass)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(rmesa, zs);
+ /* It is easier to mask what's left.. */
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &=
+ (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT) |
+ (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) |
+ (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
+
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (translate_stencil_op(ctx->Stencil.FailFunc[0]) <<
+ R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT)
+ | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) <<
+ R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT)
+ | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) <<
+ R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT);
+
+ if (ctx->Stencil._TestTwoSide) {
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (translate_stencil_op(ctx->Stencil.FailFunc[1]) <<
+ R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
+ | (translate_stencil_op(ctx->Stencil.ZFailFunc[1]) <<
+ R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
+ | (translate_stencil_op(ctx->Stencil.ZPassFunc[1]) <<
+ R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
+ } else {
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (translate_stencil_op(ctx->Stencil.FailFunc[0]) <<
+ R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
+ | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) <<
+ R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
+ | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) <<
+ R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
+ }
+}
+
+static void r300ClearStencil(GLcontext * ctx, GLint s)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+	rmesa->state.stencil.clear =
+	    (GLuint) (ctx->Stencil.Clear & 0xff) |
+	    (R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT) |
+	    ((ctx->Stencil.WriteMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT);
+}
+
+/* =============================================================
+ * Window position and viewport transformation
+ */
+
+/*
+ * Subpixel offsets needed to position primitives correctly:
+ */
+#define SUBPIXEL_X 0.125
+#define SUBPIXEL_Y 0.125
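+
+/* Worked example (sketch): for a 640x480 drawable at (0, 0) with
+ * glViewport(0, 0, 640, 480), Mesa's window map gives roughly
+ * MAT_SX = 320, MAT_TX = 320, MAT_SY = 240, MAT_TY = 240, so
+ * r300UpdateWindow() below programs XSCALE = 320, XOFFSET = 320.125,
+ * YSCALE = -240, YOFFSET = 240.125: the Y axis is flipped from GL's
+ * bottom-left origin to the hardware's top-left origin, nudged by the
+ * subpixel offsets above.
+ */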
+
+static void r300UpdateWindow(GLcontext * ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+ GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+ GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+ GLfloat sx = v[MAT_SX];
+ GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
+ GLfloat sy = -v[MAT_SY];
+ GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+ GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
+ GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
+
+ R300_FIREVERTICES(rmesa);
+ R300_STATECHANGE(rmesa, vpt);
+
+ rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx);
+ rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx);
+ rmesa->hw.vpt.cmd[R300_VPT_YSCALE] = r300PackFloat32(sy);
+ rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty);
+ rmesa->hw.vpt.cmd[R300_VPT_ZSCALE] = r300PackFloat32(sz);
+ rmesa->hw.vpt.cmd[R300_VPT_ZOFFSET] = r300PackFloat32(tz);
+}
+
+static void r300Viewport(GLcontext * ctx, GLint x, GLint y,
+ GLsizei width, GLsizei height)
+{
+ /* Don't pipeline viewport changes, conflict with window offset
+ * setting below. Could apply deltas to rescue pipelined viewport
+ * values, or keep the originals hanging around.
+ */
+ r300UpdateWindow(ctx);
+}
+
+static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
+{
+ r300UpdateWindow(ctx);
+}
+
+void r300UpdateViewportOffset(GLcontext * ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = ((radeonContextPtr) rmesa)->dri.drawable;
+ GLfloat xoffset = (GLfloat) dPriv->x;
+ GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+ GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
+ GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+
+ if (rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) ||
+ rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty)) {
+ /* Note: this should also modify whatever data the context reset
+ * code uses...
+ */
+ R300_STATECHANGE(rmesa, vpt);
+ rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx);
+ rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty);
+
+ }
+
+ radeonUpdateScissor(ctx);
+}
+
+/**
+ * Tell the card where to render (offset, pitch).
+ * Affected by glDrawBuffer, etc.
+ */
+void r300UpdateDrawBuffer(GLcontext * ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ r300ContextPtr r300 = rmesa;
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ driRenderbuffer *drb;
+
+ if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
+ /* draw to front */
+ drb =
+ (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
+ Renderbuffer;
+ } else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) {
+ /* draw to back */
+ drb =
+ (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
+ Renderbuffer;
+ } else {
+ /* drawing to multiple buffers, or none */
+ return;
+ }
+
+ assert(drb);
+ assert(drb->flippedPitch);
+
+ R300_STATECHANGE(rmesa, cb);
+
+ r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset +
+ r300->radeon.radeonScreen->fbLocation;
+ r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch; //r300->radeon.state.color.drawPitch;
+
+ if (r300->radeon.radeonScreen->cpp == 4)
+ r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+ else
+ r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+
+ if (r300->radeon.sarea->tiling_enabled)
+ r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+#if 0
+ R200_STATECHANGE(rmesa, ctx);
+
+ /* Note: we used the (possibly) page-flipped values */
+ rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
+ = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
+ & R200_COLOROFFSET_MASK);
+ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
+
+ if (rmesa->sarea->tiling_enabled) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
+ R200_COLOR_TILE_ENABLE;
+ }
+#endif
+}
+
+static void
+r300FetchStateParameter(GLcontext * ctx,
+ const gl_state_index state[STATE_LENGTH],
+ GLfloat * value)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ switch (state[0]) {
+ case STATE_INTERNAL:
+ switch (state[1]) {
+ case STATE_R300_WINDOW_DIMENSION:
+ value[0] = r300->radeon.dri.drawable->w * 0.5f; /* width*0.5 */
+ value[1] = r300->radeon.dri.drawable->h * 0.5f; /* height*0.5 */
+ value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */
+ value[3] = 1.0F; /* not used */
+ break;
+
+ case STATE_R300_TEXRECT_FACTOR:{
+ struct gl_texture_object *t =
+ ctx->Texture.Unit[state[2]].CurrentRect;
+
+ if (t && t->Image[0][t->BaseLevel]) {
+ struct gl_texture_image *image =
+ t->Image[0][t->BaseLevel];
+ value[0] = 1.0 / image->Width2;
+ value[1] = 1.0 / image->Height2;
+ } else {
+ value[0] = 1.0;
+ value[1] = 1.0;
+ }
+ value[2] = 1.0;
+ value[3] = 1.0;
+ break;
+ }
+
+ default:
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+/**
+ * Update R300's own internal state parameters.
+ * For now just STATE_R300_WINDOW_DIMENSION
+ */
+void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
+{
+ struct r300_fragment_program *fp;
+ struct gl_program_parameter_list *paramList;
+ GLuint i;
+
+ if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM)))
+ return;
+
+ fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
+ if (!fp)
+ return;
+
+ paramList = fp->mesa_program.Base.Parameters;
+
+ if (!paramList)
+ return;
+
+ for (i = 0; i < paramList->NumParameters; i++) {
+ if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
+ r300FetchStateParameter(ctx,
+ paramList->Parameters[i].
+ StateIndexes,
+ paramList->ParameterValues[i]);
+ }
+ }
+}
+
+/* =============================================================
+ * Polygon state
+ */
+static void r300PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLfloat constant = units;
+
+ switch (ctx->Visual.depthBits) {
+ case 16:
+ constant *= 4.0;
+ break;
+ case 24:
+ constant *= 2.0;
+ break;
+ }
+
+ factor *= 12.0;
+
+/* fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */
+
+ R300_STATECHANGE(rmesa, zbs);
+ rmesa->hw.zbs.cmd[R300_ZBS_T_FACTOR] = r300PackFloat32(factor);
+ rmesa->hw.zbs.cmd[R300_ZBS_T_CONSTANT] = r300PackFloat32(constant);
+ rmesa->hw.zbs.cmd[R300_ZBS_W_FACTOR] = r300PackFloat32(factor);
+ rmesa->hw.zbs.cmd[R300_ZBS_W_CONSTANT] = r300PackFloat32(constant);
+}
+
+/* Routing and texture-related */
+
+/* The r300 doesn't handle GL_CLAMP and GL_MIRROR_CLAMP_EXT correctly when the filter is NEAREST.
+ * Since texwrap produces the same results for GL_CLAMP and GL_CLAMP_TO_EDGE, we use the latter instead.
+ * We need to recalculate wrap modes whenever the filter mode is changed because someone might do:
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ * Since the r300 completely ignores R300_TX_CLAMP when either min or mag is nearest, it can't handle
+ * combinations where only one of them is nearest.
+ */
+static unsigned long gen_fixed_filter(unsigned long f)
+{
+ unsigned long mag, min, needs_fixing = 0;
+ //return f;
+
+	/* We ignore the MIRROR bit so we don't have to do everything twice */
+ if ((f & ((7 - 1) << R300_TX_WRAP_S_SHIFT)) ==
+ (R300_TX_CLAMP << R300_TX_WRAP_S_SHIFT)) {
+ needs_fixing |= 1;
+ }
+ if ((f & ((7 - 1) << R300_TX_WRAP_T_SHIFT)) ==
+ (R300_TX_CLAMP << R300_TX_WRAP_T_SHIFT)) {
+ needs_fixing |= 2;
+ }
+ if ((f & ((7 - 1) << R300_TX_WRAP_Q_SHIFT)) ==
+ (R300_TX_CLAMP << R300_TX_WRAP_Q_SHIFT)) {
+ needs_fixing |= 4;
+ }
+
+ if (!needs_fixing)
+ return f;
+
+ mag = f & R300_TX_MAG_FILTER_MASK;
+ min = f & R300_TX_MIN_FILTER_MASK;
+
+ /* TODO: Check for anisto filters too */
+ if ((mag != R300_TX_MAG_FILTER_NEAREST)
+ && (min != R300_TX_MIN_FILTER_NEAREST))
+ return f;
+
+	/* The r300 can't handle these modes, hence we force nearest to linear */
+ if ((mag == R300_TX_MAG_FILTER_NEAREST)
+ && (min != R300_TX_MIN_FILTER_NEAREST)) {
+ f &= ~R300_TX_MAG_FILTER_NEAREST;
+ f |= R300_TX_MAG_FILTER_LINEAR;
+ return f;
+ }
+
+ if ((min == R300_TX_MIN_FILTER_NEAREST)
+ && (mag != R300_TX_MAG_FILTER_NEAREST)) {
+ f &= ~R300_TX_MIN_FILTER_NEAREST;
+ f |= R300_TX_MIN_FILTER_LINEAR;
+ return f;
+ }
+
+ /* Both are nearest */
+ if (needs_fixing & 1) {
+ f &= ~((7 - 1) << R300_TX_WRAP_S_SHIFT);
+ f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_S_SHIFT;
+ }
+ if (needs_fixing & 2) {
+ f &= ~((7 - 1) << R300_TX_WRAP_T_SHIFT);
+ f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT;
+ }
+ if (needs_fixing & 4) {
+ f &= ~((7 - 1) << R300_TX_WRAP_Q_SHIFT);
+ f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_Q_SHIFT;
+ }
+ return f;
+}
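+
+/* Example (sketch of the fixup above): a texture using GL_CLAMP on S with
+ * GL_NEAREST for both min and mag filters keeps nearest filtering but has its
+ * S wrap rewritten from R300_TX_CLAMP to R300_TX_CLAMP_TO_EDGE; if only one of
+ * min/mag is nearest, that one is promoted to linear instead and the wrap bits
+ * are left alone.
+ */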
+
+static void r300SetupTextures(GLcontext * ctx)
+{
+ int i, mtu;
+ struct r300_tex_obj *t;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int hw_tmu = 0;
+ int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */
+ int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, };
+ struct r300_fragment_program *fp = (struct r300_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+
+ R300_STATECHANGE(r300, txe);
+ R300_STATECHANGE(r300, tex.filter);
+ R300_STATECHANGE(r300, tex.filter_1);
+ R300_STATECHANGE(r300, tex.size);
+ R300_STATECHANGE(r300, tex.format);
+ R300_STATECHANGE(r300, tex.pitch);
+ R300_STATECHANGE(r300, tex.offset);
+ R300_STATECHANGE(r300, tex.chroma_key);
+ R300_STATECHANGE(r300, tex.border_color);
+
+ r300->hw.txe.cmd[R300_TXE_ENABLE] = 0x0;
+
+ mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "mtu=%d\n", mtu);
+
+ if (mtu > R300_MAX_TEXTURE_UNITS) {
+ fprintf(stderr,
+ "Aiiee ! mtu=%d is greater than R300_MAX_TEXTURE_UNITS=%d\n",
+ mtu, R300_MAX_TEXTURE_UNITS);
+ _mesa_exit(-1);
+ }
+
+ /* We cannot let disabled tmu offsets pass DRM */
+ for (i = 0; i < mtu; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
+
+#if 0 /* Enables old behaviour */
+ hw_tmu = i;
+#endif
+ tmu_mappings[i] = hw_tmu;
+
+ t = r300->state.texture.unit[i].texobj;
+ /* XXX questionable fix for bug 9170: */
+ if (!t)
+ continue;
+
+ if ((t->format & 0xffffff00) == 0xffffff00) {
+ WARN_ONCE
+ ("unknown texture format (entry %x) encountered. Help me !\n",
+ t->format & 0xff);
+ }
+
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr,
+ "Activating texture unit %d\n", i);
+
+ r300->hw.txe.cmd[R300_TXE_ENABLE] |= (1 << hw_tmu);
+
+ r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 +
+ hw_tmu] =
+ gen_fixed_filter(t->filter) | (hw_tmu << 28);
+ /* Currently disabled! */
+ r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80;
+ r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+ t->size;
+ r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
+ hw_tmu] = t->format;
+ r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+ t->pitch_reg;
+ r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 +
+ hw_tmu] = t->offset;
+
+ if (t->offset & R300_TXO_MACRO_TILE) {
+ WARN_ONCE("macro tiling enabled!\n");
+ }
+
+ if (t->offset & R300_TXO_MICRO_TILE) {
+ WARN_ONCE("micro tiling enabled!\n");
+ }
+
+ r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 +
+ hw_tmu] = 0x0;
+ r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 +
+ hw_tmu] =
+ t->pp_border_color;
+
+ last_hw_tmu = hw_tmu;
+
+ hw_tmu++;
+ }
+ }
+
+ r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_FILTER_0, last_hw_tmu + 1);
+ r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
+ r300->hw.tex.size.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1);
+ r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1);
+ r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_PITCH_0, last_hw_tmu + 1);
+ r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
+ r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
+ r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
+
+	if (!fp)		/* should only happen once, just after context is created */
+ return;
+
+ R300_STATECHANGE(r300, fpt);
+
+ for (i = 0; i < fp->tex.length; i++) {
+ int unit;
+ int opcode;
+ unsigned long val;
+
+ unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT;
+ unit &= 15;
+
+ val = fp->tex.inst[i];
+ val &= ~R300_FPITX_IMAGE_MASK;
+
+ opcode =
+ (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT;
+ if (opcode == R300_FPITX_OP_KIL) {
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ } else {
+ if (tmu_mappings[unit] >= 0) {
+ val |=
+ tmu_mappings[unit] <<
+ R300_FPITX_IMAGE_SHIFT;
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ } else {
+ // We get here when the corresponding texture image is incomplete
+ // (e.g. incomplete mipmaps etc.)
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ }
+ }
+ }
+
+ r300->hw.fpt.cmd[R300_FPT_CMD_0] =
+ cmdpacket0(R300_PFS_TEXI_0, fp->tex.length);
+
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n",
+ r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu);
+}
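+
+/* Example (sketch): if only GL texture unit 2 is _ReallyEnabled, it is packed
+ * into hardware TMU 0 (tmu_mappings[2] == 0), bit 0 of TX_ENABLE is set, and
+ * every non-KIL texture instruction of the fragment program that referenced
+ * image 2 is rewritten above to sample image 0 instead.
+ */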
+
+union r300_outputs_written {
+ GLuint vp_outputs; /* hw_tcl_on */
+ DECLARE_RENDERINPUTS(index_bitset); /* !hw_tcl_on */
+};
+
+#define R300_OUTPUTS_WRITTEN_TEST(ow, vp_result, tnl_attrib) \
+ ((hw_tcl_on) ? (ow).vp_outputs & (1 << (vp_result)) : \
+ RENDERINPUTS_TEST( (ow.index_bitset), (tnl_attrib) ))
+
+static void r300SetupRSUnit(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ /* I'm still unsure if these are needed */
+ GLuint interp_magic[8] = {
+ 0x00,
+ R300_RS_INTERP_1_UNKNOWN,
+ R300_RS_INTERP_2_UNKNOWN,
+ R300_RS_INTERP_3_UNKNOWN,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00
+ };
+ union r300_outputs_written OutputsWritten;
+ GLuint InputsRead;
+ int fp_reg, high_rr;
+ int in_texcoords, col_interp_nr;
+ int i;
+
+ if (hw_tcl_on)
+ OutputsWritten.vp_outputs =
+ CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+ else
+ RENDERINPUTS_COPY(OutputsWritten.index_bitset,
+ r300->state.render_inputs_bitset);
+
+ if (ctx->FragmentProgram._Current)
+ InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+ else {
+ fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+ return; /* This should only ever happen once.. */
+ }
+
+ R300_STATECHANGE(r300, ri);
+ R300_STATECHANGE(r300, rc);
+ R300_STATECHANGE(r300, rr);
+
+ fp_reg = in_texcoords = col_interp_nr = high_rr = 0;
+
+ r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0;
+
+ if (InputsRead & FRAG_BIT_WPOS) {
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+ if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
+ break;
+
+ if (i == ctx->Const.MaxTextureUnits) {
+ fprintf(stderr, "\tno free texcoord found...\n");
+ _mesa_exit(-1);
+ }
+
+ InputsRead |= (FRAG_BIT_TEX0 << i);
+ InputsRead &= ~FRAG_BIT_WPOS;
+ }
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0
+ | R300_RS_INTERP_USED
+ | (in_texcoords << R300_RS_INTERP_SRC_SHIFT)
+ | interp_magic[i];
+
+ r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0;
+ if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+ //assert(r300->state.texture.tc_count != 0);
+ r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R300_RS_ROUTE_ENABLE | i /* source INTERP */
+ | (fp_reg << R300_RS_ROUTE_DEST_SHIFT);
+ high_rr = fp_reg;
+
+ if (!R300_OUTPUTS_WRITTEN_TEST
+ (OutputsWritten, VERT_RESULT_TEX0 + i,
+ _TNL_ATTRIB_TEX(i))) {
+ /* Passing invalid data here can lock the GPU. */
+ WARN_ONCE
+ ("fragprog wants coords for tex%d, vp doesn't provide them!\n",
+ i);
+ //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base);
+ //_mesa_exit(-1);
+ }
+ InputsRead &= ~(FRAG_BIT_TEX0 << i);
+ fp_reg++;
+ }
+ /* Need to count all coords enabled at vof */
+ if (R300_OUTPUTS_WRITTEN_TEST
+ (OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i)))
+ in_texcoords++;
+ }
+
+ if (InputsRead & FRAG_BIT_COL0) {
+ if (!R300_OUTPUTS_WRITTEN_TEST
+ (OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+ WARN_ONCE
+ ("fragprog wants col0, vp doesn't provide it\n");
+ goto out; /* FIXME */
+ //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base);
+ //_mesa_exit(-1);
+ }
+
+ r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
+ | R300_RS_ROUTE_0_COLOR
+ | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
+ InputsRead &= ~FRAG_BIT_COL0;
+ col_interp_nr++;
+ }
+ out:
+
+ if (InputsRead & FRAG_BIT_COL1) {
+ if (!R300_OUTPUTS_WRITTEN_TEST
+ (OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+ WARN_ONCE
+ ("fragprog wants col1, vp doesn't provide it\n");
+ //_mesa_exit(-1);
+ }
+
+ r300->hw.rr.cmd[R300_RR_ROUTE_1] |=
+ R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 |
+ (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT);
+ InputsRead &= ~FRAG_BIT_COL1;
+ if (high_rr < 1)
+ high_rr = 1;
+ col_interp_nr++;
+ }
+
+ /* Need at least one. This might still lock as the values are undefined... */
+ if (in_texcoords == 0 && col_interp_nr == 0) {
+ r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
+ | R300_RS_ROUTE_0_COLOR
+ | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
+ col_interp_nr++;
+ }
+
+ r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_RS_CNTL_TC_CNT_SHIFT)
+ | (col_interp_nr << R300_RS_CNTL_CI_CNT_SHIFT)
+ | R300_RS_CNTL_0_UNKNOWN_18;
+
+ assert(high_rr >= 0);
+ r300->hw.rr.cmd[R300_RR_CMD_0] =
+ cmdpacket0(R300_RS_ROUTE_0, high_rr + 1);
+ r300->hw.rc.cmd[2] = 0xC0 | high_rr;
+
+ if (InputsRead)
+ WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n",
+ InputsRead);
+}
+
+#define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
+
+#define bump_vpu_count(ptr, new_count) do{\
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
+ int _nc=(new_count)/4; \
+ assert(_nc < 256); \
+ if(_nc>_p->vpu.count)_p->vpu.count=_nc;\
+ }while(0)
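+
+/* Example (sketch): after writing 8 dwords of program starting at instruction
+ * slot 0, bump_vpu_count(r300->hw.vpi.cmd, 8) raises vpu.count to 2 (counts
+ * are in 4-dword vectors); it never lowers a count that is already larger.
+ */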
+
+static inline void setup_vertex_shader_fragment(r300ContextPtr r300, int dest,
+						 struct r300_vertex_shader_fragment *vsf)
+{
+ int i;
+
+ if (vsf->length == 0)
+ return;
+
+ if (vsf->length & 0x3) {
+ fprintf(stderr,
+ "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n");
+ _mesa_exit(-1);
+ }
+
+ switch ((dest >> 8) & 0xf) {
+ case 0:
+ R300_STATECHANGE(r300, vpi);
+ for (i = 0; i < vsf->length; i++)
+ r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i +
+ 4 * (dest & 0xff)] = (vsf->body.d[i]);
+ bump_vpu_count(r300->hw.vpi.cmd,
+ vsf->length + 4 * (dest & 0xff));
+ break;
+
+ case 2:
+ R300_STATECHANGE(r300, vpp);
+ for (i = 0; i < vsf->length; i++)
+ r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i +
+ 4 * (dest & 0xff)] = (vsf->body.d[i]);
+ bump_vpu_count(r300->hw.vpp.cmd,
+ vsf->length + 4 * (dest & 0xff));
+ break;
+ case 4:
+ R300_STATECHANGE(r300, vps);
+ for (i = 0; i < vsf->length; i++)
+ r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] =
+ (vsf->body.d[i]);
+ bump_vpu_count(r300->hw.vps.cmd,
+ vsf->length + 4 * (dest & 0xff));
+ break;
+ default:
+ fprintf(stderr,
+ "%s:%s don't know how to handle dest %04x\n",
+ __FILE__, __FUNCTION__, dest);
+ _mesa_exit(-1);
+ }
+}
+
+/* Just a skeleton for now. */
+
+/* Generate a vertex shader that simply transforms vertex and texture coordinates
+   while leaving colors intact. Nothing fancy (like lights).
+
+   If implementing lights, make a copy first so it is easy to switch between the two versions. */
+static void r300GenerateSimpleVertexShader(r300ContextPtr r300)
+{
+ int i;
+ GLuint o_reg = 0;
+
+ /* Allocate parameters */
+ r300->state.vap_param.transform_offset = 0x0; /* transform matrix */
+ r300->state.vertex_shader.param_offset = 0x0;
+ r300->state.vertex_shader.param_count = 0x4; /* 4 vector values - 4x4 matrix */
+
+ r300->state.vertex_shader.program_start = 0x0;
+ r300->state.vertex_shader.unknown_ptr1 = 0x4; /* magic value ? */
+ r300->state.vertex_shader.program_end = 0x0;
+
+ r300->state.vertex_shader.unknown_ptr2 = 0x0; /* magic value */
+ r300->state.vertex_shader.unknown_ptr3 = 0x4; /* magic value */
+
+ r300->state.vertex_shader.unknown1.length = 0;
+ r300->state.vertex_shader.unknown2.length = 0;
+
+#define WRITE_OP(oper,source1,source2,source3) {\
+ r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].op=(oper); \
+ r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[0]=(source1); \
+ r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[1]=(source2); \
+ r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[2]=(source3); \
+ r300->state.vertex_shader.program_end++; \
+ }
+
+ for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++)
+ if (r300->state.sw_tcl_inputs[i] != -1) {
+ WRITE_OP(EASY_VSF_OP(MUL, o_reg++, ALL, RESULT),
+ VSF_REG(r300->state.sw_tcl_inputs[i]),
+ VSF_ATTR_UNITY(r300->state.
+ sw_tcl_inputs[i]),
+ VSF_UNITY(r300->state.sw_tcl_inputs[i])
+ )
+
+ }
+
+ r300->state.vertex_shader.program_end--; /* r300 wants program length to be one more - no idea why */
+ r300->state.vertex_shader.program.length =
+ (r300->state.vertex_shader.program_end + 1) * 4;
+
+ r300->state.vertex_shader.unknown_ptr1 = r300->state.vertex_shader.program_end; /* magic value ? */
+ r300->state.vertex_shader.unknown_ptr2 = r300->state.vertex_shader.program_end; /* magic value ? */
+ r300->state.vertex_shader.unknown_ptr3 = r300->state.vertex_shader.program_end; /* magic value ? */
+
+}
+
+static void r300SetupVertexProgram(r300ContextPtr rmesa)
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+ int inst_count;
+ int param_count;
+ struct r300_vertex_program *prog =
+ (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
+
+ ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
+ R300_STATECHANGE(rmesa, vpp);
+	param_count =
+	    r300VertexProgUpdateParams(ctx,
+				       (struct r300_vertex_program_cont *)
+				       ctx->VertexProgram._Current /* prog */,
+				       (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+ bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
+ param_count /= 4;
+
+ /* Reset state, in case we don't use something */
+ ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
+
+ setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, &(prog->program));
+
+#if 0
+ setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1,
+ &(rmesa->state.vertex_shader.unknown1));
+ setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2,
+ &(rmesa->state.vertex_shader.unknown2));
+#endif
+
+ inst_count = prog->program.length / 4 - 1;
+
+ R300_STATECHANGE(rmesa, pvs);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
+ (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT)
+ | (inst_count /*pos_end */ << R300_PVS_CNTL_1_POS_END_SHIFT)
+ | (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
+ (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT)
+ | (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
+ (0 /*rmesa->state.vertex_shader.unknown_ptr2 */ <<
+ R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT)
+ | (inst_count /*rmesa->state.vertex_shader.unknown_ptr3 */ <<
+ 0);
+
+ /* This is done for vertex shader fragments, but also needs to be done for vap_pvs,
+ so I leave it as a reminder */
+#if 0
+ reg_start(R300_VAP_PVS_WAITIDLE, 0);
+ e32(0x00000000);
+#endif
+}
+
+static void r300SetupVertexShader(r300ContextPtr rmesa)
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+
+ /* Reset state, in case we don't use something */
+ ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
+
+	/* Not sure why this doesn't work...
+	   0x400 area might have something to do with pixel shaders as it appears right after pfs programming.
+	   0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */
+ //setup_vertex_shader_fragment(rmesa, 0x406, &unk4);
+ if (hw_tcl_on
+ && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->
+ translated) {
+ r300SetupVertexProgram(rmesa);
+ return;
+ }
+
+ /* This needs to be replaced by vertex shader generation code */
+ r300GenerateSimpleVertexShader(rmesa);
+
+ setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM,
+ &(rmesa->state.vertex_shader.program));
+
+#if 0
+ setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1,
+ &(rmesa->state.vertex_shader.unknown1));
+ setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2,
+ &(rmesa->state.vertex_shader.unknown2));
+#endif
+
+ R300_STATECHANGE(rmesa, pvs);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
+ (rmesa->state.vertex_shader.
+ program_start << R300_PVS_CNTL_1_PROGRAM_START_SHIFT)
+ | (rmesa->state.vertex_shader.
+ unknown_ptr1 << R300_PVS_CNTL_1_POS_END_SHIFT)
+ | (rmesa->state.vertex_shader.
+ program_end << R300_PVS_CNTL_1_PROGRAM_END_SHIFT);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
+ (rmesa->state.vertex_shader.
+ param_offset << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT)
+ | (rmesa->state.vertex_shader.
+ param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
+ (rmesa->state.vertex_shader.
+ unknown_ptr2 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT)
+ | (rmesa->state.vertex_shader.unknown_ptr3 << 0);
+
+ /* This is done for vertex shader fragments, but also needs to be done for vap_pvs,
+ so I leave it as a reminder */
+#if 0
+ reg_start(R300_VAP_PVS_WAITIDLE, 0);
+ e32(0x00000000);
+#endif
+}
+
+/**
+ * Completely recalculates hardware state based on the Mesa state.
+ */
+static void r300ResetHwState(r300ContextPtr r300)
+{
+ GLcontext *ctx = r300->radeon.glCtx;
+ int has_tcl = 1;
+
+ if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ has_tcl = 0;
+
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+	/* This is the place to initialize registers whose bitfields are
+	   accessed by different functions and where not all bits are used */
+
+ /* go and compute register values from GL state */
+
+ r300UpdateWindow(ctx);
+
+ r300ColorMask(ctx,
+ ctx->Color.ColorMask[RCOMP],
+ ctx->Color.ColorMask[GCOMP],
+ ctx->Color.ColorMask[BCOMP], ctx->Color.ColorMask[ACOMP]);
+
+ r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
+ r300DepthMask(ctx, ctx->Depth.Mask);
+ r300DepthFunc(ctx, ctx->Depth.Func);
+
+ /* stencil */
+ r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+ r300StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]);
+ r300StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0],
+ ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]);
+ r300StencilOpSeparate(ctx, 0, ctx->Stencil.FailFunc[0],
+ ctx->Stencil.ZFailFunc[0],
+ ctx->Stencil.ZPassFunc[0]);
+
+ r300UpdateCulling(ctx);
+
+ r300UpdateTextureState(ctx);
+
+ r300SetBlendState(ctx);
+
+ r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
+ r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
+
+ /* Initialize magic registers
+ TODO : learn what they really do, or get rid of
+ those we don't have to touch */
+ if (!has_tcl)
+ r300->hw.vap_cntl.cmd[1] = 0x0014045a;
+ else
+ r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */
+ r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
+ | R300_VPORT_X_OFFSET_ENA
+ | R300_VPORT_Y_SCALE_ENA
+ | R300_VPORT_Y_OFFSET_ENA
+ | R300_VPORT_Z_SCALE_ENA
+ | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT;
+ r300->hw.vte.cmd[2] = 0x00000008;
+
+ r300->hw.unk2134.cmd[1] = 0x00FFFFFF;
+ r300->hw.unk2134.cmd[2] = 0x00000000;
+ if (_mesa_little_endian())
+ r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP;
+ else
+ r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP;
+
+ /* disable VAP/TCL on non-TCL capable chips */
+ if (!has_tcl)
+ r300->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
+
+ r300->hw.unk21DC.cmd[1] = 0xAAAAAAAA;
+
+ r300->hw.vap_clip_cntl.cmd[1] = R300_221C_NORMAL;
+
+ r300->hw.unk2220.cmd[1] = r300PackFloat32(1.0);
+ r300->hw.unk2220.cmd[2] = r300PackFloat32(1.0);
+ r300->hw.unk2220.cmd[3] = r300PackFloat32(1.0);
+ r300->hw.unk2220.cmd[4] = r300PackFloat32(1.0);
+
+ /* what about other chips than r300 or rv350??? */
+ if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300)
+ r300->hw.unk2288.cmd[1] = R300_2288_R300;
+ else
+ r300->hw.unk2288.cmd[1] = R300_2288_RV350;
+
+ r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE
+ | R300_GB_LINE_STUFF_ENABLE
+ | R300_GB_TRIANGLE_STUFF_ENABLE /*| R300_GB_UNK31 */ ;
+
+ r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666;
+ r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666;
+ if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350))
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
+ R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_R300 |
+ R300_GB_TILE_SIZE_16;
+ else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410)
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
+ R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_RV410 |
+ R300_GB_TILE_SIZE_16;
+ else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
+ R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_R420 |
+ R300_GB_TILE_SIZE_16;
+ else
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
+ R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_RV300 |
+ R300_GB_TILE_SIZE_16;
+ /* set to 0 when fog is disabled? */
+ r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = R300_GB_FOG_SELECT_1_1_W;
+ r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = R300_AA_DISABLE; /* No antialiasing */
+
+ r300->hw.unk4200.cmd[1] = r300PackFloat32(0.0);
+ r300->hw.unk4200.cmd[2] = r300PackFloat32(0.0);
+ r300->hw.unk4200.cmd[3] = r300PackFloat32(1.0);
+ r300->hw.unk4200.cmd[4] = r300PackFloat32(1.0);
+
+ r300->hw.unk4214.cmd[1] = 0x00050005;
+
+ r300PointSize(ctx, 0.0);
+
+ r300->hw.unk4230.cmd[1] = 0x18000006;
+ r300->hw.unk4230.cmd[2] = 0x00020006;
+ r300->hw.unk4230.cmd[3] = r300PackFloat32(1.0 / 192.0);
+
+ r300LineWidth(ctx, 0.0);
+
+ r300->hw.unk4260.cmd[1] = 0;
+ r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0);
+ r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0);
+
+ r300->hw.shade.cmd[1] = 0x00000002;
+ r300ShadeModel(ctx, ctx->Light.ShadeModel);
+ r300->hw.shade.cmd[3] = 0x00000000;
+ r300->hw.shade.cmd[4] = 0x00000000;
+
+ r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode);
+ r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode);
+ r300->hw.polygon_mode.cmd[2] = 0x00000001;
+ r300->hw.polygon_mode.cmd[3] = 0x00000000;
+ r300->hw.zbias_cntl.cmd[1] = 0x00000000;
+
+ r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor,
+ ctx->Polygon.OffsetUnits);
+ r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill);
+
+ r300->hw.unk42C0.cmd[1] = 0x4B7FFFFF;
+ r300->hw.unk42C0.cmd[2] = 0x00000000;
+
+ r300->hw.unk43A4.cmd[1] = 0x0000001C;
+ r300->hw.unk43A4.cmd[2] = 0x2DA49525;
+
+ r300->hw.unk43E8.cmd[1] = 0x00FFFFFF;
+
+ r300->hw.unk46A4.cmd[1] = 0x00001B01;
+ r300->hw.unk46A4.cmd[2] = 0x00001B0F;
+ r300->hw.unk46A4.cmd[3] = 0x00001B0F;
+ r300->hw.unk46A4.cmd[4] = 0x00001B0F;
+ r300->hw.unk46A4.cmd[5] = 0x00000001;
+
+ r300Enable(ctx, GL_FOG, ctx->Fog.Enabled);
+ ctx->Driver.Fogfv(ctx, GL_FOG_MODE, NULL);
+ ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
+ ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
+ ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
+ ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
+ ctx->Driver.Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL);
+
+ r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
+ r300->hw.unk4BD8.cmd[1] = 0;
+
+ r300->hw.unk4E00.cmd[1] = 0;
+
+ r300BlendColor(ctx, ctx->Color.BlendColor);
+ r300->hw.blend_color.cmd[2] = 0;
+ r300->hw.blend_color.cmd[3] = 0;
+
+ /* Again, r300ClearBuffer uses this */
+ r300->hw.cb.cmd[R300_CB_OFFSET] =
+ r300->radeon.state.color.drawOffset +
+ r300->radeon.radeonScreen->fbLocation;
+ r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
+
+ if (r300->radeon.radeonScreen->cpp == 4)
+ r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+ else
+ r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+
+ if (r300->radeon.sarea->tiling_enabled)
+ r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+
+ r300->hw.unk4E50.cmd[1] = 0;
+ r300->hw.unk4E50.cmd[2] = 0;
+ r300->hw.unk4E50.cmd[3] = 0;
+ r300->hw.unk4E50.cmd[4] = 0;
+ r300->hw.unk4E50.cmd[5] = 0;
+ r300->hw.unk4E50.cmd[6] = 0;
+ r300->hw.unk4E50.cmd[7] = 0;
+ r300->hw.unk4E50.cmd[8] = 0;
+ r300->hw.unk4E50.cmd[9] = 0;
+
+ r300->hw.unk4E88.cmd[1] = 0;
+
+ r300->hw.unk4EA0.cmd[1] = 0x00000000;
+ r300->hw.unk4EA0.cmd[2] = 0xffffffff;
+
+ switch (ctx->Visual.depthBits) {
+ case 16:
+ r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z;
+ break;
+ case 24:
+ r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z;
+ break;
+ default:
+ fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
+ ctx->Visual.depthBits);
+ _mesa_exit(-1);
+
+ }
+ /* z compress? */
+ //r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32;
+
+ r300->hw.zstencil_format.cmd[3] = 0x00000003;
+ r300->hw.zstencil_format.cmd[4] = 0x00000000;
+
+ r300->hw.zb.cmd[R300_ZB_OFFSET] =
+ r300->radeon.radeonScreen->depthOffset +
+ r300->radeon.radeonScreen->fbLocation;
+ r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
+
+ if (r300->radeon.sarea->tiling_enabled) {
+ /* Turn off when clearing buffers ? */
+ r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTH_TILE_ENABLE;
+
+ if (ctx->Visual.depthBits == 24)
+ r300->hw.zb.cmd[R300_ZB_PITCH] |=
+ R300_DEPTH_MICROTILE_ENABLE;
+ }
+
+ r300->hw.unk4F28.cmd[1] = 0;
+
+ r300->hw.unk4F30.cmd[1] = 0;
+ r300->hw.unk4F30.cmd[2] = 0;
+
+ r300->hw.unk4F44.cmd[1] = 0;
+
+ r300->hw.unk4F54.cmd[1] = 0;
+
+ if (has_tcl) {
+ r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0;
+ r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0;
+ r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0);
+ r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
+ }
+//END: TODO
+ r300->hw.all_dirty = GL_TRUE;
+}
+
+
+extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx);
+
+extern int future_hw_tcl_on;
+void r300UpdateShaders(r300ContextPtr rmesa)
+{
+ GLcontext *ctx;
+ struct r300_vertex_program *vp;
+ int i;
+
+ ctx = rmesa->radeon.glCtx;
+
+ if (rmesa->NewGLState && hw_tcl_on) {
+ rmesa->NewGLState = 0;
+
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ rmesa->temp_attrib[i] =
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i];
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
+ &rmesa->dummy_attrib[i];
+ }
+
+ _tnl_UpdateFixedFunctionProgram(ctx);
+
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
+ rmesa->temp_attrib[i];
+ }
+
+ r300SelectVertexShader(rmesa);
+ vp = (struct r300_vertex_program *)
+ CURRENT_VERTEX_SHADER(ctx);
+ /*if (vp->translated == GL_FALSE)
+ r300TranslateVertexShader(vp); */
+ if (vp->translated == GL_FALSE) {
+			fprintf(stderr, "Falling back to sw-tcl\n");
+ hw_tcl_on = future_hw_tcl_on = 0;
+ r300ResetHwState(rmesa);
+
+ return;
+ }
+ r300UpdateStateParameters(ctx, _NEW_PROGRAM);
+ }
+
+}
+
+static void r300SetupPixelShader(r300ContextPtr rmesa)
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+ struct r300_fragment_program *fp = (struct r300_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+ int i, k;
+
+	if (!fp)		/* should only happen once, just after context is created */
+ return;
+
+ r300TranslateFragmentShader(rmesa, fp);
+ if (!fp->translated) {
+ fprintf(stderr, "%s: No valid fragment shader, exiting\n",
+ __FUNCTION__);
+ return;
+ }
+#define OUTPUT_FIELD(st, reg, field) \
+ R300_STATECHANGE(rmesa, st); \
+ for(i=0;i<=fp->alu_end;i++) \
+ rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=fp->alu.inst[i].field;\
+ rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmdpacket0(reg, fp->alu_end+1);
+
+ OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0);
+ OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1);
+ OUTPUT_FIELD(fpi[2], R300_PFS_INSTR2_0, inst2);
+ OUTPUT_FIELD(fpi[3], R300_PFS_INSTR3_0, inst3);
+#undef OUTPUT_FIELD
+
+ R300_STATECHANGE(rmesa, fp);
+ /* I just want to say, the way these nodes are stored.. weird.. */
+ for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) {
+ if (i < (fp->cur_node + 1)) {
+ rmesa->hw.fp.cmd[R300_FP_NODE0 + k] =
+ (fp->node[i].
+ alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT)
+ | (fp->node[i].
+ alu_end << R300_PFS_NODE_ALU_END_SHIFT)
+ | (fp->node[i].
+ tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT)
+ | (fp->node[i].
+ tex_end << R300_PFS_NODE_TEX_END_SHIFT)
+ | fp->node[i].flags; /* ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0); */
+ } else {
+ rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0;
+ }
+ }
+
+ /* PFS_CNTL_0 */
+ rmesa->hw.fp.cmd[R300_FP_CNTL0] =
+ fp->cur_node | (fp->first_node_has_tex << 3);
+ /* PFS_CNTL_1 */
+ rmesa->hw.fp.cmd[R300_FP_CNTL1] = fp->max_temp_idx;
+ /* PFS_CNTL_2 */
+ rmesa->hw.fp.cmd[R300_FP_CNTL2] =
+ (fp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+ | (fp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT)
+ | (fp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+ | (fp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT);
+
+ R300_STATECHANGE(rmesa, fpp);
+ for (i = 0; i < fp->const_nr; i++) {
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] =
+ r300PackFloat24(fp->constant[i][0]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] =
+ r300PackFloat24(fp->constant[i][1]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] =
+ r300PackFloat24(fp->constant[i][2]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] =
+ r300PackFloat24(fp->constant[i][3]);
+ }
+ rmesa->hw.fpp.cmd[R300_FPP_CMD_0] =
+ cmdpacket0(R300_PFS_PARAM_0_X, fp->const_nr * 4);
+}
+
+void r300UpdateShaderStates(r300ContextPtr rmesa)
+{
+ GLcontext *ctx;
+ ctx = rmesa->radeon.glCtx;
+
+ r300UpdateTextureState(ctx);
+
+ r300SetupPixelShader(rmesa);
+ r300SetupTextures(ctx);
+
+ if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ r300SetupVertexShader(rmesa);
+ r300SetupRSUnit(ctx);
+}
+
+/**
+ * Called by Mesa after an internal state update.
+ */
+static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ _swrast_InvalidateState(ctx, new_state);
+ _swsetup_InvalidateState(ctx, new_state);
+ _vbo_InvalidateState(ctx, new_state);
+ _tnl_InvalidateState(ctx, new_state);
+ _ae_invalidate_state(ctx, new_state);
+
+ if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+ r300UpdateDrawBuffer(ctx);
+ }
+
+ r300UpdateStateParameters(ctx, new_state);
+
+ r300->NewGLState |= new_state;
+}
+
+/**
+ * Calculate initial hardware state and register state functions.
+ * Assumes that the command buffer and state atoms have been
+ * initialized already.
+ */
+void r300InitState(r300ContextPtr r300)
+{
+ GLcontext *ctx = r300->radeon.glCtx;
+ GLuint depth_fmt;
+
+ radeonInitState(&r300->radeon);
+
+ switch (ctx->Visual.depthBits) {
+ case 16:
+ r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
+ depth_fmt = R300_DEPTH_FORMAT_16BIT_INT_Z;
+ r300->state.stencil.clear = 0x00000000;
+ break;
+ case 24:
+ r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff;
+ depth_fmt = R300_DEPTH_FORMAT_24BIT_INT_Z;
+ r300->state.stencil.clear = 0x00ff0000;
+ break;
+
+
+ default:
+ fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
+ ctx->Visual.depthBits);
+ _mesa_exit(-1);
+ }
+
+ /* Only have hw stencil when depth buffer is 24 bits deep */
+ r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 &&
+ ctx->Visual.depthBits == 24);
+
+ memset(&(r300->state.texture), 0, sizeof(r300->state.texture));
+
+ r300ResetHwState(r300);
+}
+
+static void r300RenderMode(GLcontext * ctx, GLenum mode)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ (void)rmesa;
+ (void)mode;
+}
+
+static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
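+	/* The plane equation is copied as raw 32-bit float bit patterns;
+	 * reinterpreting through GLint just moves the bits into the command
+	 * buffer unchanged.
+	 */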
+ GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+ R300_STATECHANGE( rmesa, vpucp[p] );
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3];
+}
+
+
+void r300UpdateClipPlanes( GLcontext *ctx )
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLuint p;
+
+ for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
+ if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
+ GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+ R300_STATECHANGE( rmesa, vpucp[p] );
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3];
+ }
+ }
+}
+
+/**
+ * Initialize driver's state callback functions
+ */
+void r300InitStateFuncs(struct dd_function_table *functions)
+{
+ radeonInitStateFuncs(functions);
+
+ functions->UpdateState = r300InvalidateState;
+ functions->AlphaFunc = r300AlphaFunc;
+ functions->BlendColor = r300BlendColor;
+ functions->BlendEquationSeparate = r300BlendEquationSeparate;
+ functions->BlendFuncSeparate = r300BlendFuncSeparate;
+ functions->Enable = r300Enable;
+ functions->ColorMask = r300ColorMask;
+ functions->DepthFunc = r300DepthFunc;
+ functions->DepthMask = r300DepthMask;
+ functions->CullFace = r300CullFace;
+ functions->Fogfv = r300Fogfv;
+ functions->FrontFace = r300FrontFace;
+ functions->ShadeModel = r300ShadeModel;
+
+ /* Stencil related */
+ functions->ClearStencil = r300ClearStencil;
+ functions->StencilFuncSeparate = r300StencilFuncSeparate;
+ functions->StencilMaskSeparate = r300StencilMaskSeparate;
+ functions->StencilOpSeparate = r300StencilOpSeparate;
+
+ /* Viewport related */
+ functions->Viewport = r300Viewport;
+ functions->DepthRange = r300DepthRange;
+ functions->PointSize = r300PointSize;
+ functions->LineWidth = r300LineWidth;
+
+ functions->PolygonOffset = r300PolygonOffset;
+ functions->PolygonMode = r300PolygonMode;
+
+ functions->RenderMode = r300RenderMode;
+
+ functions->ClipPlane = r300ClipPlane;
+}
diff --git a/r300/r300_state.h b/r300/r300_state.h
new file mode 100644
index 0000000..21a49b7
--- /dev/null
+++ b/r300/r300_state.h
@@ -0,0 +1,70 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_STATE_H__
+#define __R300_STATE_H__
+
+#include "r300_context.h"
+
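+/* Mark a hardware state atom (and the whole context) dirty so the atom is
+ * re-emitted with the next command buffer.
+ */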
+#define R300_STATECHANGE(r300, atom) \
+ do { \
+ r300->hw.atom.dirty = GL_TRUE; \
+ r300->hw.is_dirty = GL_TRUE; \
+ } while(0)
+
+#define R300_PRINT_STATE(r300, atom) \
+ r300PrintStateAtom(r300, &r300->hw.atom)
+
+/* Fire the buffered vertices no matter what.
+ TODO: This has not been implemented yet
+ */
+#define R300_FIREVERTICES( r300 ) \
+do { \
+ \
+ if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) { \
+ r300Flush( (r300)->radeon.glCtx ); \
+ } \
+ \
+} while (0)
+
+extern void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state);
+extern void r300InitState(r300ContextPtr r300);
+extern void r300InitStateFuncs(struct dd_function_table *functions);
+extern void r300UpdateViewportOffset(GLcontext * ctx);
+extern void r300UpdateDrawBuffer(GLcontext * ctx);
+
+extern void r300UpdateShaders(r300ContextPtr rmesa);
+extern void r300UpdateShaderStates(r300ContextPtr rmesa);
+
+#endif /* __R300_STATE_H__ */
diff --git a/r300/r300_tex.c b/r300/r300_tex.c
new file mode 100644
index 0000000..2a21c61
--- /dev/null
+++ b/r300/r300_tex.c
@@ -0,0 +1,1166 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "colormac.h"
+#include "context.h"
+#include "enums.h"
+#include "image.h"
+#include "simple_list.h"
+#include "texformat.h"
+#include "texstore.h"
+#include "texmem.h"
+#include "teximage.h"
+#include "texobj.h"
+
+#include "r300_context.h"
+#include "r300_state.h"
+#include "r300_ioctl.h"
+#include "r300_tex.h"
+
+#include "xmlpool.h"
+
+/**
+ * Set the texture wrap modes.
+ *
+ * \param t Texture object whose wrap modes are to be set
+ * \param swrap Wrap mode for the \a s texture coordinate
+ * \param twrap Wrap mode for the \a t texture coordinate
+ */
+
+static void r300SetTexWrap(r300TexObjPtr t, GLenum swrap, GLenum twrap,
+ GLenum rwrap)
+{
+ unsigned long hw_swrap = 0, hw_twrap = 0, hw_qwrap = 0;
+
+ t->filter &=
+ ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_Q_MASK);
+
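+	/* S, T and R (Q) use the same wrap-mode encoding; only the destination
+	 * bit field differs (see the shifts at the end of this function).
+	 */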
+ switch (swrap) {
+ case GL_REPEAT:
+ hw_swrap |= R300_TX_REPEAT;
+ break;
+ case GL_CLAMP:
+ hw_swrap |= R300_TX_CLAMP;
+ break;
+ case GL_CLAMP_TO_EDGE:
+ hw_swrap |= R300_TX_CLAMP_TO_EDGE;
+ break;
+ case GL_CLAMP_TO_BORDER:
+ hw_swrap |= R300_TX_CLAMP_TO_BORDER;
+ break;
+ case GL_MIRRORED_REPEAT:
+ hw_swrap |= R300_TX_REPEAT | R300_TX_MIRRORED;
+ break;
+ case GL_MIRROR_CLAMP_EXT:
+ hw_swrap |= R300_TX_CLAMP | R300_TX_MIRRORED;
+ break;
+ case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+ hw_swrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
+ break;
+ case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+ hw_swrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
+ break;
+ default:
+ _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
+ }
+
+ switch (twrap) {
+ case GL_REPEAT:
+ hw_twrap |= R300_TX_REPEAT;
+ break;
+ case GL_CLAMP:
+ hw_twrap |= R300_TX_CLAMP;
+ break;
+ case GL_CLAMP_TO_EDGE:
+ hw_twrap |= R300_TX_CLAMP_TO_EDGE;
+ break;
+ case GL_CLAMP_TO_BORDER:
+ hw_twrap |= R300_TX_CLAMP_TO_BORDER;
+ break;
+ case GL_MIRRORED_REPEAT:
+ hw_twrap |= R300_TX_REPEAT | R300_TX_MIRRORED;
+ break;
+ case GL_MIRROR_CLAMP_EXT:
+ hw_twrap |= R300_TX_CLAMP | R300_TX_MIRRORED;
+ break;
+ case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+ hw_twrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
+ break;
+ case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+ hw_twrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
+ break;
+ default:
+ _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
+ }
+
+ switch (rwrap) {
+ case GL_REPEAT:
+ hw_qwrap |= R300_TX_REPEAT;
+ break;
+ case GL_CLAMP:
+ hw_qwrap |= R300_TX_CLAMP;
+ break;
+ case GL_CLAMP_TO_EDGE:
+ hw_qwrap |= R300_TX_CLAMP_TO_EDGE;
+ break;
+ case GL_CLAMP_TO_BORDER:
+ hw_qwrap |= R300_TX_CLAMP_TO_BORDER;
+ break;
+ case GL_MIRRORED_REPEAT:
+ hw_qwrap |= R300_TX_REPEAT | R300_TX_MIRRORED;
+ break;
+ case GL_MIRROR_CLAMP_EXT:
+ hw_qwrap |= R300_TX_CLAMP | R300_TX_MIRRORED;
+ break;
+ case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+ hw_qwrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
+ break;
+ case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+ hw_qwrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
+ break;
+ default:
+ _mesa_problem(NULL, "bad R wrap mode in %s", __FUNCTION__);
+ }
+
+ t->filter |= hw_swrap << R300_TX_WRAP_S_SHIFT;
+ t->filter |= hw_twrap << R300_TX_WRAP_T_SHIFT;
+ t->filter |= hw_qwrap << R300_TX_WRAP_Q_SHIFT;
+}
+
+static void r300SetTexMaxAnisotropy(r300TexObjPtr t, GLfloat max)
+{
+ t->filter &= ~R300_TX_MAX_ANISO_MASK;
+
+ if (max <= 1.0) {
+ t->filter |= R300_TX_MAX_ANISO_1_TO_1;
+ } else if (max <= 2.0) {
+ t->filter |= R300_TX_MAX_ANISO_2_TO_1;
+ } else if (max <= 4.0) {
+ t->filter |= R300_TX_MAX_ANISO_4_TO_1;
+ } else if (max <= 8.0) {
+ t->filter |= R300_TX_MAX_ANISO_8_TO_1;
+ } else {
+ t->filter |= R300_TX_MAX_ANISO_16_TO_1;
+ }
+}
+
+/**
+ * Set the texture magnification and minification modes.
+ *
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ */
+
+static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf)
+{
+ GLuint anisotropy = (t->filter & R300_TX_MAX_ANISO_MASK);
+
+ t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MAG_FILTER_MASK);
+
+ if (anisotropy == R300_TX_MAX_ANISO_1_TO_1) {
+ switch (minf) {
+ case GL_NEAREST:
+ t->filter |= R300_TX_MIN_FILTER_NEAREST;
+ break;
+ case GL_LINEAR:
+ t->filter |= R300_TX_MIN_FILTER_LINEAR;
+ break;
+ case GL_NEAREST_MIPMAP_NEAREST:
+ t->filter |= R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST;
+ break;
+ case GL_NEAREST_MIPMAP_LINEAR:
+ t->filter |= R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR;
+ break;
+ case GL_LINEAR_MIPMAP_NEAREST:
+ t->filter |= R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST;
+ break;
+ case GL_LINEAR_MIPMAP_LINEAR:
+ t->filter |= R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR;
+ break;
+ }
+ } else {
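+		/* Anisotropic minification: both GL_*_MIPMAP_NEAREST modes map
+		 * to the aniso nearest-mip filter and both GL_*_MIPMAP_LINEAR
+		 * modes map to the aniso linear-mip filter.
+		 */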
+ switch (minf) {
+ case GL_NEAREST:
+ t->filter |= R300_TX_MIN_FILTER_ANISO_NEAREST;
+ break;
+ case GL_LINEAR:
+ t->filter |= R300_TX_MIN_FILTER_ANISO_LINEAR;
+ break;
+ case GL_NEAREST_MIPMAP_NEAREST:
+ case GL_LINEAR_MIPMAP_NEAREST:
+ t->filter |=
+ R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST;
+ break;
+ case GL_NEAREST_MIPMAP_LINEAR:
+ case GL_LINEAR_MIPMAP_LINEAR:
+ t->filter |=
+ R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR;
+ break;
+ }
+ }
+
+ /* Note we don't have 3D mipmaps so only use the mag filter setting
+ * to set the 3D texture filter mode.
+ */
+ switch (magf) {
+ case GL_NEAREST:
+ t->filter |= R300_TX_MAG_FILTER_NEAREST;
+ break;
+ case GL_LINEAR:
+ t->filter |= R300_TX_MAG_FILTER_LINEAR;
+ break;
+ }
+}
+
+static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4])
+{
+ t->pp_border_color = PACK_COLOR_8888(c[0], c[1], c[2], c[3]);
+}
+
+/**
+ * Allocate space for and load the mesa images into the texture memory block.
+ * This will happen before drawing with a new texture, or drawing with a
+ * texture after it was swapped out or teximaged again.
+ */
+
+static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
+{
+ r300TexObjPtr t;
+
+ t = CALLOC_STRUCT(r300_tex_obj);
+ texObj->DriverData = t;
+ if (t != NULL) {
+ if (RADEON_DEBUG & DEBUG_TEXTURE) {
+ fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
+ (void *)texObj, (void *)t);
+ }
+
+ /* Initialize non-image-dependent parts of the state:
+ */
+ t->base.tObj = texObj;
+ t->border_fallback = GL_FALSE;
+
+ make_empty_list(&t->base);
+
+ r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR);
+ r300SetTexMaxAnisotropy(t, texObj->MaxAnisotropy);
+ r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter);
+ r300SetTexBorderColor(t, texObj->_BorderChan);
+ }
+
+ return t;
+}
+
+/* Try to find a format that will only need a memcpy on upload */
+static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
+ GLenum srcType)
+{
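+	/* Detect host endianness: the first byte of the 32-bit value 1 is 1 on
+	 * a little-endian host and 0 on a big-endian host.
+	 */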
+ const GLuint ui = 1;
+ const GLubyte littleEndian = *((const GLubyte *)&ui);
+
+ if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+ (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE
+ && !littleEndian) || (srcFormat == GL_ABGR_EXT
+ && srcType == GL_UNSIGNED_INT_8_8_8_8_REV)
+ || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE
+ && littleEndian)) {
+ return &_mesa_texformat_rgba8888;
+ } else
+ if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV)
+ || (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE
+ && littleEndian) || (srcFormat == GL_ABGR_EXT
+ && srcType == GL_UNSIGNED_INT_8_8_8_8)
+ || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE
+ && !littleEndian)) {
+ return &_mesa_texformat_rgba8888_rev;
+ } else if (srcFormat == GL_BGRA &&
+ ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+ srcType == GL_UNSIGNED_INT_8_8_8_8)) {
+ return &_mesa_texformat_argb8888_rev;
+ } else if (srcFormat == GL_BGRA &&
+ ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+ srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
+ return &_mesa_texformat_argb8888;
+ } else
+ return _dri_texformat_argb8888;
+}
+
+static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
+ GLint
+ internalFormat,
+ GLenum format,
+ GLenum type)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ const GLboolean do32bpt =
+ (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
+ const GLboolean force16bpt =
+ (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
+ (void)format;
+
+#if 0
+ fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n",
+ _mesa_lookup_enum_by_nr(internalFormat), internalFormat,
+ _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+ fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt);
+#endif
+
+ switch (internalFormat) {
+ case 4:
+ case GL_RGBA:
+ case GL_COMPRESSED_RGBA:
+ switch (type) {
+ case GL_UNSIGNED_INT_10_10_10_2:
+ case GL_UNSIGNED_INT_2_10_10_10_REV:
+ return do32bpt ? _dri_texformat_argb8888 :
+ _dri_texformat_argb1555;
+ case GL_UNSIGNED_SHORT_4_4_4_4:
+ case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+ return _dri_texformat_argb4444;
+ case GL_UNSIGNED_SHORT_5_5_5_1:
+ case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+ return _dri_texformat_argb1555;
+ default:
+ return do32bpt ? r300Choose8888TexFormat(format, type) :
+ _dri_texformat_argb4444;
+ }
+
+ case 3:
+ case GL_RGB:
+ case GL_COMPRESSED_RGB:
+ switch (type) {
+ case GL_UNSIGNED_SHORT_4_4_4_4:
+ case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+ return _dri_texformat_argb4444;
+ case GL_UNSIGNED_SHORT_5_5_5_1:
+ case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+ return _dri_texformat_argb1555;
+ case GL_UNSIGNED_SHORT_5_6_5:
+ case GL_UNSIGNED_SHORT_5_6_5_REV:
+ return _dri_texformat_rgb565;
+ default:
+ return do32bpt ? _dri_texformat_argb8888 :
+ _dri_texformat_rgb565;
+ }
+
+ case GL_RGBA8:
+ case GL_RGB10_A2:
+ case GL_RGBA12:
+ case GL_RGBA16:
+ return !force16bpt ?
+ r300Choose8888TexFormat(format,
+ type) : _dri_texformat_argb4444;
+
+ case GL_RGBA4:
+ case GL_RGBA2:
+ return _dri_texformat_argb4444;
+
+ case GL_RGB5_A1:
+ return _dri_texformat_argb1555;
+
+ case GL_RGB8:
+ case GL_RGB10:
+ case GL_RGB12:
+ case GL_RGB16:
+ return !force16bpt ? _dri_texformat_argb8888 :
+ _dri_texformat_rgb565;
+
+ case GL_RGB5:
+ case GL_RGB4:
+ case GL_R3_G3_B2:
+ return _dri_texformat_rgb565;
+
+ case GL_ALPHA:
+ case GL_ALPHA4:
+ case GL_ALPHA8:
+ case GL_ALPHA12:
+ case GL_ALPHA16:
+ case GL_COMPRESSED_ALPHA:
+ return _dri_texformat_a8;
+
+ case 1:
+ case GL_LUMINANCE:
+ case GL_LUMINANCE4:
+ case GL_LUMINANCE8:
+ case GL_LUMINANCE12:
+ case GL_LUMINANCE16:
+ case GL_COMPRESSED_LUMINANCE:
+ return _dri_texformat_l8;
+
+ case 2:
+ case GL_LUMINANCE_ALPHA:
+ case GL_LUMINANCE4_ALPHA4:
+ case GL_LUMINANCE6_ALPHA2:
+ case GL_LUMINANCE8_ALPHA8:
+ case GL_LUMINANCE12_ALPHA4:
+ case GL_LUMINANCE12_ALPHA12:
+ case GL_LUMINANCE16_ALPHA16:
+ case GL_COMPRESSED_LUMINANCE_ALPHA:
+ return _dri_texformat_al88;
+
+ case GL_INTENSITY:
+ case GL_INTENSITY4:
+ case GL_INTENSITY8:
+ case GL_INTENSITY12:
+ case GL_INTENSITY16:
+ case GL_COMPRESSED_INTENSITY:
+ return _dri_texformat_i8;
+
+ case GL_YCBCR_MESA:
+ if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+ type == GL_UNSIGNED_BYTE)
+ return &_mesa_texformat_ycbcr;
+ else
+ return &_mesa_texformat_ycbcr_rev;
+
+ case GL_RGB_S3TC:
+ case GL_RGB4_S3TC:
+ case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+ return &_mesa_texformat_rgb_dxt1;
+
+ case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+ return &_mesa_texformat_rgba_dxt1;
+
+ case GL_RGBA_S3TC:
+ case GL_RGBA4_S3TC:
+ case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+ return &_mesa_texformat_rgba_dxt3;
+
+ case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+ return &_mesa_texformat_rgba_dxt5;
+
+ case GL_ALPHA16F_ARB:
+ return &_mesa_texformat_alpha_float16;
+ case GL_ALPHA32F_ARB:
+ return &_mesa_texformat_alpha_float32;
+ case GL_LUMINANCE16F_ARB:
+ return &_mesa_texformat_luminance_float16;
+ case GL_LUMINANCE32F_ARB:
+ return &_mesa_texformat_luminance_float32;
+ case GL_LUMINANCE_ALPHA16F_ARB:
+ return &_mesa_texformat_luminance_alpha_float16;
+ case GL_LUMINANCE_ALPHA32F_ARB:
+ return &_mesa_texformat_luminance_alpha_float32;
+ case GL_INTENSITY16F_ARB:
+ return &_mesa_texformat_intensity_float16;
+ case GL_INTENSITY32F_ARB:
+ return &_mesa_texformat_intensity_float32;
+ case GL_RGB16F_ARB:
+ return &_mesa_texformat_rgba_float16;
+ case GL_RGB32F_ARB:
+ return &_mesa_texformat_rgba_float32;
+ case GL_RGBA16F_ARB:
+ return &_mesa_texformat_rgba_float16;
+ case GL_RGBA32F_ARB:
+ return &_mesa_texformat_rgba_float32;
+
+ default:
+ _mesa_problem(ctx,
+ "unexpected internalFormat 0x%x in r300ChooseTextureFormat",
+ (int)internalFormat);
+ return NULL;
+ }
+
+ return NULL; /* never get here */
+}
+
+static GLboolean
+r300ValidateClientStorage(GLcontext * ctx, GLenum target,
+ GLint internalFormat,
+ GLint srcWidth, GLint srcHeight,
+ GLenum format, GLenum type, const void *pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "intformat %s format %s type %s\n",
+ _mesa_lookup_enum_by_nr(internalFormat),
+ _mesa_lookup_enum_by_nr(format),
+ _mesa_lookup_enum_by_nr(type));
+
+ if (!ctx->Unpack.ClientStorage)
+ return 0;
+
+ if (ctx->_ImageTransferState ||
+ texImage->IsCompressed || texObj->GenerateMipmap)
+ return 0;
+
+ /* This list is incomplete, may be different on ppc???
+ */
+ switch (internalFormat) {
+ case GL_RGBA:
+ if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) {
+ texImage->TexFormat = _dri_texformat_argb8888;
+ } else
+ return 0;
+ break;
+
+ case GL_RGB:
+ if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
+ texImage->TexFormat = _dri_texformat_rgb565;
+ } else
+ return 0;
+ break;
+
+ case GL_YCBCR_MESA:
+ if (format == GL_YCBCR_MESA &&
+ type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
+ texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
+ } else if (format == GL_YCBCR_MESA &&
+ (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+ type == GL_UNSIGNED_BYTE)) {
+ texImage->TexFormat = &_mesa_texformat_ycbcr;
+ } else
+ return 0;
+ break;
+
+ default:
+ return 0;
+ }
+
+ /* Could deal with these packing issues, but currently don't:
+ */
+ if (packing->SkipPixels ||
+ packing->SkipRows || packing->SwapBytes || packing->LsbFirst) {
+ return 0;
+ }
+
+ {
+ GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
+ format, type);
+
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: srcRowStride %d/%x\n",
+ __FUNCTION__, srcRowStride, srcRowStride);
+
+ /* Could check this later in upload, pitch restrictions could be
+ * relaxed, but would need to store the image pitch somewhere,
+ * as packing details might change before image is uploaded:
+ */
+ if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
+ || (srcRowStride & 63))
+ return 0;
+
+ /* Have validated that _mesa_transfer_teximage would be a straight
+ * memcpy at this point. NOTE: future calls to TexSubImage will
+ * overwrite the client data. This is explicitly mentioned in the
+ * extension spec.
+ */
+ texImage->Data = (void *)pixels;
+ texImage->IsClientData = GL_TRUE;
+ texImage->RowStride =
+ srcRowStride / texImage->TexFormat->TexelBytes;
+
+ return 1;
+ }
+}
+
+static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint border,
+ GLenum format, GLenum type, const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ driTextureObject *t = (driTextureObject *) texObj->DriverData;
+
+ if (t) {
+ driSwapOutTextureObject(t);
+ } else {
+ t = (driTextureObject *) r300AllocTexObj(texObj);
+ if (!t) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+ return;
+ }
+ }
+
+ /* Note, this will call ChooseTextureFormat */
+ _mesa_store_teximage1d(ctx, target, level, internalFormat,
+ width, border, format, type, pixels,
+ &ctx->Unpack, texObj, texImage);
+
+ t->dirty_images[0] |= (1 << level);
+}
+
+static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+ GLint xoffset,
+ GLsizei width,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ driTextureObject *t = (driTextureObject *) texObj->DriverData;
+
+ assert(t); /* this _should_ be true */
+ if (t) {
+ driSwapOutTextureObject(t);
+ } else {
+ t = (driTextureObject *) r300AllocTexObj(texObj);
+ if (!t) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+ return;
+ }
+ }
+
+ _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+ format, type, pixels, packing, texObj,
+ texImage);
+
+ t->dirty_images[0] |= (1 << level);
+}
+
+static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint height, GLint border,
+ GLenum format, GLenum type, const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ driTextureObject *t = (driTextureObject *) texObj->DriverData;
+ GLuint face;
+
+ /* which cube face or ordinary 2D image */
+ switch (target) {
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ face =
+ (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+ ASSERT(face < 6);
+ break;
+ default:
+ face = 0;
+ }
+
+ if (t != NULL) {
+ driSwapOutTextureObject(t);
+ } else {
+ t = (driTextureObject *) r300AllocTexObj(texObj);
+ if (!t) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+ return;
+ }
+ }
+
+ texImage->IsClientData = GL_FALSE;
+
+ if (r300ValidateClientStorage(ctx, target,
+ internalFormat,
+ width, height,
+ format, type, pixels,
+ packing, texObj, texImage)) {
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: Using client storage\n",
+ __FUNCTION__);
+ } else {
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: Using normal storage\n",
+ __FUNCTION__);
+
+ /* Normal path: copy (to cached memory) and eventually upload
+ * via another copy to GART memory and then a blit... Could
+ * eliminate one copy by going straight to (permanent) GART.
+ *
+ * Note, this will call r300ChooseTextureFormat.
+ */
+ _mesa_store_teximage2d(ctx, target, level, internalFormat,
+ width, height, border, format, type,
+ pixels, &ctx->Unpack, texObj, texImage);
+
+ t->dirty_images[face] |= (1 << level);
+ }
+}
+
+static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+ GLint xoffset, GLint yoffset,
+ GLsizei width, GLsizei height,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ driTextureObject *t = (driTextureObject *) texObj->DriverData;
+ GLuint face;
+
+ /* which cube face or ordinary 2D image */
+ switch (target) {
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ face =
+ (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+ ASSERT(face < 6);
+ break;
+ default:
+ face = 0;
+ }
+
+ assert(t); /* this _should_ be true */
+ if (t) {
+ driSwapOutTextureObject(t);
+ } else {
+ t = (driTextureObject *) r300AllocTexObj(texObj);
+ if (!t) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+ return;
+ }
+ }
+
+ _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+ height, format, type, pixels, packing, texObj,
+ texImage);
+
+ t->dirty_images[face] |= (1 << level);
+}
+
+static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
+ GLint level, GLint internalFormat,
+ GLint width, GLint height, GLint border,
+ GLsizei imageSize, const GLvoid * data,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ driTextureObject *t = (driTextureObject *) texObj->DriverData;
+ GLuint face;
+
+ /* which cube face or ordinary 2D image */
+ switch (target) {
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ face =
+ (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+ ASSERT(face < 6);
+ break;
+ default:
+ face = 0;
+ }
+
+ if (t != NULL) {
+ driSwapOutTextureObject(t);
+ } else {
+ t = (driTextureObject *) r300AllocTexObj(texObj);
+ if (!t) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY,
+ "glCompressedTexImage2D");
+ return;
+ }
+ }
+
+ texImage->IsClientData = GL_FALSE;
+
+	/* Can't call r300ValidateClientStorage here (different parameters),
+	 * and it would never return true for compressed images anyway.
+	 */
+#if 0
+ if (r300ValidateClientStorage(ctx, target,
+ internalFormat,
+ width, height,
+ format, type, pixels,
+ packing, texObj, texImage)) {
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: Using client storage\n",
+ __FUNCTION__);
+ } else
+#endif
+ {
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: Using normal storage\n",
+ __FUNCTION__);
+
+ /* Normal path: copy (to cached memory) and eventually upload
+ * via another copy to GART memory and then a blit... Could
+ * eliminate one copy by going straight to (permanent) GART.
+ *
+ * Note, this will call r300ChooseTextureFormat.
+ */
+ _mesa_store_compressed_teximage2d(ctx, target, level,
+ internalFormat, width, height,
+ border, imageSize, data,
+ texObj, texImage);
+
+ t->dirty_images[face] |= (1 << level);
+ }
+}
+
+static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+ GLint level, GLint xoffset,
+ GLint yoffset, GLsizei width,
+ GLsizei height, GLenum format,
+ GLsizei imageSize, const GLvoid * data,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ driTextureObject *t = (driTextureObject *) texObj->DriverData;
+ GLuint face;
+
+ /* which cube face or ordinary 2D image */
+ switch (target) {
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ face =
+ (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+ ASSERT(face < 6);
+ break;
+ default:
+ face = 0;
+ }
+
+ assert(t); /* this _should_ be true */
+ if (t) {
+ driSwapOutTextureObject(t);
+ } else {
+ t = (driTextureObject *) r300AllocTexObj(texObj);
+ if (!t) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY,
+				    "glCompressedTexSubImage2D");
+ return;
+ }
+ }
+
+ _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset,
+ yoffset, width, height, format,
+ imageSize, data, texObj, texImage);
+
+ t->dirty_images[face] |= (1 << level);
+}
+
+static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint height, GLint depth,
+ GLint border,
+ GLenum format, GLenum type, const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ driTextureObject *t = (driTextureObject *) texObj->DriverData;
+
+ if (t) {
+ driSwapOutTextureObject(t);
+ } else {
+ t = (driTextureObject *) r300AllocTexObj(texObj);
+ if (!t) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
+ return;
+ }
+ }
+
+ texImage->IsClientData = GL_FALSE;
+
+#if 0
+ if (r300ValidateClientStorage(ctx, target,
+ internalFormat,
+ width, height,
+ format, type, pixels,
+ packing, texObj, texImage)) {
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: Using client storage\n",
+ __FUNCTION__);
+ } else
+#endif
+ {
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: Using normal storage\n",
+ __FUNCTION__);
+
+ /* Normal path: copy (to cached memory) and eventually upload
+ * via another copy to GART memory and then a blit... Could
+ * eliminate one copy by going straight to (permanent) GART.
+ *
+ * Note, this will call r300ChooseTextureFormat.
+ */
+ _mesa_store_teximage3d(ctx, target, level, internalFormat,
+ width, height, depth, border,
+ format, type, pixels,
+ &ctx->Unpack, texObj, texImage);
+
+ t->dirty_images[0] |= (1 << level);
+ }
+}
+
+static void
+r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ driTextureObject *t = (driTextureObject *) texObj->DriverData;
+
+/* fprintf(stderr, "%s\n", __FUNCTION__); */
+
+ assert(t); /* this _should_ be true */
+ if (t) {
+ driSwapOutTextureObject(t);
+ } else {
+ t = (driTextureObject *) r300AllocTexObj(texObj);
+ if (!t) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
+ return;
+ }
+ texObj->DriverData = t;
+ }
+
+ _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type, pixels, packing, texObj,
+ texImage);
+
+ t->dirty_images[0] |= (1 << level);
+}
+
+static void r300TexEnv(GLcontext * ctx, GLenum target,
+ GLenum pname, const GLfloat * param)
+{
+ if (RADEON_DEBUG & DEBUG_STATE) {
+ fprintf(stderr, "%s( %s )\n",
+ __FUNCTION__, _mesa_lookup_enum_by_nr(pname));
+ }
+
+ /* This is incorrect: Need to maintain this data for each of
+ * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch
+ * between them according to _ReallyEnabled.
+ */
+ switch (pname) {
+ case GL_TEXTURE_LOD_BIAS_EXT:{
+#if 0 /* Needs to be relocated in order to make sure we got the right tmu */
+ GLfloat bias, min;
+ GLuint b;
+
+ /* The R300's LOD bias is a signed 2's complement value with a
+ * range of -16.0 <= bias < 16.0.
+ *
+ * NOTE: Add a small bias to the bias for conform mipsel.c test.
+ */
+ bias = *param + .01;
+ min =
+ driQueryOptionb(&rmesa->radeon.optionCache,
+ "no_neg_lod_bias") ? 0.0 : -16.0;
+ bias = CLAMP(bias, min, 16.0);
+
+ /* 0.0 - 16.0 == 0x0 - 0x1000 */
+ /* 0.0 - -16.0 == 0x1001 - 0x1fff */
+ b = 0x1000 / 16.0 * bias;
+ b &= R300_LOD_BIAS_MASK;
+
+ if (b !=
+ (rmesa->hw.tex.unknown1.
+ cmd[R300_TEX_VALUE_0 +
+ unit] & R300_LOD_BIAS_MASK)) {
+ R300_STATECHANGE(rmesa, tex.unknown1);
+ rmesa->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 +
+ unit] &=
+ ~R300_LOD_BIAS_MASK;
+ rmesa->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 +
+ unit] |= b;
+ }
+#endif
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+/**
+ * Changes variables and flags for a state update, which will happen at the
+ * next UpdateTextureState
+ */
+
+static void r300TexParameter(GLcontext * ctx, GLenum target,
+ struct gl_texture_object *texObj,
+ GLenum pname, const GLfloat * params)
+{
+ r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData;
+
+ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ fprintf(stderr, "%s( %s )\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr(pname));
+ }
+
+ switch (pname) {
+ case GL_TEXTURE_MIN_FILTER:
+ case GL_TEXTURE_MAG_FILTER:
+ case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+ r300SetTexMaxAnisotropy(t, texObj->MaxAnisotropy);
+ r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter);
+ break;
+
+ case GL_TEXTURE_WRAP_S:
+ case GL_TEXTURE_WRAP_T:
+ case GL_TEXTURE_WRAP_R:
+ r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR);
+ break;
+
+ case GL_TEXTURE_BORDER_COLOR:
+ r300SetTexBorderColor(t, texObj->_BorderChan);
+ break;
+
+ case GL_TEXTURE_BASE_LEVEL:
+ case GL_TEXTURE_MAX_LEVEL:
+ case GL_TEXTURE_MIN_LOD:
+ case GL_TEXTURE_MAX_LOD:
+ /* This isn't the most efficient solution but there doesn't appear to
+ * be a nice alternative. Since there's no LOD clamping,
+ * we just have to rely on loading the right subset of mipmap levels
+ * to simulate a clamped LOD.
+ */
+ driSwapOutTextureObject((driTextureObject *) t);
+ break;
+
+ default:
+ return;
+ }
+
+ /* Mark this texobj as dirty (one bit per tex unit)
+ */
+ t->dirty_state = TEX_ALL;
+}
+
+static void r300BindTexture(GLcontext * ctx, GLenum target,
+ struct gl_texture_object *texObj)
+{
+ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__,
+ (void *)texObj, ctx->Texture.CurrentUnit);
+ }
+
+ if ((target == GL_TEXTURE_1D)
+ || (target == GL_TEXTURE_2D)
+ || (target == GL_TEXTURE_3D)
+ || (target == GL_TEXTURE_CUBE_MAP)
+ || (target == GL_TEXTURE_RECTANGLE_NV)) {
+ assert(texObj->DriverData != NULL);
+ }
+}
+
+static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ driTextureObject *t = (driTextureObject *) texObj->DriverData;
+
+ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+ (void *)texObj,
+ _mesa_lookup_enum_by_nr(texObj->Target));
+ }
+
+ if (t != NULL) {
+ if (rmesa) {
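+			/* Flush any buffered commands that may still reference
+			 * this texture before its memory is released.
+			 */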
+ R300_FIREVERTICES(rmesa);
+ }
+
+ driDestroyTextureObject(t);
+ }
+ /* Free mipmap images and the texture object itself */
+ _mesa_delete_texture_object(ctx, texObj);
+}
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+ * Note: we could use containment here to 'derive' the driver-specific
+ * texture object from the core mesa gl_texture_object. Not done at this time.
+ * Fixup MaxAnisotropy according to user preference.
+ */
+static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
+ GLuint name,
+ GLenum target)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct gl_texture_object *obj;
+ obj = _mesa_new_texture_object(ctx, name, target);
+ if (!obj)
+ return NULL;
+ obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
+
+ r300AllocTexObj(obj);
+ return obj;
+}
+
+void r300InitTextureFuncs(struct dd_function_table *functions)
+{
+ /* Note: we only plug in the functions we implement in the driver
+ * since _mesa_init_driver_functions() was already called.
+ */
+ functions->ChooseTextureFormat = r300ChooseTextureFormat;
+ functions->TexImage1D = r300TexImage1D;
+ functions->TexImage2D = r300TexImage2D;
+ functions->TexImage3D = r300TexImage3D;
+ functions->TexSubImage1D = r300TexSubImage1D;
+ functions->TexSubImage2D = r300TexSubImage2D;
+ functions->TexSubImage3D = r300TexSubImage3D;
+ functions->NewTextureObject = r300NewTextureObject;
+ functions->BindTexture = r300BindTexture;
+ functions->DeleteTexture = r300DeleteTexture;
+ functions->IsTextureResident = driIsTextureResident;
+
+ functions->TexEnv = r300TexEnv;
+ functions->TexParameter = r300TexParameter;
+
+ functions->CompressedTexImage2D = r300CompressedTexImage2D;
+ functions->CompressedTexSubImage2D = r300CompressedTexSubImage2D;
+
+ driInitTextureFormats();
+}
diff --git a/r300/r300_tex.h b/r300/r300_tex.h
new file mode 100644
index 0000000..f67a8e6
--- /dev/null
+++ b/r300/r300_tex.h
@@ -0,0 +1,51 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __r300_TEX_H__
+#define __r300_TEX_H__
+
+extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth,
+ GLuint pitch);
+
+extern void r300UpdateTextureState(GLcontext * ctx);
+
+extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t,
+ GLuint face);
+
+extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t);
+
+extern void r300InitTextureFuncs(struct dd_function_table *functions);
+
+#endif /* __r300_TEX_H__ */
diff --git a/r300/r300_texmem.c b/r300/r300_texmem.c
new file mode 100644
index 0000000..e2e8355
--- /dev/null
+++ b/r300/r300_texmem.c
@@ -0,0 +1,584 @@
+/**************************************************************************
+
+Copyright (C) Tungsten Graphics 2002. All Rights Reserved.
+The Weather Channel, Inc. funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86
+license. This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation on the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
+SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Gareth Hughes <gareth@valinux.com>
+ *
+ * \author Kevin E. Martin <martin@valinux.com>
+ */
+
+#include <errno.h>
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+#include "colormac.h"
+#include "macros.h"
+#include "simple_list.h"
+#include "radeon_reg.h" /* gets definition for usleep */
+#include "r300_context.h"
+#include "r300_state.h"
+#include "r300_cmdbuf.h"
+#include "radeon_ioctl.h"
+#include "r300_tex.h"
+#include "r300_ioctl.h"
+#include <unistd.h> /* for usleep() */
+
+#ifdef USER_BUFFERS
+#include "r300_mem.h"
+#endif
+
+/**
+ * Destroy any device-dependent state associated with the texture. This may
+ * include NULLing out hardware state that points to the texture.
+ */
+void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
+{
+ if (RADEON_DEBUG & DEBUG_TEXTURE) {
+ fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
+ (void *)t, (void *)t->base.tObj);
+ }
+
+ if (rmesa != NULL) {
+ unsigned i;
+
+ for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
+ if (t == rmesa->state.texture.unit[i].texobj) {
+ rmesa->state.texture.unit[i].texobj = NULL;
+ /* This code below is meant to shorten state
+ pushed to the hardware by not programming
+ unneeded units.
+
+ This does not appear to be worthwhile on R300 */
+#if 0
+ remove_from_list(&rmesa->hw.tex[i]);
+ make_empty_list(&rmesa->hw.tex[i]);
+ remove_from_list(&rmesa->hw.cube[i]);
+ make_empty_list(&rmesa->hw.cube[i]);
+#endif
+ }
+ }
+ }
+}
+
+/* ------------------------------------------------------------
+ * Texture image conversions
+ */
+
+static void r300UploadGARTClientSubImage(r300ContextPtr rmesa,
+ r300TexObjPtr t,
+ struct gl_texture_image *texImage,
+ GLint hwlevel,
+ GLint x, GLint y,
+ GLint width, GLint height)
+{
+ const struct gl_texture_format *texFormat = texImage->TexFormat;
+ GLuint srcPitch, dstPitch;
+ int blit_format;
+ int srcOffset;
+
+ /*
+ * XXX it appears that we always upload the full image, not a subimage.
+ * I.e. x==0, y==0, width=texWidth, height=texHeight.  If this is ever
+ * changed, the src pitch will have to change.
+ */
+ switch (texFormat->TexelBytes) {
+ case 1:
+ blit_format = R300_CP_COLOR_FORMAT_CI8;
+ srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+ dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+ break;
+ case 2:
+ blit_format = R300_CP_COLOR_FORMAT_RGB565;
+ srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+ dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+ break;
+ case 4:
+ blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
+ srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+ dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+ break;
+ case 8:
+ case 16:
+ blit_format = R300_CP_COLOR_FORMAT_CI8;
+ srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+ dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+ break;
+ default:
+ return;
+ }
+
+ t->image[0][hwlevel].data = texImage->Data;
+ srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
+
+ assert(srcOffset != ~0);
+
+ /* Don't currently need to cope with small pitches?
+ */
+ width = texImage->Width;
+ height = texImage->Height;
+
+ if (texFormat->TexelBytes > 4) {
+ width *= texFormat->TexelBytes;
+ }
+
+ r300EmitWait(rmesa, R300_WAIT_3D);
+
+ r300EmitBlit(rmesa, blit_format,
+ srcPitch,
+ srcOffset,
+ dstPitch,
+ t->bufAddr,
+ x,
+ y,
+ t->image[0][hwlevel].x + x,
+ t->image[0][hwlevel].y + y, width, height);
+
+ r300EmitWait(rmesa, R300_WAIT_2D);
+}
+
+static void r300UploadRectSubImage(r300ContextPtr rmesa,
+ r300TexObjPtr t,
+ struct gl_texture_image *texImage,
+ GLint x, GLint y, GLint width, GLint height)
+{
+ const struct gl_texture_format *texFormat = texImage->TexFormat;
+ int blit_format, dstPitch, done;
+
+ switch (texFormat->TexelBytes) {
+ case 1:
+ blit_format = R300_CP_COLOR_FORMAT_CI8;
+ break;
+ case 2:
+ blit_format = R300_CP_COLOR_FORMAT_RGB565;
+ break;
+ case 4:
+ blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
+ break;
+ case 8:
+ case 16:
+ blit_format = R300_CP_COLOR_FORMAT_CI8;
+ break;
+ default:
+ return;
+ }
+
+ t->image[0][0].data = texImage->Data;
+
+ /* Currently don't need to cope with small pitches.
+ */
+ width = texImage->Width;
+ height = texImage->Height;
+ dstPitch = t->pitch;
+
+ if (texFormat->TexelBytes > 4) {
+ width *= texFormat->TexelBytes;
+ }
+
+ if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
+ /* In this case, could also use GART texturing. This is
+ * currently disabled, but has been tested & works.
+ */
+ t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
+ t->pitch = texImage->RowStride * texFormat->TexelBytes - 32;
+
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr,
+ "Using GART texturing for rectangular client texture\n");
+
+ /* Release FB memory allocated for this image:
+ */
+ /* FIXME This may not be correct as driSwapOutTextureObject sets
+ * FIXME dirty_images. It may be fine, though.
+ */
+ if (t->base.memBlock) {
+ driSwapOutTextureObject((driTextureObject *) t);
+ }
+ } else if (texImage->IsClientData) {
+ /* Data already in GART memory, with usable pitch.
+ */
+ GLuint srcPitch;
+ srcPitch = texImage->RowStride * texFormat->TexelBytes;
+ r300EmitBlit(rmesa,
+ blit_format,
+ srcPitch,
+ r300GartOffsetFromVirtual(rmesa, texImage->Data),
+ dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);
+ } else {
+ /* Data not in GART memory, or bad pitch.
+ */
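+		/* Upload the image in chunks of at most RADEON_BUFFER_SIZE
+		 * bytes: copy each chunk into a temporary DMA region, then
+		 * blit it into place in texture memory.
+		 */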
+ for (done = 0; done < height;) {
+ struct r300_dma_region region;
+ int lines =
+ MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch);
+ int src_pitch;
+ char *tex;
+
+ src_pitch = texImage->RowStride * texFormat->TexelBytes;
+
+ tex = (char *)texImage->Data + done * src_pitch;
+
+ memset(&region, 0, sizeof(region));
+ r300AllocDmaRegion(rmesa, &region, lines * dstPitch,
+ 1024);
+
+ /* Copy texdata to dma:
+ */
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr,
+ "%s: src_pitch %d dst_pitch %d\n",
+ __FUNCTION__, src_pitch, dstPitch);
+
+ if (src_pitch == dstPitch) {
+ memcpy(region.address + region.start, tex,
+ lines * src_pitch);
+ } else {
+ char *buf = region.address + region.start;
+ int i;
+ for (i = 0; i < lines; i++) {
+ memcpy(buf, tex, src_pitch);
+ buf += dstPitch;
+ tex += src_pitch;
+ }
+ }
+
+ r300EmitWait(rmesa, R300_WAIT_3D);
+
+ /* Blit to framebuffer
+ */
+ r300EmitBlit(rmesa,
+ blit_format,
+ dstPitch, GET_START(&region),
+ dstPitch | (t->tile_bits >> 16),
+ t->bufAddr, 0, 0, 0, done, width, lines);
+
+ r300EmitWait(rmesa, R300_WAIT_2D);
+#ifdef USER_BUFFERS
+ r300_mem_use(rmesa, region.buf->id);
+#endif
+
+ r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__);
+ done += lines;
+ }
+ }
+}
+
+/**
+ * Upload the texture image associated with texture \a t at the specified
+ * level at the address relative to \a start.
+ */
+static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
+ GLint hwlevel,
+ GLint x, GLint y, GLint width, GLint height,
+ GLuint face)
+{
+ struct gl_texture_image *texImage = NULL;
+ GLuint offset;
+ GLint imageWidth, imageHeight;
+ GLint ret;
+ drm_radeon_texture_t tex;
+ drm_radeon_tex_image_t tmp;
+ const int level = hwlevel + t->base.firstLevel;
+
+ if (RADEON_DEBUG & DEBUG_TEXTURE) {
+ fprintf(stderr,
+ "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
+ __FUNCTION__, (void *)t, (void *)t->base.tObj, level,
+ width, height, face);
+ }
+
+ ASSERT(face < 6);
+
+ /* Ensure we have a valid texture to upload */
+ if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) {
+ _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+ return;
+ }
+
+ texImage = t->base.tObj->Image[face][level];
+
+ if (!texImage) {
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: texImage %d is NULL!\n",
+ __FUNCTION__, level);
+ return;
+ }
+ if (!texImage->Data) {
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: image data is NULL!\n",
+ __FUNCTION__);
+ return;
+ }
+
+ if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+ assert(level == 0);
+ assert(hwlevel == 0);
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: image data is rectangular\n",
+ __FUNCTION__);
+ r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
+ return;
+ } else if (texImage->IsClientData) {
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr,
+ "%s: image data is in GART client storage\n",
+ __FUNCTION__);
+ r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
+ width, height);
+ return;
+ } else if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: image data is in normal memory\n",
+ __FUNCTION__);
+
+ imageWidth = texImage->Width;
+ imageHeight = texImage->Height;
+
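+	/* Cube map faces are stored consecutively; each face occupies one
+	 * sixth of the total texture allocation.
+	 */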
+ offset = t->bufAddr + t->base.totalSize / 6 * face;
+
+ if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
+ GLint imageX = 0;
+ GLint imageY = 0;
+ GLint blitX = t->image[face][hwlevel].x;
+ GLint blitY = t->image[face][hwlevel].y;
+ GLint blitWidth = t->image[face][hwlevel].width;
+ GLint blitHeight = t->image[face][hwlevel].height;
+ fprintf(stderr, " upload image: %d,%d at %d,%d\n",
+ imageWidth, imageHeight, imageX, imageY);
+ fprintf(stderr, " upload blit: %d,%d at %d,%d\n",
+ blitWidth, blitHeight, blitX, blitY);
+ fprintf(stderr, " blit ofs: 0x%07x level: %d/%d\n",
+ (GLuint) offset, hwlevel, level);
+ }
+
+ t->image[face][hwlevel].data = texImage->Data;
+
+ /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
+ * NOTE: we always use a 1KB-wide blit and the I8 texture format.
+ * We used to use 1, 2 and 4-byte texels and used to use the texture
+ * width to dictate the blit width - but that won't work for compressed
+ * textures. (Brian)
+ * NOTE: can't do that with texture tiling. (sroland)
+ */
+ tex.offset = offset;
+ tex.image = &tmp;
+ /* copy (x,y,width,height,data) */
+ memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp));
+
+ if (texImage->TexFormat->TexelBytes > 4) {
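+		/* TexelBytes is 8 or 16 here, so this evaluates to
+		 * log2(8) = 3 or log2(16) = 4 respectively.
+		 */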
+ const int log2TexelBytes =
+ (3 + (texImage->TexFormat->TexelBytes >> 4));
+ tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+ tex.pitch =
+ MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
+ 64, 1);
+ tex.height = imageHeight;
+ tex.width = imageWidth << log2TexelBytes;
+ tex.offset += (tmp.x << log2TexelBytes) & ~1023;
+ tmp.x = tmp.x % (1024 >> log2TexelBytes);
+ tmp.width = tmp.width << log2TexelBytes;
+ } else if (texImage->TexFormat->TexelBytes) {
+ /* use multi-byte upload scheme */
+ tex.height = imageHeight;
+ tex.width = imageWidth;
+ switch (texImage->TexFormat->TexelBytes) {
+ case 1:
+ tex.format = RADEON_TXFORMAT_I8;
+ break;
+ case 2:
+ tex.format = RADEON_TXFORMAT_AI88;
+ break;
+ case 4:
+ tex.format = RADEON_TXFORMAT_ARGB8888;
+ break;
+ }
+ tex.pitch =
+ MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
+ 64, 1);
+ tex.offset += tmp.x & ~1023;
+ tmp.x = tmp.x % 1024;
+
+ if (t->tile_bits & R300_TXO_MICRO_TILE) {
+ /* need something like "tiled coordinates" ? */
+ tmp.y = tmp.x / (tex.pitch * 128) * 2;
+ tmp.x =
+ tmp.x % (tex.pitch * 128) / 2 /
+ texImage->TexFormat->TexelBytes;
+ tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+ } else {
+ tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+ }
+#if 1
+ if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
+ (texImage->Width * texImage->TexFormat->TexelBytes >= 256)
+ && ((!(t->tile_bits & R300_TXO_MICRO_TILE)
+ && (texImage->Height >= 8))
+ || (texImage->Height >= 16))) {
+			/* Weird: the R200 automatically disables macro tiling if the mip
+			   width is smaller than 256 bytes OR the height is smaller than 8,
+			   but when micro tiling is active the height limit appears to be
+			   16 instead. */
+ tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+ }
+#endif
+ } else {
+		/* For an 8x8 texture (2x2 DXT blocks), for example, padding is
+		   needed after the first two blocks; this only applies to DXT1,
+		   since two DXT3/DXT5 blocks already occupy 32 bytes. */
+		/* Set tex.height to 1/4 of the image height, since one
+		   "macropixel" (DXT block) covers 4 real pixels; this makes the
+		   kernel module read the right amount of data. */
+ tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+ tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
+ tex.height = (imageHeight + 3) / 4;
+ tex.width = (imageWidth + 3) / 4;
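+		/* Convert the block count to bytes for the 1-byte upload
+		 * format: a DXT1 block is 8 bytes, a DXT3/DXT5 block is 16
+		 * bytes.
+		 */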
+ if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
+ tex.width *= 8;
+ } else {
+ tex.width *= 16;
+ }
+ }
+
+ LOCK_HARDWARE(&rmesa->radeon);
+ do {
+ ret =
+ drmCommandWriteRead(rmesa->radeon.dri.fd,
+ DRM_RADEON_TEXTURE, &tex,
+ sizeof(drm_radeon_texture_t));
+ if (ret) {
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr,
+ "DRM_RADEON_TEXTURE: again!\n");
+ usleep(1);
+ }
+ } while (ret == -EAGAIN);
+
+ UNLOCK_HARDWARE(&rmesa->radeon);
+
+ if (ret) {
+ fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret);
+ fprintf(stderr, " offset=0x%08x\n", offset);
+ fprintf(stderr, " image width=%d height=%d\n",
+ imageWidth, imageHeight);
+ fprintf(stderr, " blit width=%d height=%d data=%p\n",
+ t->image[face][hwlevel].width,
+ t->image[face][hwlevel].height,
+ t->image[face][hwlevel].data);
+ _mesa_exit(-1);
+ }
+}
+
+/**
+ * Upload the texture images associated with texture \a t. This might
+ * require the allocation of texture memory.
+ *
+ * \param rmesa Context pointer
+ * \param t Texture to be uploaded
+ * \param face Cube map face to be uploaded. Zero for non-cube maps.
+ */
+
+int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
+{
+	int numLevels;
+
+	/* Check for a NULL or empty texture before dereferencing it below. */
+	if (!t || t->base.totalSize == 0)
+		return 0;
+
+	if (t->image_override)
+		return 0;
+
+	numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+
+	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
+		fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+			(void *)rmesa->radeon.glCtx, (void *)t->base.tObj,
+			t->base.totalSize, t->base.firstLevel,
+			t->base.lastLevel);
+	}
+
+ if (RADEON_DEBUG & DEBUG_SYNC) {
+ fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+ radeonFinish(rmesa->radeon.glCtx);
+ }
+
+ LOCK_HARDWARE(&rmesa->radeon);
+
+ if (t->base.memBlock == NULL) {
+ int heap;
+
+ heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps,
+ (driTextureObject *) t);
+ if (heap == -1) {
+ UNLOCK_HARDWARE(&rmesa->radeon);
+ return -1;
+ }
+
+ /* Set the base offset of the texture image */
+ t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap]
+ + t->base.memBlock->ofs;
+ t->offset = t->bufAddr;
+
+ if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+ /* hope it's safe to add that here... */
+ t->offset |= t->tile_bits;
+ }
+
+ /* Mark this texobj as dirty on all units:
+ */
+ t->dirty_state = TEX_ALL;
+ }
+
+ /* Let the world know we've used this memory recently.
+ */
+ driUpdateTextureLRU((driTextureObject *) t);
+ UNLOCK_HARDWARE(&rmesa->radeon);
+
+ /* Upload any images that are new */
+ if (t->base.dirty_images[face]) {
+ int i;
+ for (i = 0; i < numLevels; i++) {
+ if ((t->base.
+ dirty_images[face] & (1 <<
+ (i + t->base.firstLevel))) !=
+ 0) {
+ r300UploadSubImage(rmesa, t, i, 0, 0,
+ t->image[face][i].width,
+ t->image[face][i].height,
+ face);
+ }
+ }
+ t->base.dirty_images[face] = 0;
+ }
+
+ if (RADEON_DEBUG & DEBUG_SYNC) {
+ fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+ radeonFinish(rmesa->radeon.glCtx);
+ }
+
+ return 0;
+}
diff --git a/r300/r300_texstate.c b/r300/r300_texstate.c
new file mode 100644
index 0000000..8203189
--- /dev/null
+++ b/r300/r300_texstate.c
@@ -0,0 +1,620 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \todo Enable R300 texture tiling code?
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+#include "macros.h"
+#include "texformat.h"
+#include "teximage.h"
+#include "texobj.h"
+#include "enums.h"
+
+#include "r300_context.h"
+#include "r300_state.h"
+#include "r300_ioctl.h"
+#include "radeon_ioctl.h"
+#include "r300_tex.h"
+#include "r300_reg.h"
+
+#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \
+ || ((f) >= MESA_FORMAT_RGBA_FLOAT32 && \
+ (f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \
+ && tx_table_le[f].flag )
+
+#define _ASSIGN(entry, format) \
+ [ MESA_FORMAT_ ## entry ] = { format, 0, 1}
+
+/*
+ * Note that the _REV formats are the same as the non-REV formats. This is
+ * because the REV and non-REV formats are identical as a byte string, but
+ * differ when accessed as 16-bit or 32-bit words depending on the endianness of
+ * the host. Since the textures are transferred to the R300 as a byte string
+ * (i.e. without any byte-swapping), the R300 sees the REV and non-REV formats
+ * identically. -- paulus
+ */
+
+static const struct tx_table {
+ GLuint format, filter, flag;
+} tx_table_be[] = {
+ /* *INDENT-OFF* */
+ _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)),
+ _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)),
+ _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)),
+ _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)),
+ _ASSIGN(RGB888, 0xffffffff),
+ _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
+ _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
+ _ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)),
+ _ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)),
+ _ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)),
+ _ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)),
+ _ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)),
+ _ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)),
+ _ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)),
+ _ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)),
+ _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)),
+ _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)),
+ _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)),
+ _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE ),
+ _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE),
+ _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)),
+ _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)),
+ _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)),
+ _ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)),
+ _ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)),
+ _ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)),
+ _ASSIGN(RGB_FLOAT32, 0xffffffff),
+ _ASSIGN(RGB_FLOAT16, 0xffffffff),
+ _ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)),
+ _ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)),
+ _ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)),
+ _ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)),
+ _ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)),
+ _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)),
+ _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)),
+ _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)),
+ /* *INDENT-ON* */
+};
+
+static const struct tx_table tx_table_le[] = {
+ /* *INDENT-OFF* */
+ _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)),
+ _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)),
+ _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)),
+ _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)),
+ _ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)),
+ _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
+ _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
+ _ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)),
+ _ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)),
+ _ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)),
+ _ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)),
+ _ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)),
+ _ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)),
+ _ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)),
+ _ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)),
+ _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)),
+ _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)),
+ _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)),
+ _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE ),
+ _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE),
+ _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)),
+ _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)),
+ _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)),
+ _ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)),
+ _ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)),
+ _ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)),
+ _ASSIGN(RGB_FLOAT32, 0xffffffff),
+ _ASSIGN(RGB_FLOAT16, 0xffffffff),
+ _ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)),
+ _ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)),
+ _ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)),
+ _ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)),
+ _ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)),
+ _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)),
+ _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)),
+ _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)),
+ /* *INDENT-ON* */
+};
+
+#undef _ASSIGN
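
The two tables above differ only in component ordering; which one applies is decided at run time from host byte order, exactly as r300SetTexImages does below via _mesa_little_endian(). A minimal sketch of that selection (pick_hw_format is illustrative, not a driver function; it assumes the tables and Mesa's _mesa_little_endian() are in scope):

/* Sketch only: fetch the hardware TX format word for a Mesa format,
 * choosing the little- or big-endian table as appropriate. */
static GLuint pick_hw_format(GLuint mesa_format)
{
	const struct tx_table *tab =
	    _mesa_little_endian() ? tx_table_le : tx_table_be;

	return tab[mesa_format].format;	/* the driver also ORs in .filter */
}
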
+
+/**
+ * This function computes the number of bytes of storage needed for
+ * the given texture object (all mipmap levels, all cube faces).
+ * The \c image[face][level].x/y/width/height parameters for upload/blitting
+ * are computed here. \c filter, \c format, etc. will be set here
+ * too.
+ *
+ * \param rmesa Context pointer
+ * \param tObj GL texture object whose images are to be posted to
+ * hardware state.
+ */
+static void r300SetTexImages(r300ContextPtr rmesa,
+ struct gl_texture_object *tObj)
+{
+ r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+ const struct gl_texture_image *baseImage =
+ tObj->Image[0][tObj->BaseLevel];
+ GLint curOffset, blitWidth;
+ GLint i, texelBytes;
+ GLint numLevels;
+ GLint log2Width, log2Height, log2Depth;
+
+ /* Set the hardware texture format
+ */
+ if (!t->image_override && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) {
+ if (_mesa_little_endian()) {
+ t->format =
+ tx_table_le[baseImage->TexFormat->MesaFormat].
+ format;
+ t->filter |=
+ tx_table_le[baseImage->TexFormat->MesaFormat].
+ filter;
+ } else {
+ t->format =
+ tx_table_be[baseImage->TexFormat->MesaFormat].
+ format;
+ t->filter |=
+ tx_table_be[baseImage->TexFormat->MesaFormat].
+ filter;
+ }
+ } else if (!t->image_override) {
+ _mesa_problem(NULL, "unexpected texture format in %s",
+ __FUNCTION__);
+ return;
+ }
+
+ texelBytes = baseImage->TexFormat->TexelBytes;
+
+ /* Compute which mipmap levels we really want to send to the hardware.
+ */
+ driCalculateTextureFirstLastLevel((driTextureObject *) t);
+ log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
+ log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
+ log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
+
+ numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+
+ assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
+
+ /* Calculate mipmap offsets and dimensions for blitting (uploading)
+ * The idea is that we lay out the mipmap levels within a block of
+ * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+ */
+ curOffset = 0;
+ blitWidth = R300_BLIT_WIDTH_BYTES;
+ t->tile_bits = 0;
+
+ /* figure out if this texture is suitable for tiling. */
+#if 0 /* Disabled for now */
+ if (texelBytes) {
+ if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
+ /* texrect might be able to use micro tiling too in theory? */
+ (baseImage->Height > 1)) {
+
+ /* allow 32 (bytes) x 1 mip (which will use two times the space
+ the non-tiled version would use) max if base texture is large enough */
+ if ((numLevels == 1) ||
+ (((baseImage->Width * texelBytes /
+ baseImage->Height) <= 32)
+ && (baseImage->Width * texelBytes > 64))
+ ||
+ ((baseImage->Width * texelBytes /
+ baseImage->Height) <= 16)) {
+ t->tile_bits |= R300_TXO_MICRO_TILE;
+ }
+ }
+
+ if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
+ /* we can set macro tiling even for small textures, they will be untiled anyway */
+ t->tile_bits |= R300_TXO_MACRO_TILE;
+ }
+ }
+#endif
+
+ for (i = 0; i < numLevels; i++) {
+ const struct gl_texture_image *texImage;
+ GLuint size;
+
+ texImage = tObj->Image[0][i + t->base.firstLevel];
+ if (!texImage)
+ break;
+
+ /* find image size in bytes */
+ if (texImage->IsCompressed) {
+ if ((t->format & R300_TX_FORMAT_DXT1) ==
+ R300_TX_FORMAT_DXT1) {
+ // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
+ if ((texImage->Width + 3) < 8) /* width one block */
+ size = texImage->CompressedSize * 4;
+ else if ((texImage->Width + 3) < 16)
+ size = texImage->CompressedSize * 2;
+ else
+ size = texImage->CompressedSize;
+ } else {
+ /* DXT3/5, 16 bytes per block */
+ WARN_ONCE
+ ("DXT 3/5 suffers from multitexturing problems!\n");
+ // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
+ if ((texImage->Width + 3) < 8)
+ size = texImage->CompressedSize * 2;
+ else
+ size = texImage->CompressedSize;
+ }
+ } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+ size =
+ ((texImage->Width * texelBytes +
+ 63) & ~63) * texImage->Height;
+ blitWidth = 64 / texelBytes;
+ } else if (t->tile_bits & R300_TXO_MICRO_TILE) {
+			/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+			   though the actual offset may differ from the untiled case
+			   (if the texture is less than 32 bytes wide) */
+ int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+ size =
+ (w * ((texImage->Height + 1) / 2)) *
+ texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+ } else {
+ int w = (texImage->Width * texelBytes + 31) & ~31;
+ size = w * texImage->Height * texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+ }
+ assert(size > 0);
+
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
+ texImage->Width, texImage->Height,
+ texImage->Depth,
+ texImage->TexFormat->TexelBytes,
+ texImage->InternalFormat);
+
+ /* Align to 32-byte offset. It is faster to do this unconditionally
+ * (no branch penalty).
+ */
+
+ curOffset = (curOffset + 0x1f) & ~0x1f;
+
+ if (texelBytes) {
+ /* fix x and y coords up later together with offset */
+ t->image[0][i].x = curOffset;
+ t->image[0][i].y = 0;
+ t->image[0][i].width =
+ MIN2(size / texelBytes, blitWidth);
+ t->image[0][i].height =
+ (size / texelBytes) / t->image[0][i].width;
+ } else {
+ t->image[0][i].x = curOffset % R300_BLIT_WIDTH_BYTES;
+ t->image[0][i].y = curOffset / R300_BLIT_WIDTH_BYTES;
+ t->image[0][i].width =
+ MIN2(size, R300_BLIT_WIDTH_BYTES);
+ t->image[0][i].height = size / t->image[0][i].width;
+ }
+
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr,
+ "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+ i, texImage->Width, texImage->Height,
+ t->image[0][i].x, t->image[0][i].y,
+ t->image[0][i].width, t->image[0][i].height,
+ size, curOffset);
+
+ curOffset += size;
+ }
+
+ /* Align the total size of texture memory block.
+ */
+ t->base.totalSize =
+ (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+
+ /* Setup remaining cube face blits, if needed */
+ if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+ GLuint face;
+ for (face = 1; face < 6; face++) {
+ for (i = 0; i < numLevels; i++) {
+ t->image[face][i].x = t->image[0][i].x;
+ t->image[face][i].y = t->image[0][i].y;
+ t->image[face][i].width = t->image[0][i].width;
+ t->image[face][i].height =
+ t->image[0][i].height;
+ }
+ }
+ t->base.totalSize *= 6; /* total texmem needed */
+ }
+
+ if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+ ASSERT(log2Width == log2Height);
+ t->format |= R300_TX_FORMAT_CUBIC_MAP;
+ }
+
+ t->size =
+ (((tObj->Image[0][t->base.firstLevel]->Width -
+ 1) << R300_TX_WIDTHMASK_SHIFT)
+ | ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
+ R300_TX_HEIGHTMASK_SHIFT))
+ | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
+
+ /* Only need to round to nearest 32 for textures, but the blitter
+ * requires 64-byte aligned pitches, and we may/may not need the
+ * blitter. NPOT only!
+ */
+ if (baseImage->IsCompressed) {
+ t->pitch =
+ (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
+ } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+ unsigned int align = blitWidth - 1;
+ t->pitch = ((tObj->Image[0][t->base.firstLevel]->Width *
+ texelBytes) + 63) & ~(63);
+ t->size |= R300_TX_SIZE_TXPITCH_EN;
+ if (!t->image_override)
+ t->pitch_reg =
+ (((tObj->Image[0][t->base.firstLevel]->Width) +
+ align) & ~align) - 1;
+ } else {
+ t->pitch =
+ ((tObj->Image[0][t->base.firstLevel]->Width *
+ texelBytes) + 63) & ~(63);
+ }
+
+ t->dirty_state = TEX_ALL;
+
+ /* FYI: r300UploadTexImages( rmesa, t ) used to be called here */
+}
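
The layout loop above places each mip level at the next 32-byte boundary and records blit x/y/width/height for the later upload. A minimal arithmetic sketch of that packing for the plain (non-tiled, uncompressed) branch; layout_levels is illustrative only, but the rounding constants match the code above:

/* Illustrative only: total bytes needed when each level's pitch is padded
 * to 32 bytes and each level starts on a 32-byte boundary. */
static unsigned layout_levels(const unsigned *widths, const unsigned *heights,
			      unsigned texelBytes, int numLevels)
{
	unsigned curOffset = 0;
	int i;

	for (i = 0; i < numLevels; i++) {
		unsigned pitch = (widths[i] * texelBytes + 31) & ~31u;

		curOffset = (curOffset + 0x1f) & ~0x1fu;	/* align level start */
		curOffset += pitch * heights[i];
	}
	return curOffset;	/* still rounded up by RADEON_OFFSET_MASK afterwards */
}
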
+
+/* ================================================================
+ * Texture unit state management
+ */
+
+static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ struct gl_texture_object *tObj = texUnit->_Current;
+ r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+
+ ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+
+ if (t->base.dirty_images[0]) {
+ R300_FIREVERTICES(rmesa);
+
+ r300SetTexImages(rmesa, tObj);
+ r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+ if (!t->base.memBlock && !t->image_override)
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ struct gl_texture_object *tObj = texUnit->_Current;
+ r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+
+ ASSERT(tObj->Target == GL_TEXTURE_3D);
+
+ /* r300 does not support mipmaps for 3D textures. */
+ if ((tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) {
+ return GL_FALSE;
+ }
+
+ if (t->base.dirty_images[0]) {
+ R300_FIREVERTICES(rmesa);
+ r300SetTexImages(rmesa, tObj);
+ r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+ if (!t->base.memBlock)
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ struct gl_texture_object *tObj = texUnit->_Current;
+ r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+ GLuint face;
+
+ ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+
+ if (t->base.dirty_images[0] || t->base.dirty_images[1] ||
+ t->base.dirty_images[2] || t->base.dirty_images[3] ||
+ t->base.dirty_images[4] || t->base.dirty_images[5]) {
+ /* flush */
+ R300_FIREVERTICES(rmesa);
+ /* layout memory space, once for all faces */
+ r300SetTexImages(rmesa, tObj);
+ }
+
+ /* upload (per face) */
+ for (face = 0; face < 6; face++) {
+ if (t->base.dirty_images[face]) {
+ r300UploadTexImages(rmesa,
+ (r300TexObjPtr) tObj->DriverData,
+ face);
+ }
+ }
+
+ if (!t->base.memBlock) {
+ /* texmem alloc failed, use s/w fallback */
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ struct gl_texture_object *tObj = texUnit->_Current;
+ r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+
+ ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+
+ if (t->base.dirty_images[0]) {
+ R300_FIREVERTICES(rmesa);
+
+ r300SetTexImages(rmesa, tObj);
+ r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+ if (!t->base.memBlock && !t->image_override &&
+ !rmesa->prefer_gart_client_texturing)
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ struct gl_texture_object *tObj = texUnit->_Current;
+ r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+
+ /* Fallback if there's a texture border */
+ if (tObj->Image[0][tObj->BaseLevel]->Border > 0)
+ return GL_FALSE;
+
+ /* Update state if this is a different texture object to last
+ * time.
+ */
+ if (rmesa->state.texture.unit[unit].texobj != t) {
+ if (rmesa->state.texture.unit[unit].texobj != NULL) {
+ /* The old texture is no longer bound to this texture unit.
+ * Mark it as such.
+ */
+
+ rmesa->state.texture.unit[unit].texobj->base.bound &=
+ ~(1UL << unit);
+ }
+
+ rmesa->state.texture.unit[unit].texobj = t;
+ t->base.bound |= (1UL << unit);
+ t->dirty_state |= 1 << unit;
+ driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */
+ }
+
+ return !t->border_fallback;
+}
+
+void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth, GLuint pitch)
+{
+ r300ContextPtr rmesa =
+ (r300ContextPtr)((__DRIcontextPrivate*)pDRICtx->private)->driverPrivate;
+ struct gl_texture_object *tObj =
+ _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+ r300TexObjPtr t;
+ int idx;
+
+ if (!tObj)
+ return;
+
+ t = (r300TexObjPtr) tObj->DriverData;
+
+ t->image_override = GL_TRUE;
+
+ if (!offset)
+ return;
+
+ t->offset = offset;
+ t->pitch_reg = pitch;
+
+ switch (depth) {
+ case 32:
+ idx = 2;
+ t->pitch_reg /= 4;
+ break;
+ case 24:
+ default:
+ idx = 4;
+ t->pitch_reg /= 4;
+ break;
+ case 16:
+ idx = 5;
+ t->pitch_reg /= 2;
+ break;
+ }
+
+ t->pitch_reg--;
+
+ t->format = tx_table_le[idx].format;
+ t->filter |= tx_table_le[idx].filter;
+}
+
+static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit)
+{
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+ if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) {
+ return (r300EnableTextureRect(ctx, unit) &&
+ r300UpdateTexture(ctx, unit));
+ } else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) {
+ return (r300EnableTexture2D(ctx, unit) &&
+ r300UpdateTexture(ctx, unit));
+ } else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) {
+ return (r300EnableTexture3D(ctx, unit) &&
+ r300UpdateTexture(ctx, unit));
+ } else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) {
+ return (r300EnableTextureCube(ctx, unit) &&
+ r300UpdateTexture(ctx, unit));
+ } else if (texUnit->_ReallyEnabled) {
+ return GL_FALSE;
+ } else {
+ return GL_TRUE;
+ }
+}
+
+void r300UpdateTextureState(GLcontext * ctx)
+{
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ if (!r300UpdateTextureUnit(ctx, i)) {
+ _mesa_warning(ctx,
+ "failed to update texture state for unit %d.\n",
+ i);
+ }
+ }
+}
diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c
new file mode 100644
index 0000000..1d90ade
--- /dev/null
+++ b/r300/r300_vertprog.c
@@ -0,0 +1,1305 @@
+/**************************************************************************
+
+Copyright (C) 2005 Aapo Tahkola.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Aapo Tahkola <aet@rasterburn.org>
+ */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#include "program.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "tnl/tnl.h"
+
+#include "r300_context.h"
+
+#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
+ SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
+ SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
+ SWIZZLE_W != VSF_IN_COMPONENT_W || \
+ SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
+ SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
+ WRITEMASK_X != VSF_FLAG_X || \
+ WRITEMASK_Y != VSF_FLAG_Y || \
+ WRITEMASK_Z != VSF_FLAG_Z || \
+ WRITEMASK_W != VSF_FLAG_W
+#error Cannot change these!
+#endif
+
+#define SCALAR_FLAG (1<<31)
+#define FLAG_MASK (1<<31)
+#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
+#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
+
+static struct {
+ char *name;
+ int opcode;
+ unsigned long ip; /* number of input operands and flags */
+} op_names[] = {
+ /* *INDENT-OFF* */
+ OPN(ABS, 1),
+ OPN(ADD, 2),
+ OPN(ARL, 1 | SCALAR_FLAG),
+ OPN(DP3, 2),
+ OPN(DP4, 2),
+ OPN(DPH, 2),
+ OPN(DST, 2),
+ OPN(EX2, 1 | SCALAR_FLAG),
+ OPN(EXP, 1 | SCALAR_FLAG),
+ OPN(FLR, 1),
+ OPN(FRC, 1),
+ OPN(LG2, 1 | SCALAR_FLAG),
+ OPN(LIT, 1),
+ OPN(LOG, 1 | SCALAR_FLAG),
+ OPN(MAD, 3),
+ OPN(MAX, 2),
+ OPN(MIN, 2),
+ OPN(MOV, 1),
+ OPN(MUL, 2),
+ OPN(POW, 2 | SCALAR_FLAG),
+ OPN(RCP, 1 | SCALAR_FLAG),
+ OPN(RSQ, 1 | SCALAR_FLAG),
+ OPN(SGE, 2),
+ OPN(SLT, 2),
+ OPN(SUB, 2),
+ OPN(SWZ, 1),
+ OPN(XPD, 2),
+ OPN(RCC, 0), //extra
+ OPN(PRINT, 0),
+ OPN(END, 0)
+ /* *INDENT-ON* */
+};
+
+#undef OPN
+
+int r300VertexProgUpdateParams(GLcontext * ctx,
+ struct r300_vertex_program_cont *vp, float *dst)
+{
+ int pi;
+ struct gl_vertex_program *mesa_vp = &vp->mesa_program;
+ float *dst_o = dst;
+ struct gl_program_parameter_list *paramList;
+
+ if (mesa_vp->IsNVProgram) {
+ _mesa_load_tracked_matrices(ctx);
+
+ for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
+ *dst++ = ctx->VertexProgram.Parameters[pi][0];
+ *dst++ = ctx->VertexProgram.Parameters[pi][1];
+ *dst++ = ctx->VertexProgram.Parameters[pi][2];
+ *dst++ = ctx->VertexProgram.Parameters[pi][3];
+ }
+ return dst - dst_o;
+ }
+
+ assert(mesa_vp->Base.Parameters);
+ _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
+
+ if (mesa_vp->Base.Parameters->NumParameters * 4 >
+ VSF_MAX_FRAGMENT_LENGTH) {
+ fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
+ _mesa_exit(-1);
+ }
+
+ paramList = mesa_vp->Base.Parameters;
+ for (pi = 0; pi < paramList->NumParameters; pi++) {
+ switch (paramList->Parameters[pi].Type) {
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_NAMED_PARAM:
+ //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
+ case PROGRAM_CONSTANT:
+ *dst++ = paramList->ParameterValues[pi][0];
+ *dst++ = paramList->ParameterValues[pi][1];
+ *dst++ = paramList->ParameterValues[pi][2];
+ *dst++ = paramList->ParameterValues[pi][3];
+ break;
+
+ default:
+ _mesa_problem(NULL, "Bad param type in %s",
+ __FUNCTION__);
+ }
+
+ }
+
+ return dst - dst_o;
+}
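
The function above flattens Mesa's parameter list into a packed array of vec4 constants that is later copied into the vertex shader's constant memory. A minimal sketch of the same packing over a plain float array, with hypothetical names (pack_vec4_params is not driver API):

/* Hypothetical sketch: copy count vec4 parameters into a flat buffer and
 * return the number of floats written, mirroring the dst/dst_o idiom above. */
static int pack_vec4_params(const float (*params)[4], int count, float *dst)
{
	const float *dst_o = dst;
	int i;

	for (i = 0; i < count; i++) {
		*dst++ = params[i][0];
		*dst++ = params[i][1];
		*dst++ = params[i][2];
		*dst++ = params[i][3];
	}
	return dst - dst_o;
}
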
+
+static unsigned long t_dst_mask(GLuint mask)
+{
+ /* WRITEMASK_* is equivalent to VSF_FLAG_* */
+ return mask & VSF_FLAG_ALL;
+}
+
+static unsigned long t_dst_class(enum register_file file)
+{
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ return VSF_OUT_CLASS_TMP;
+ case PROGRAM_OUTPUT:
+ return VSF_OUT_CLASS_RESULT;
+ case PROGRAM_ADDRESS:
+ return VSF_OUT_CLASS_ADDR;
+ /*
+ case PROGRAM_INPUT:
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_WRITE_ONLY:
+ case PROGRAM_ADDRESS:
+ */
+ default:
+ fprintf(stderr, "problem in %s", __FUNCTION__);
+ _mesa_exit(-1);
+ return -1;
+ }
+}
+
+static unsigned long t_dst_index(struct r300_vertex_program *vp,
+ struct prog_dst_register *dst)
+{
+ if (dst->File == PROGRAM_OUTPUT)
+ return vp->outputs[dst->Index];
+
+ return dst->Index;
+}
+
+static unsigned long t_src_class(enum register_file file)
+{
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ return VSF_IN_CLASS_TMP;
+
+ case PROGRAM_INPUT:
+ return VSF_IN_CLASS_ATTR;
+
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_STATE_VAR:
+ return VSF_IN_CLASS_PARAM;
+ /*
+ case PROGRAM_OUTPUT:
+ case PROGRAM_WRITE_ONLY:
+ case PROGRAM_ADDRESS:
+ */
+ default:
+ fprintf(stderr, "problem in %s", __FUNCTION__);
+ _mesa_exit(-1);
+ return -1;
+ }
+}
+
+static __inline unsigned long t_swizzle(GLubyte swizzle)
+{
+/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+ return swizzle;
+}
+
+#if 0
+static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
+{
+ int i;
+
+ if (vp == NULL) {
+ fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
+ caller);
+ return;
+ }
+
+ fprintf(stderr, "%s:<", caller);
+ for (i = 0; i < VERT_ATTRIB_MAX; i++)
+ fprintf(stderr, "%d ", vp->inputs[i]);
+ fprintf(stderr, ">\n");
+
+}
+#endif
+
+static unsigned long t_src_index(struct r300_vertex_program *vp,
+ struct prog_src_register *src)
+{
+ int i;
+ int max_reg = -1;
+
+ if (src->File == PROGRAM_INPUT) {
+ if (vp->inputs[src->Index] != -1)
+ return vp->inputs[src->Index];
+
+ for (i = 0; i < VERT_ATTRIB_MAX; i++)
+ if (vp->inputs[i] > max_reg)
+ max_reg = vp->inputs[i];
+
+ vp->inputs[src->Index] = max_reg + 1;
+
+ //vp_dump_inputs(vp, __FUNCTION__);
+
+ return vp->inputs[src->Index];
+ } else {
+ if (src->Index < 0) {
+ fprintf(stderr,
+ "negative offsets for indirect addressing do not work.\n");
+ return 0;
+ }
+ return src->Index;
+ }
+}
+
+static unsigned long t_src(struct r300_vertex_program *vp,
+ struct prog_src_register *src)
+{
+ /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return MAKE_VSF_SOURCE(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 1)),
+ t_swizzle(GET_SWZ(src->Swizzle, 2)),
+ t_swizzle(GET_SWZ(src->Swizzle, 3)),
+ t_src_class(src->File),
+ src->NegateBase) | (src->RelAddr << 4);
+}
+
+static unsigned long t_src_scalar(struct r300_vertex_program *vp,
+ struct prog_src_register *src)
+{
+
+ return MAKE_VSF_SOURCE(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_src_class(src->File),
+ src->
+ NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src->RelAddr << 4);
+}
+
+static unsigned long t_opcode(enum prog_opcode opcode)
+{
+
+ switch (opcode) {
+ /* *INDENT-OFF* */
+ case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
+ case OPCODE_DST: return R300_VPI_OUT_OP_DST;
+ case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
+ case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
+ case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
+ case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
+ case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
+ case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
+ case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
+ case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
+ case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
+ case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
+ case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
+ case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
+ case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
+ /* *INDENT-ON* */
+
+ default:
+ fprintf(stderr, "%s: Should not be called with opcode %d!",
+ __FUNCTION__, opcode);
+ }
+ _mesa_exit(-1);
+ return 0;
+}
+
+static unsigned long op_operands(enum prog_opcode opcode)
+{
+ int i;
+
+	/* Can we trust Mesa's opcodes to be in order? */
+ for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++)
+ if (op_names[i].opcode == opcode)
+ return op_names[i].ip;
+
+ fprintf(stderr, "op %d not found in op_names\n", opcode);
+ _mesa_exit(-1);
+ return 0;
+}
+
+static GLboolean valid_dst(struct r300_vertex_program *vp,
+ struct prog_dst_register *dst)
+{
+ if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return GL_FALSE;
+ } else if (dst->File == PROGRAM_ADDRESS) {
+ assert(dst->Index == 0);
+ }
+
+ return GL_TRUE;
+}
+
+/* TODO: Get rid of t_src_class call */
+#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
+ ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
+ t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
+ (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
+ t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \
+
+#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
+ SWIZZLE_ZERO, SWIZZLE_ZERO, \
+ SWIZZLE_ZERO, SWIZZLE_ZERO, \
+ t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
+
+#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
+ SWIZZLE_ZERO, SWIZZLE_ZERO, \
+ SWIZZLE_ZERO, SWIZZLE_ZERO, \
+ t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
+
+#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
+ SWIZZLE_ZERO, SWIZZLE_ZERO, \
+ SWIZZLE_ZERO, SWIZZLE_ZERO, \
+ t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
+
+#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
+ SWIZZLE_ONE, SWIZZLE_ONE, \
+ SWIZZLE_ONE, SWIZZLE_ONE, \
+ t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
+
+#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
+ SWIZZLE_ONE, SWIZZLE_ONE, \
+ SWIZZLE_ONE, SWIZZLE_ONE, \
+ t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
+
+#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
+ SWIZZLE_ONE, SWIZZLE_ONE, \
+ SWIZZLE_ONE, SWIZZLE_ONE, \
+ t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
+
+/* DP4 version seems to trigger some hw peculiarity */
+//#define PREFER_DP4
+
+#define FREE_TEMPS() \
+ do { \
+ if(u_temp_i < vp->num_temporaries) { \
+ WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
+ vp->native = GL_FALSE; \
+ } \
+ u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
+ } while (0)
+
+static void r300TranslateVertexShader(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi)
+{
+ int i, cur_reg = 0;
+ VERTEX_SHADER_INSTRUCTION *o_inst;
+ unsigned long operands;
+ int are_srcs_scalar;
+ unsigned long hw_op;
+	/* Initial value should be the last temp reg that the hw supports.
+	   Strangely enough the r300 doesn't mind even though these would be out of range.
+	   Is it smart enough to realize that it doesn't need them? */
+ int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
+ struct prog_src_register src[3];
+
+ vp->pos_end = 0; /* Not supported yet */
+ vp->program.length = 0;
+ /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
+
+ for (i = 0; i < VERT_ATTRIB_MAX; i++)
+ vp->inputs[i] = -1;
+
+ for (i = 0; i < VERT_RESULT_MAX; i++)
+ vp->outputs[i] = -1;
+
+ assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
+
+ /* Assign outputs */
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS))
+ vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
+
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0))
+ vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1))
+ vp->outputs[VERT_RESULT_COL1] = cur_reg++;
+
+#if 0 /* Not supported yet */
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0))
+ vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
+
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1))
+ vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
+
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC))
+ vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+#endif
+
+ for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
+ if (vp->key.OutputsWritten & (1 << i))
+ vp->outputs[i] = cur_reg++;
+
+ vp->translated = GL_TRUE;
+ vp->native = GL_TRUE;
+
+ o_inst = vp->program.body.i;
+ for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) {
+ FREE_TEMPS();
+
+ if (!valid_dst(vp, &vpi->DstReg)) {
+ /* redirect result to unused temp */
+ vpi->DstReg.File = PROGRAM_TEMPORARY;
+ vpi->DstReg.Index = u_temp_i;
+ }
+
+ operands = op_operands(vpi->Opcode);
+ are_srcs_scalar = operands & SCALAR_FLAG;
+ operands &= OP_MASK;
+
+ for (i = 0; i < operands; i++)
+ src[i] = vpi->SrcReg[i];
+
+ if (operands == 3) { /* TODO: scalars */
+ if (CMP_SRCS(src[1], src[2])
+ || CMP_SRCS(src[0], src[2])) {
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
+ VSF_FLAG_ALL,
+ VSF_OUT_CLASS_TMP);
+
+ o_inst->src[0] =
+ MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
+ SWIZZLE_X, SWIZZLE_Y,
+ SWIZZLE_Z, SWIZZLE_W,
+ t_src_class(src[2].File),
+ VSF_FLAG_NONE) | (src[2].
+ RelAddr <<
+ 4);
+
+ o_inst->src[1] = ZERO_SRC_2;
+ o_inst->src[2] = ZERO_SRC_2;
+ o_inst++;
+
+ src[2].File = PROGRAM_TEMPORARY;
+ src[2].Index = u_temp_i;
+ src[2].RelAddr = 0;
+ u_temp_i--;
+ }
+
+ }
+
+ if (operands >= 2) {
+ if (CMP_SRCS(src[1], src[0])) {
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
+ VSF_FLAG_ALL,
+ VSF_OUT_CLASS_TMP);
+
+ o_inst->src[0] =
+ MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+ SWIZZLE_X, SWIZZLE_Y,
+ SWIZZLE_Z, SWIZZLE_W,
+ t_src_class(src[0].File),
+ VSF_FLAG_NONE) | (src[0].
+ RelAddr <<
+ 4);
+
+ o_inst->src[1] = ZERO_SRC_0;
+ o_inst->src[2] = ZERO_SRC_0;
+ o_inst++;
+
+ src[0].File = PROGRAM_TEMPORARY;
+ src[0].Index = u_temp_i;
+ src[0].RelAddr = 0;
+ u_temp_i--;
+ }
+ }
+
+ /* These ops need special handling. */
+ switch (vpi->Opcode) {
+ case OPCODE_POW:
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_POW,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ o_inst->src[0] = t_src_scalar(vp, &src[0]);
+ o_inst->src[1] = ZERO_SRC_0;
+ o_inst->src[2] = t_src_scalar(vp, &src[1]);
+ goto next;
+
+ case OPCODE_MOV: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+ case OPCODE_SWZ:
+#if 1
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = ZERO_SRC_0;
+ o_inst->src[2] = ZERO_SRC_0;
+#else
+ hw_op =
+ (src[0].File ==
+ PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
+ R300_VPI_OUT_OP_MAD;
+
+ o_inst->op =
+ MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = ONE_SRC_0;
+ o_inst->src[2] = ZERO_SRC_0;
+#endif
+
+ goto next;
+
+ case OPCODE_ADD:
+#if 1
+ hw_op = (src[0].File == PROGRAM_TEMPORARY &&
+ src[1].File ==
+ PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
+ R300_VPI_OUT_OP_MAD;
+
+ o_inst->op =
+ MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ o_inst->src[0] = ONE_SRC_0;
+ o_inst->src[1] = t_src(vp, &src[0]);
+ o_inst->src[2] = t_src(vp, &src[1]);
+#else
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = t_src(vp, &src[1]);
+ o_inst->src[2] = ZERO_SRC_1;
+
+#endif
+ goto next;
+
+ case OPCODE_MAD:
+ hw_op = (src[0].File == PROGRAM_TEMPORARY &&
+ src[1].File == PROGRAM_TEMPORARY &&
+ src[2].File ==
+ PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
+ R300_VPI_OUT_OP_MAD;
+
+ o_inst->op =
+ MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = t_src(vp, &src[1]);
+ o_inst->src[2] = t_src(vp, &src[2]);
+ goto next;
+
+ case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
+ hw_op = (src[0].File == PROGRAM_TEMPORARY &&
+ src[1].File ==
+ PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
+ R300_VPI_OUT_OP_MAD;
+
+ o_inst->op =
+ MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = t_src(vp, &src[1]);
+
+ o_inst->src[2] = ZERO_SRC_1;
+ goto next;
+
+ case OPCODE_DP3: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_DOT,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+
+ o_inst->src[0] =
+ MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 1)),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 2)),
+ SWIZZLE_ZERO,
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_XYZ :
+ VSF_FLAG_NONE) | (src[0].
+ RelAddr << 4);
+
+ o_inst->src[1] =
+ MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 0)),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 1)),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 2)),
+ SWIZZLE_ZERO,
+ t_src_class(src[1].File),
+ src[1].
+ NegateBase ? VSF_FLAG_XYZ :
+ VSF_FLAG_NONE) | (src[1].
+ RelAddr << 4);
+
+ o_inst->src[2] = ZERO_SRC_1;
+ goto next;
+
+ case OPCODE_SUB: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+#if 1
+ hw_op = (src[0].File == PROGRAM_TEMPORARY &&
+ src[1].File ==
+ PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
+ R300_VPI_OUT_OP_MAD;
+
+ o_inst->op =
+ MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = ONE_SRC_0;
+ o_inst->src[2] =
+ MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 0)),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 1)),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 2)),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 3)),
+ t_src_class(src[1].File),
+ (!src[1].
+ NegateBase) ? VSF_FLAG_ALL :
+ VSF_FLAG_NONE) | (src[1].
+ RelAddr << 4);
+#else
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] =
+ MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 0)),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 1)),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 2)),
+ t_swizzle(GET_SWZ
+ (src[1].Swizzle, 3)),
+ t_src_class(src[1].File),
+ (!src[1].
+ NegateBase) ? VSF_FLAG_ALL :
+ VSF_FLAG_NONE) | (src[1].
+ RelAddr << 4);
+ o_inst->src[2] = 0;
+#endif
+ goto next;
+
+ case OPCODE_ABS: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_MAX,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] =
+ MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 1)),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 2)),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 3)),
+ t_src_class(src[0].File),
+ (!src[0].
+ NegateBase) ? VSF_FLAG_ALL :
+ VSF_FLAG_NONE) | (src[0].
+ RelAddr << 4);
+ o_inst->src[2] = 0;
+ goto next;
+
+ case OPCODE_FLR:
+ /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
+ ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
+
+ o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
+ t_dst_mask(vpi->DstReg.
+ WriteMask),
+ VSF_OUT_CLASS_TMP);
+
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = ZERO_SRC_0;
+ o_inst->src[2] = ZERO_SRC_0;
+ o_inst++;
+
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = MAKE_VSF_SOURCE(u_temp_i,
+ VSF_IN_COMPONENT_X,
+ VSF_IN_COMPONENT_Y,
+ VSF_IN_COMPONENT_Z,
+ VSF_IN_COMPONENT_W,
+ VSF_IN_CLASS_TMP,
+ /* Not 100% sure about this */
+ (!src[0].
+ NegateBase) ?
+ VSF_FLAG_ALL :
+ VSF_FLAG_NONE
+ /*VSF_FLAG_ALL */ );
+
+ o_inst->src[2] = ZERO_SRC_0;
+ u_temp_i--;
+ goto next;
+
+ case OPCODE_LG2: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_LG2,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+
+ o_inst->src[0] =
+ MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 0)),
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_ALL :
+ VSF_FLAG_NONE) | (src[0].
+ RelAddr << 4);
+ o_inst->src[1] = ZERO_SRC_0;
+ o_inst->src[2] = ZERO_SRC_0;
+ goto next;
+
+ case OPCODE_LIT: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_LIT,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+			/* NOTE: User-specified swizzling might not work. */
+ o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+ VSF_IN_COMPONENT_ZERO, // z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+ t_src_class(src[0].
+ File),
+ src[0].
+ NegateBase ?
+ VSF_FLAG_ALL :
+ VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+ VSF_IN_COMPONENT_ZERO, // z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+ t_src_class(src[0].
+ File),
+ src[0].
+ NegateBase ?
+ VSF_FLAG_ALL :
+ VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+ VSF_IN_COMPONENT_ZERO, // z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+ t_src_class(src[0].
+ File),
+ src[0].
+ NegateBase ?
+ VSF_FLAG_ALL :
+ VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ goto next;
+
+ case OPCODE_DPH: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_DOT,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+
+ o_inst->src[0] =
+ MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 1)),
+ t_swizzle(GET_SWZ
+ (src[0].Swizzle, 2)),
+ VSF_IN_COMPONENT_ONE,
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_XYZ :
+ VSF_FLAG_NONE) | (src[0].
+ RelAddr << 4);
+ o_inst->src[1] = t_src(vp, &src[1]);
+ o_inst->src[2] = ZERO_SRC_1;
+ goto next;
+
+ case OPCODE_XPD:
+ /* mul r0, r1.yzxw, r2.zxyw
+ mad r0, -r2.yzxw, r1.zxyw, r0
+ NOTE: might need MAD_2
+ */
+
+ o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
+ t_dst_mask(vpi->DstReg.
+ WriteMask),
+ VSF_OUT_CLASS_TMP);
+
+ o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+ t_src_class(src[0].
+ File),
+ src[0].
+ NegateBase ?
+ VSF_FLAG_ALL :
+ VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+
+ o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
+ t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
+ t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
+ t_src_class(src[1].
+ File),
+ src[1].
+ NegateBase ?
+ VSF_FLAG_ALL :
+ VSF_FLAG_NONE) |
+ (src[1].RelAddr << 4);
+
+ o_inst->src[2] = ZERO_SRC_1;
+ o_inst++;
+ u_temp_i--;
+
+ o_inst->op =
+ MAKE_VSF_OP(R300_VPI_OUT_OP_MAD,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+
+ o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
+ t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
+ t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
+ t_src_class(src[1].
+ File),
+ (!src[1].
+ NegateBase) ?
+ VSF_FLAG_ALL :
+ VSF_FLAG_NONE) |
+ (src[1].RelAddr << 4);
+
+ o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+ t_src_class(src[0].
+ File),
+ src[0].
+ NegateBase ?
+ VSF_FLAG_ALL :
+ VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+
+ o_inst->src[2] = MAKE_VSF_SOURCE(u_temp_i + 1,
+ VSF_IN_COMPONENT_X,
+ VSF_IN_COMPONENT_Y,
+ VSF_IN_COMPONENT_Z,
+ VSF_IN_COMPONENT_W,
+ VSF_IN_CLASS_TMP,
+ VSF_FLAG_NONE);
+
+ goto next;
+
+ case OPCODE_RCC:
+			fprintf(stderr, "Don't know how to handle op %d yet\n",
+ vpi->Opcode);
+ _mesa_exit(-1);
+ break;
+ case OPCODE_END:
+ break;
+ default:
+ break;
+ }
+
+ o_inst->op =
+ MAKE_VSF_OP(t_opcode(vpi->Opcode),
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+
+ if (are_srcs_scalar) {
+ switch (operands) {
+ case 1:
+ o_inst->src[0] = t_src_scalar(vp, &src[0]);
+ o_inst->src[1] = ZERO_SRC_0;
+ o_inst->src[2] = ZERO_SRC_0;
+ break;
+
+ case 2:
+ o_inst->src[0] = t_src_scalar(vp, &src[0]);
+ o_inst->src[1] = t_src_scalar(vp, &src[1]);
+ o_inst->src[2] = ZERO_SRC_1;
+ break;
+
+ case 3:
+ o_inst->src[0] = t_src_scalar(vp, &src[0]);
+ o_inst->src[1] = t_src_scalar(vp, &src[1]);
+ o_inst->src[2] = t_src_scalar(vp, &src[2]);
+ break;
+
+ default:
+ fprintf(stderr,
+ "scalars and op RCC not handled yet");
+ _mesa_exit(-1);
+ break;
+ }
+ } else {
+ switch (operands) {
+ case 1:
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = ZERO_SRC_0;
+ o_inst->src[2] = ZERO_SRC_0;
+ break;
+
+ case 2:
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = t_src(vp, &src[1]);
+ o_inst->src[2] = ZERO_SRC_1;
+ break;
+
+ case 3:
+ o_inst->src[0] = t_src(vp, &src[0]);
+ o_inst->src[1] = t_src(vp, &src[1]);
+ o_inst->src[2] = t_src(vp, &src[2]);
+ break;
+
+ default:
+ fprintf(stderr,
+ "scalars and op RCC not handled yet");
+ _mesa_exit(-1);
+ break;
+ }
+ }
+ next:;
+ }
+
+ /* Will most likely segfault before we get here... fix later. */
+ if (o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH / 4) {
+ vp->program.length = 0;
+ vp->native = GL_FALSE;
+ return;
+ }
+ vp->program.length = (o_inst - vp->program.body.i) * 4;
+#if 0
+ fprintf(stderr, "hw program:\n");
+ for (i = 0; i < vp->program.length; i++)
+ fprintf(stderr, "%08x\n", vp->program.body.d[i]);
+#endif
+}
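
Two of the special cases above rest on simple identities: OPCODE_FLR becomes FRC followed by an ADD because floor(x) = x - frac(x), and OPCODE_XPD becomes two MAD instructions because each cross-product component is a.yzx*b.zxy - a.zxy*b.yzx. A small C sketch of both identities, for illustration only:

#include <math.h>

/* floor via the fractional part, as the FRC + ADD pair above computes it */
static float flr_via_frc(float x)
{
	float frac = x - floorf(x);	/* what the hardware FRC op returns */
	return x - frac;
}

/* cross product as two multiply-accumulate passes, matching the XPD expansion */
static void xpd(const float a[3], const float b[3], float out[3])
{
	float tmp[3];
	int i;
	static const int yzx[3] = { 1, 2, 0 }, zxy[3] = { 2, 0, 1 };

	for (i = 0; i < 3; i++)
		tmp[i] = a[yzx[i]] * b[zxy[i]];	/* first MAD (really a MUL) */
	for (i = 0; i < 3; i++)
		out[i] = -a[zxy[i]] * b[yzx[i]] + tmp[i];	/* second MAD */
}
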
+
+static void position_invariant(struct gl_program *prog)
+{
+ struct prog_instruction *vpi;
+ struct gl_program_parameter_list *paramList;
+ int i;
+
+ gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
+
+ /* tokens[4] = matrix modifier */
+#ifdef PREFER_DP4
+ tokens[4] = 0; /* not transposed or inverted */
+#else
+ tokens[4] = STATE_MATRIX_TRANSPOSE;
+#endif
+ paramList = prog->Parameters;
+
+ vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
+ _mesa_init_instructions(vpi, prog->NumInstructions + 4);
+
+ for (i = 0; i < 4; i++) {
+ GLint idx;
+ tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
+ idx = _mesa_add_state_reference(paramList, tokens);
+#ifdef PREFER_DP4
+ vpi[i].Opcode = OPCODE_DP4;
+ vpi[i].StringPos = 0;
+ vpi[i].Data = 0;
+
+ vpi[i].DstReg.File = PROGRAM_OUTPUT;
+ vpi[i].DstReg.Index = VERT_RESULT_HPOS;
+ vpi[i].DstReg.WriteMask = 1 << i;
+ vpi[i].DstReg.CondMask = COND_TR;
+
+ vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
+ vpi[i].SrcReg[0].Index = idx;
+ vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ vpi[i].SrcReg[1].File = PROGRAM_INPUT;
+ vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
+ vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
+#else
+ if (i == 0)
+ vpi[i].Opcode = OPCODE_MUL;
+ else
+ vpi[i].Opcode = OPCODE_MAD;
+
+ vpi[i].StringPos = 0;
+ vpi[i].Data = 0;
+
+ if (i == 3)
+ vpi[i].DstReg.File = PROGRAM_OUTPUT;
+ else
+ vpi[i].DstReg.File = PROGRAM_TEMPORARY;
+ vpi[i].DstReg.Index = 0;
+ vpi[i].DstReg.WriteMask = 0xf;
+ vpi[i].DstReg.CondMask = COND_TR;
+
+ vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
+ vpi[i].SrcReg[0].Index = idx;
+ vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ vpi[i].SrcReg[1].File = PROGRAM_INPUT;
+ vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
+ vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
+
+ if (i > 0) {
+ vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
+ vpi[i].SrcReg[2].Index = 0;
+ vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
+ }
+#endif
+ }
+
+ _mesa_copy_instructions(&vpi[i], prog->Instructions,
+ prog->NumInstructions);
+
+ free(prog->Instructions);
+
+ prog->Instructions = vpi;
+
+ prog->NumInstructions += 4;
+ vpi = &prog->Instructions[prog->NumInstructions - 1];
+
+ assert(vpi->Opcode == OPCODE_END);
+}
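
position_invariant prepends code that computes clip-space position from the raw POS attribute, either as four DP4s against the MVP rows (PREFER_DP4) or as one MUL plus three MADs against the transposed-matrix rows, i.e. the MVP columns. Both evaluate hpos = MVP * position; a purely illustrative sketch of the column-accumulation form the MAD path builds:

/* Illustrative only: hpos = sum_i column_i(MVP) * position[i], which is what
 * the MUL + 3x MAD sequence generated above evaluates. */
static void transform_position(const float mvp_cols[4][4], const float pos[4],
			       float hpos[4])
{
	int i, c;

	for (c = 0; c < 4; c++)
		hpos[c] = mvp_cols[0][c] * pos[0];	/* the initial MUL */
	for (i = 1; i < 4; i++)
		for (c = 0; c < 4; c++)
			hpos[c] += mvp_cols[i][c] * pos[i];	/* the MADs */
}
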
+
+static void insert_wpos(struct r300_vertex_program *vp,
+ struct gl_program *prog, GLuint temp_index)
+{
+ struct prog_instruction *vpi;
+ struct prog_instruction *vpi_insert;
+ int i = 0;
+
+ vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
+ _mesa_init_instructions(vpi, prog->NumInstructions + 2);
+ /* all but END */
+ _mesa_copy_instructions(vpi, prog->Instructions,
+ prog->NumInstructions - 1);
+ /* END */
+ _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
+ &prog->Instructions[prog->NumInstructions - 1],
+ 1);
+ vpi_insert = &vpi[prog->NumInstructions - 1];
+
+ vpi_insert[i].Opcode = OPCODE_MOV;
+
+ vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
+ vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
+ vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
+ vpi_insert[i].DstReg.CondMask = COND_TR;
+
+ vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
+ vpi_insert[i].SrcReg[0].Index = temp_index;
+ vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ i++;
+
+ vpi_insert[i].Opcode = OPCODE_MOV;
+
+ vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
+ vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
+ vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
+ vpi_insert[i].DstReg.CondMask = COND_TR;
+
+ vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
+ vpi_insert[i].SrcReg[0].Index = temp_index;
+ vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ i++;
+
+ free(prog->Instructions);
+
+ prog->Instructions = vpi;
+
+ prog->NumInstructions += i;
+ vpi = &prog->Instructions[prog->NumInstructions - 1];
+
+ assert(vpi->Opcode == OPCODE_END);
+}
+
+static void pos_as_texcoord(struct r300_vertex_program *vp,
+ struct gl_program *prog)
+{
+ struct prog_instruction *vpi;
+ GLuint tempregi = prog->NumTemporaries;
+ /* should do something else if no temps left... */
+ prog->NumTemporaries++;
+
+ for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
+ if (vpi->DstReg.File == PROGRAM_OUTPUT &&
+ vpi->DstReg.Index == VERT_RESULT_HPOS) {
+ vpi->DstReg.File = PROGRAM_TEMPORARY;
+ vpi->DstReg.Index = tempregi;
+ }
+ }
+ insert_wpos(vp, prog, tempregi);
+}
+
+static struct r300_vertex_program *build_program(struct r300_vertex_program_key
+ *wanted_key, struct gl_vertex_program
+ *mesa_vp, GLint wpos_idx)
+{
+ struct r300_vertex_program *vp;
+
+ vp = _mesa_calloc(sizeof(*vp));
+ _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
+
+ vp->wpos_idx = wpos_idx;
+
+ if (mesa_vp->IsPositionInvariant) {
+ position_invariant(&mesa_vp->Base);
+ }
+
+ if (wpos_idx > -1)
+ pos_as_texcoord(vp, &mesa_vp->Base);
+
+ assert(mesa_vp->Base.NumInstructions);
+
+ vp->num_temporaries = mesa_vp->Base.NumTemporaries;
+
+ r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
+
+ return vp;
+}
+
+void r300SelectVertexShader(r300ContextPtr r300)
+{
+	GLcontext *ctx = r300->radeon.glCtx;
+ GLuint InputsRead;
+ struct r300_vertex_program_key wanted_key = { 0 };
+ GLint i;
+ struct r300_vertex_program_cont *vpc;
+ struct r300_vertex_program *vp;
+ GLint wpos_idx;
+
+ vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+ InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+
+ wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS;
+
+ wpos_idx = -1;
+ if (InputsRead & FRAG_BIT_WPOS) {
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+ if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
+ break;
+
+ if (i == ctx->Const.MaxTextureUnits) {
+ fprintf(stderr, "\tno free texcoord found\n");
+ _mesa_exit(-1);
+ }
+
+ InputsRead |= (FRAG_BIT_TEX0 << i);
+ wpos_idx = i;
+ }
+
+ if (InputsRead & FRAG_BIT_COL0)
+ wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0;
+
+ if ((InputsRead & FRAG_BIT_COL1) /*||
+ (InputsRead & FRAG_BIT_FOGC) */ )
+ wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1;
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+ if (InputsRead & (FRAG_BIT_TEX0 << i))
+ wanted_key.OutputsWritten |=
+ 1 << (VERT_RESULT_TEX0 + i);
+
+ wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
+ if (vpc->mesa_program.IsPositionInvariant) {
+		/* we want position, don't we? */
+ wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
+ }
+
+ for (vp = vpc->progs; vp; vp = vp->next)
+ if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) ==
+ 0) {
+ r300->selected_vp = vp;
+ return;
+ }
+ //_mesa_print_program(&vpc->mesa_program.Base);
+
+ vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
+ vp->next = vpc->progs;
+ vpc->progs = vp;
+ r300->selected_vp = vp;
+}
diff --git a/r300/r300_vertprog.h b/r300/r300_vertprog.h
new file mode 100644
index 0000000..252d5a9
--- /dev/null
+++ b/r300/r300_vertprog.h
@@ -0,0 +1,89 @@
+#ifndef __R300_VERTPROG_H_
+#define __R300_VERTPROG_H_
+
+#include "r300_reg.h"
+
+typedef struct {
+ GLuint op;
+ GLuint src[3];
+} VERTEX_SHADER_INSTRUCTION;
+
+#define VSF_FLAG_X 1
+#define VSF_FLAG_Y 2
+#define VSF_FLAG_Z 4
+#define VSF_FLAG_W 8
+#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z)
+#define VSF_FLAG_ALL 0xf
+#define VSF_FLAG_NONE 0
+
+#define VSF_OUT_CLASS_TMP 0
+#define VSF_OUT_CLASS_ADDR 1
+#define VSF_OUT_CLASS_RESULT 2
+
+/* first DWORD of an instruction */
+
+/* possible operations:
+ DOT, MUL, ADD, MAD, FRC, MAX, MIN, SGE, SLT, EXP, LOG, LIT, POW, RCP, RSQ, EX2,
+ LG2, MAD_2 */
+
+#define MAKE_VSF_OP(op, out_reg_index, out_reg_fields, class) \
+ ((op) \
+ | ((out_reg_index) << R300_VPI_OUT_REG_INDEX_SHIFT) \
+ | ((out_reg_fields) << 20) \
+ | ( (class) << 8 ) )
+
+#define EASY_VSF_OP(op, out_reg_index, out_reg_fields, class) \
+ MAKE_VSF_OP(R300_VPI_OUT_OP_##op, out_reg_index, VSF_FLAG_##out_reg_fields, VSF_OUT_CLASS_##class) \
+
+/* according to Nikolai, the subsequent 3 DWORDs are sources, use same define for each */
+
+#define VSF_IN_CLASS_TMP 0
+#define VSF_IN_CLASS_ATTR 1
+#define VSF_IN_CLASS_PARAM 2
+#define VSF_IN_CLASS_NONE 9
+
+#define VSF_IN_COMPONENT_X 0
+#define VSF_IN_COMPONENT_Y 1
+#define VSF_IN_COMPONENT_Z 2
+#define VSF_IN_COMPONENT_W 3
+#define VSF_IN_COMPONENT_ZERO 4
+#define VSF_IN_COMPONENT_ONE 5
+
+#define MAKE_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \
+ ( ((in_reg_index)<<R300_VPI_IN_REG_INDEX_SHIFT) \
+ | ((comp_x)<<R300_VPI_IN_X_SHIFT) \
+ | ((comp_y)<<R300_VPI_IN_Y_SHIFT) \
+ | ((comp_z)<<R300_VPI_IN_Z_SHIFT) \
+ | ((comp_w)<<R300_VPI_IN_W_SHIFT) \
+ | ((negate)<<25) | ((class)))
+
+#define EASY_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \
+ MAKE_VSF_SOURCE(in_reg_index, \
+ VSF_IN_COMPONENT_##comp_x, \
+ VSF_IN_COMPONENT_##comp_y, \
+ VSF_IN_COMPONENT_##comp_z, \
+ VSF_IN_COMPONENT_##comp_w, \
+ VSF_IN_CLASS_##class, VSF_FLAG_##negate)
+
+/* special sources: */
+
+/* (1.0,1.0,1.0,1.0) vector (ATTR, plain ) */
+#define VSF_ATTR_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, ATTR, NONE)
+#define VSF_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, NONE, NONE)
+
+/* contents of unmodified register */
+#define VSF_REG(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, ATTR, NONE)
+
+/* contents of unmodified parameter */
+#define VSF_PARAM(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, PARAM, NONE)
+
+/* contents of unmodified temporary register */
+#define VSF_TMP(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, TMP, NONE)
+
+/* components of ATTR register */
+#define VSF_ATTR_X(reg) EASY_VSF_SOURCE(reg, X, X, X, X, ATTR, NONE)
+#define VSF_ATTR_Y(reg) EASY_VSF_SOURCE(reg, Y, Y, Y, Y, ATTR, NONE)
+#define VSF_ATTR_Z(reg) EASY_VSF_SOURCE(reg, Z, Z, Z, Z, ATTR, NONE)
+#define VSF_ATTR_W(reg) EASY_VSF_SOURCE(reg, W, W, W, W, ATTR, NONE)
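+
+/* Illustrative sketch only (not used by the driver): with the helpers
+ * above, a MAD writing the x,y,z components of temporary 0 from
+ * attribute 1, parameter 2 and temporary 3 could be encoded as below.
+ * This assumes R300_VPI_OUT_OP_MAD is among the opcodes provided by
+ * r300_reg.h (see the operations list above).
+ */
+#if 0
+static const VERTEX_SHADER_INSTRUCTION example_mad = {
+	EASY_VSF_OP(MAD, 0, XYZ, TMP),	/* dest: TMP[0].xyz */
+	{
+		VSF_REG(1),	/* src0: ATTR[1], unswizzled */
+		VSF_PARAM(2),	/* src1: PARAM[2], unswizzled */
+		VSF_TMP(3),	/* src2: TMP[3], unswizzled */
+	}
+};
+#endif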
+
+#endif
diff --git a/r300/radeon_context.c b/r300/radeon_context.c
new file mode 100644
index 0000000..e9634b4
--- /dev/null
+++ b/r300/radeon_context.c
@@ -0,0 +1,327 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file radeon_context.c
+ * Common context initialization.
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <dlfcn.h>
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+#include "state.h"
+#include "matrix.h"
+#include "framebuffer.h"
+
+#include "drivers/common/driverfuncs.h"
+#include "swrast/swrast.h"
+
+#include "radeon_screen.h"
+#include "radeon_ioctl.h"
+#include "radeon_macros.h"
+#include "radeon_reg.h"
+
+#include "radeon_state.h"
+#include "r300_state.h"
+
+#include "utils.h"
+#include "vblank.h"
+#include "xmlpool.h" /* for symbolic values of enum-type options */
+
+#define DRIVER_DATE "20060815"
+
+
+/* Return various strings for glGetString().
+ */
+static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ static char buffer[128];
+
+ switch (name) {
+ case GL_VENDOR:
+ if (IS_R300_CLASS(radeon->radeonScreen))
+ return (GLubyte *) "DRI R300 Project";
+ else
+ return (GLubyte *) "Tungsten Graphics, Inc.";
+
+ case GL_RENDERER:
+ {
+ unsigned offset;
+ GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
+ radeon->radeonScreen->AGPMode;
+ const char* chipname;
+
+ if (IS_R300_CLASS(radeon->radeonScreen))
+ chipname = "R300";
+ else
+ chipname = "R200";
+
+ offset = driGetRendererString(buffer, chipname, DRIVER_DATE,
+ agp_mode);
+
+ if (IS_R300_CLASS(radeon->radeonScreen)) {
+ sprintf(&buffer[offset], " %sTCL",
+ (radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
+ ? "" : "NO-");
+ } else {
+ sprintf(&buffer[offset], " %sTCL",
+ !(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
+ ? "" : "NO-");
+ }
+
+ return (GLubyte *) buffer;
+ }
+
+ default:
+ return NULL;
+ }
+}
+
+/* Initialize the driver's misc functions.
+ */
+static void radeonInitDriverFuncs(struct dd_function_table *functions)
+{
+ functions->GetString = radeonGetString;
+}
+
+
+/**
+ * Create and initialize all common fields of the context,
+ * including the Mesa context itself.
+ */
+GLboolean radeonInitContext(radeonContextPtr radeon,
+ struct dd_function_table* functions,
+ const __GLcontextModes * glVisual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate)
+{
+ __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+ radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+ GLcontext* ctx;
+ GLcontext* shareCtx;
+ int fthrottle_mode;
+
+ /* Fill in additional standard functions. */
+ radeonInitDriverFuncs(functions);
+
+ /* Allocate and initialize the Mesa context */
+ if (sharedContextPrivate)
+ shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
+ else
+ shareCtx = NULL;
+ radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
+ functions, (void *)radeon);
+ if (!radeon->glCtx)
+ return GL_FALSE;
+
+ ctx = radeon->glCtx;
+ driContextPriv->driverPrivate = radeon;
+
+ /* DRI fields */
+ radeon->dri.context = driContextPriv;
+ radeon->dri.screen = sPriv;
+ radeon->dri.drawable = NULL;
+ radeon->dri.readable = NULL;
+ radeon->dri.hwContext = driContextPriv->hHWContext;
+ radeon->dri.hwLock = &sPriv->pSAREA->lock;
+ radeon->dri.fd = sPriv->fd;
+ radeon->dri.drmMinor = sPriv->drmMinor;
+
+ radeon->radeonScreen = screen;
+ radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
+ screen->sarea_priv_offset);
+
+ /* Setup IRQs */
+ fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
+ radeon->iw.irq_seq = -1;
+ radeon->irqsEmitted = 0;
+ radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
+ radeon->radeonScreen->irq);
+
+ radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+
+ if (!radeon->do_irqs)
+ fprintf(stderr,
+ "IRQ's not enabled, falling back to %s: %d %d\n",
+ radeon->do_usleeps ? "usleeps" : "busy waits",
+ fthrottle_mode, radeon->radeonScreen->irq);
+
+ radeon->vblank_flags = (radeon->radeonScreen->irq != 0)
+ ? driGetDefaultVBlankFlags(&radeon->optionCache) : VBLANK_FLAG_NO_IRQ;
+
+ (*dri_interface->getUST) (&radeon->swap_ust);
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Cleanup common context fields.
+ * Called by r200DestroyContext/r300DestroyContext
+ */
+void radeonCleanupContext(radeonContextPtr radeon)
+{
+ /* _mesa_destroy_context() might result in calls to functions that
+ * depend on the DriverCtx, so don't set it to NULL before.
+ *
+ * radeon->glCtx->DriverCtx = NULL;
+ */
+
+ /* free the Mesa context */
+ _mesa_destroy_context(radeon->glCtx);
+
+ if (radeon->state.scissor.pClipRects) {
+ FREE(radeon->state.scissor.pClipRects);
+ radeon->state.scissor.pClipRects = 0;
+ }
+}
+
+
+/**
+ * Swap front and back buffer.
+ */
+void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
+{
+ if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+ radeonContextPtr radeon;
+ GLcontext *ctx;
+
+ radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+ ctx = radeon->glCtx;
+
+ if (ctx->Visual.doubleBufferMode) {
+			_mesa_notifySwapBuffers(ctx);	/* flush pending rendering commands */
+ if (radeon->doPageFlip) {
+ radeonPageFlip(dPriv);
+ } else {
+ radeonCopyBuffer(dPriv, NULL);
+ }
+ }
+ } else {
+ /* XXX this shouldn't be an error but we can't handle it for now */
+ _mesa_problem(NULL, "%s: drawable has no context!",
+ __FUNCTION__);
+ }
+}
+
+void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+ int x, int y, int w, int h )
+{
+ if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+ radeonContextPtr radeon;
+ GLcontext *ctx;
+
+ radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+ ctx = radeon->glCtx;
+
+ if (ctx->Visual.doubleBufferMode) {
+ drm_clip_rect_t rect;
+ rect.x1 = x + dPriv->x;
+ rect.y1 = (dPriv->h - y - h) + dPriv->y;
+ rect.x2 = rect.x1 + w;
+ rect.y2 = rect.y1 + h;
+			_mesa_notifySwapBuffers(ctx);	/* flush pending rendering commands */
+ radeonCopyBuffer(dPriv, &rect);
+ }
+ } else {
+ /* XXX this shouldn't be an error but we can't handle it for now */
+ _mesa_problem(NULL, "%s: drawable has no context!",
+ __FUNCTION__);
+ }
+}
+
+/* Force the context `c' to be the current context and associate with it
+ * buffer `b'.
+ */
+GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+ __DRIdrawablePrivate * driDrawPriv,
+ __DRIdrawablePrivate * driReadPriv)
+{
+ if (driContextPriv) {
+ radeonContextPtr radeon =
+ (radeonContextPtr) driContextPriv->driverPrivate;
+
+ if (RADEON_DEBUG & DEBUG_DRI)
+ fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
+ radeon->glCtx);
+
+ if (radeon->dri.drawable != driDrawPriv) {
+ driDrawableInitVBlank(driDrawPriv,
+ radeon->vblank_flags,
+ &radeon->vbl_seq);
+ }
+
+ radeon->dri.readable = driReadPriv;
+
+ if (radeon->dri.drawable != driDrawPriv ||
+ radeon->lastStamp != driDrawPriv->lastStamp) {
+ radeon->dri.drawable = driDrawPriv;
+
+ radeonSetCliprects(radeon);
+ r300UpdateViewportOffset(radeon->glCtx);
+ }
+
+ _mesa_make_current(radeon->glCtx,
+ (GLframebuffer *) driDrawPriv->
+ driverPrivate,
+ (GLframebuffer *) driReadPriv->
+ driverPrivate);
+
+ _mesa_update_state(radeon->glCtx);
+
+ radeonUpdatePageFlipping(radeon);
+ } else {
+ if (RADEON_DEBUG & DEBUG_DRI)
+ fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+ _mesa_make_current(0, 0, 0);
+ }
+
+ if (RADEON_DEBUG & DEBUG_DRI)
+ fprintf(stderr, "End %s\n", __FUNCTION__);
+ return GL_TRUE;
+}
+
+/* Force the context `c' to be unbound from its buffer.
+ */
+GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv)
+{
+ radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+
+ if (RADEON_DEBUG & DEBUG_DRI)
+ fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
+ radeon->glCtx);
+
+ return GL_TRUE;
+}
+
diff --git a/r300/radeon_context.h b/r300/radeon_context.h
new file mode 100644
index 0000000..2f23941
--- /dev/null
+++ b/r300/radeon_context.h
@@ -0,0 +1,246 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+ VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Gareth Hughes <gareth@valinux.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Kevin E. Martin <martin@valinux.com>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __RADEON_CONTEXT_H__
+#define __RADEON_CONTEXT_H__
+
+#include "mtypes.h"
+#include "radeon_screen.h"
+#include "drm.h"
+#include "dri_util.h"
+#include "colormac.h"
+
+struct radeon_context;
+typedef struct radeon_context radeonContextRec;
+typedef struct radeon_context *radeonContextPtr;
+
+#define TEX_0 0x1
+#define TEX_1 0x2
+#define TEX_2 0x4
+#define TEX_3 0x8
+#define TEX_4 0x10
+#define TEX_5 0x20
+#define TEX_6 0x40
+#define TEX_7 0x80
+#define TEX_ALL 0xff
+
+/* Rasterizing fallbacks */
+/* See corresponding strings in r200_swtcl.c */
+#define RADEON_FALLBACK_TEXTURE 0x0001
+#define RADEON_FALLBACK_DRAW_BUFFER 0x0002
+#define RADEON_FALLBACK_STENCIL 0x0004
+#define RADEON_FALLBACK_RENDER_MODE 0x0008
+#define RADEON_FALLBACK_BLEND_EQ 0x0010
+#define RADEON_FALLBACK_BLEND_FUNC 0x0020
+#define RADEON_FALLBACK_DISABLE 0x0040
+#define RADEON_FALLBACK_BORDER_MODE 0x0080
+
+#if R200_MERGED
+extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
+
+#define FALLBACK( radeon, bit, mode ) do { \
+ if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
+ __FUNCTION__, bit, mode ); \
+ radeonFallback( (radeon)->glCtx, bit, mode ); \
+} while (0)
+#else
+#define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%d\n", __FILE__, __LINE__);
+#endif
+
+/* TCL fallbacks */
+extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
+
+#define RADEON_TCL_FALLBACK_RASTER 0x0001 /* rasterization */
+#define RADEON_TCL_FALLBACK_UNFILLED 0x0002 /* unfilled tris */
+#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x0004 /* twoside tris */
+#define RADEON_TCL_FALLBACK_MATERIAL 0x0008 /* material in vb */
+#define RADEON_TCL_FALLBACK_TEXGEN_0 0x0010 /* texgen, unit 0 */
+#define RADEON_TCL_FALLBACK_TEXGEN_1 0x0020 /* texgen, unit 1 */
+#define RADEON_TCL_FALLBACK_TEXGEN_2 0x0040 /* texgen, unit 2 */
+#define RADEON_TCL_FALLBACK_TEXGEN_3 0x0080 /* texgen, unit 3 */
+#define RADEON_TCL_FALLBACK_TEXGEN_4 0x0100 /* texgen, unit 4 */
+#define RADEON_TCL_FALLBACK_TEXGEN_5 0x0200 /* texgen, unit 5 */
+#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x0400 /* user disable */
+#define RADEON_TCL_FALLBACK_BITMAP 0x0800 /* draw bitmap with points */
+#define RADEON_TCL_FALLBACK_VERTEX_PROGRAM 0x1000 /* vertex program active */
+
+#if R200_MERGED
+#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode )
+#else
+#define TCL_FALLBACK( ctx, bit, mode ) ;
+#endif
+
+struct radeon_dri_mirror {
+ __DRIcontextPrivate *context; /* DRI context */
+ __DRIscreenPrivate *screen; /* DRI screen */
+ /**
+ * DRI drawable bound to this context for drawing.
+ */
+ __DRIdrawablePrivate *drawable;
+
+ /**
+ * DRI drawable bound to this context for reading.
+ */
+ __DRIdrawablePrivate *readable;
+
+ drm_context_t hwContext;
+ drm_hw_lock_t *hwLock;
+ int fd;
+ int drmMinor;
+};
+
+/**
+ * Derived state for internal purposes.
+ */
+struct radeon_scissor_state {
+ drm_clip_rect_t rect;
+ GLboolean enabled;
+
+ GLuint numClipRects; /* Cliprects active */
+ GLuint numAllocedClipRects; /* Cliprects available */
+ drm_clip_rect_t *pClipRects;
+};
+
+struct radeon_colorbuffer_state {
+ GLuint clear;
+ GLint drawOffset, drawPitch;
+};
+
+struct radeon_state {
+ struct radeon_colorbuffer_state color;
+ struct radeon_scissor_state scissor;
+};
+
+/**
+ * Common per-context variables shared by R200 and R300.
+ * R200- and R300-specific code "derive" their own context from this
+ * structure.
+ */
+struct radeon_context {
+ GLcontext *glCtx; /* Mesa context */
+ radeonScreenPtr radeonScreen; /* Screen private DRI data */
+
+ /* Fallback state */
+ GLuint Fallback;
+ GLuint TclFallback;
+
+ /* Page flipping */
+ GLuint doPageFlip;
+
+ /* Drawable, cliprect and scissor information */
+ GLuint numClipRects; /* Cliprects for the draw buffer */
+ drm_clip_rect_t *pClipRects;
+ unsigned int lastStamp;
+ GLboolean lost_context;
+ drm_radeon_sarea_t *sarea; /* Private SAREA data */
+
+ /* Mirrors of some DRI state */
+ struct radeon_dri_mirror dri;
+
+ /* Busy waiting */
+ GLuint do_usleeps;
+ GLuint do_irqs;
+ GLuint irqsEmitted;
+ drm_radeon_irq_wait_t iw;
+
+ /* VBI / buffer swap */
+ GLuint vbl_seq;
+ GLuint vblank_flags;
+
+ int64_t swap_ust;
+ int64_t swap_missed_ust;
+
+ GLuint swap_count;
+ GLuint swap_missed_count;
+
+ /* Derived state */
+ struct radeon_state state;
+
+ /* Configuration cache
+ */
+ driOptionCache optionCache;
+};
+
+#define RADEON_CONTEXT(glctx) ((radeonContextPtr)((glctx)->DriverCtx))
+
+extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
+extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+ int x, int y, int w, int h);
+extern GLboolean radeonInitContext(radeonContextPtr radeon,
+ struct dd_function_table *functions,
+ const __GLcontextModes * glVisual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate);
+extern void radeonCleanupContext(radeonContextPtr radeon);
+extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+ __DRIdrawablePrivate * driDrawPriv,
+ __DRIdrawablePrivate * driReadPriv);
+extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
+
+/* ================================================================
+ * Debugging:
+ */
+#define DO_DEBUG 1
+
+#if DO_DEBUG
+extern int RADEON_DEBUG;
+#else
+#define RADEON_DEBUG 0
+#endif
+
+#define DEBUG_TEXTURE 0x0001
+#define DEBUG_STATE 0x0002
+#define DEBUG_IOCTL 0x0004
+#define DEBUG_PRIMS 0x0008
+#define DEBUG_VERTS 0x0010
+#define DEBUG_FALLBACKS 0x0020
+#define DEBUG_VFMT 0x0040
+#define DEBUG_CODEGEN 0x0080
+#define DEBUG_VERBOSE 0x0100
+#define DEBUG_DRI 0x0200
+#define DEBUG_DMA 0x0400
+#define DEBUG_SANITY 0x0800
+#define DEBUG_SYNC 0x1000
+#define DEBUG_PIXEL 0x2000
+#define DEBUG_MEMORY 0x4000
+
+#endif /* __RADEON_CONTEXT_H__ */
diff --git a/r300/radeon_ioctl.c b/r300/radeon_ioctl.c
new file mode 100644
index 0000000..0b8656b
--- /dev/null
+++ b/r300/radeon_ioctl.c
@@ -0,0 +1,394 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <sched.h>
+#include <errno.h>
+
+#include "glheader.h"
+#include "imports.h"
+#include "macros.h"
+#include "context.h"
+#include "swrast/swrast.h"
+#include "r300_context.h"
+#include "radeon_ioctl.h"
+#include "r300_ioctl.h"
+#include "r300_state.h"
+#include "radeon_reg.h"
+
+#include "drirenderbuffer.h"
+#include "vblank.h"
+
+static void radeonWaitForIdle(radeonContextPtr radeon);
+
+/* ================================================================
+ * SwapBuffers with client-side throttling
+ */
+
+static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
+{
+ drm_radeon_getparam_t gp;
+ int ret;
+ uint32_t frame;
+
+ gp.param = RADEON_PARAM_LAST_FRAME;
+ gp.value = (int *)&frame;
+ ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+ &gp, sizeof(gp));
+ if (ret) {
+ fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+ ret);
+ exit(1);
+ }
+
+ return frame;
+}
+
+uint32_t radeonGetAge(radeonContextPtr radeon)
+{
+ drm_radeon_getparam_t gp;
+ int ret;
+ uint32_t age;
+
+ gp.param = RADEON_PARAM_LAST_CLEAR;
+ gp.value = (int *)&age;
+ ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+ &gp, sizeof(gp));
+ if (ret) {
+ fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+ ret);
+ exit(1);
+ }
+
+ return age;
+}
+
+static void radeonEmitIrqLocked(radeonContextPtr radeon)
+{
+ drm_radeon_irq_emit_t ie;
+ int ret;
+
+ ie.irq_seq = &radeon->iw.irq_seq;
+ ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
+ &ie, sizeof(ie));
+ if (ret) {
+ fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
+ ret);
+ exit(1);
+ }
+}
+
+static void radeonWaitIrq(radeonContextPtr radeon)
+{
+ int ret;
+
+ do {
+ ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
+ &radeon->iw, sizeof(radeon->iw));
+ } while (ret && (errno == EINTR || errno == EBUSY));
+
+ if (ret) {
+ fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
+ ret);
+ exit(1);
+ }
+}
+
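+/* Wait until the hardware has caught up with the frame counter stored
+ * in the SAREA before another swap is queued.  With IRQs available we
+ * sleep on an emitted interrupt; otherwise we poll
+ * RADEON_PARAM_LAST_FRAME, optionally usleep()ing between polls.
+ */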
+static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
+{
+ drm_radeon_sarea_t *sarea = radeon->sarea;
+
+ if (radeon->do_irqs) {
+ if (radeonGetLastFrame(radeon) < sarea->last_frame) {
+ if (!radeon->irqsEmitted) {
+ while (radeonGetLastFrame(radeon) <
+ sarea->last_frame) ;
+ } else {
+ UNLOCK_HARDWARE(radeon);
+ radeonWaitIrq(radeon);
+ LOCK_HARDWARE(radeon);
+ }
+ radeon->irqsEmitted = 10;
+ }
+
+ if (radeon->irqsEmitted) {
+ radeonEmitIrqLocked(radeon);
+ radeon->irqsEmitted--;
+ }
+ } else {
+ while (radeonGetLastFrame(radeon) < sarea->last_frame) {
+ UNLOCK_HARDWARE(radeon);
+ if (radeon->do_usleeps)
+ DO_USLEEP(1);
+ LOCK_HARDWARE(radeon);
+ }
+ }
+}
+
+/* Copy the back color buffer to the front color buffer.
+ */
+void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv,
+ const drm_clip_rect_t * rect)
+{
+ radeonContextPtr radeon;
+ GLint nbox, i, ret;
+ GLboolean missed_target;
+ int64_t ust;
+
+ assert(dPriv);
+ assert(dPriv->driContextPriv);
+ assert(dPriv->driContextPriv->driverPrivate);
+
+ radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+
+ if (RADEON_DEBUG & DEBUG_IOCTL) {
+ fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__,
+ (void *)radeon->glCtx);
+ }
+
+ r300Flush(radeon->glCtx);
+
+ LOCK_HARDWARE(radeon);
+
+ /* Throttle the frame rate -- only allow one pending swap buffers
+ * request at a time.
+ */
+ radeonWaitForFrameCompletion(radeon);
+ if (!rect)
+ {
+ UNLOCK_HARDWARE(radeon);
+ driWaitForVBlank(dPriv, &radeon->vbl_seq, radeon->vblank_flags,
+ &missed_target);
+ LOCK_HARDWARE(radeon);
+ }
+
+ nbox = dPriv->numClipRects; /* must be in locked region */
+
+ for (i = 0; i < nbox;) {
+ GLint nr = MIN2(i + RADEON_NR_SAREA_CLIPRECTS, nbox);
+ drm_clip_rect_t *box = dPriv->pClipRects;
+ drm_clip_rect_t *b = radeon->sarea->boxes;
+ GLint n = 0;
+
+ for ( ; i < nr ; i++ ) {
+
+ *b = box[i];
+
+ if (rect)
+ {
+ if (rect->x1 > b->x1)
+ b->x1 = rect->x1;
+ if (rect->y1 > b->y1)
+ b->y1 = rect->y1;
+ if (rect->x2 < b->x2)
+ b->x2 = rect->x2;
+ if (rect->y2 < b->y2)
+ b->y2 = rect->y2;
+
+ if (b->x1 < b->x2 && b->y1 < b->y2)
+ b++;
+ }
+ else
+ b++;
+
+ n++;
+ }
+ radeon->sarea->nbox = n;
+
+ ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP);
+
+ if (ret) {
+ fprintf(stderr, "DRM_RADEON_SWAP: return = %d\n",
+ ret);
+ UNLOCK_HARDWARE(radeon);
+ exit(1);
+ }
+ }
+
+ UNLOCK_HARDWARE(radeon);
+ if (!rect)
+ {
+ ((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE;
+
+ radeon->swap_count++;
+ (*dri_interface->getUST) (&ust);
+ if (missed_target) {
+ radeon->swap_missed_count++;
+ radeon->swap_missed_ust = ust - radeon->swap_ust;
+ }
+
+ radeon->swap_ust = ust;
+
+ sched_yield();
+ }
+}
+
+void radeonPageFlip(const __DRIdrawablePrivate * dPriv)
+{
+ radeonContextPtr radeon;
+ GLint ret;
+ GLboolean missed_target;
+
+ assert(dPriv);
+ assert(dPriv->driContextPriv);
+ assert(dPriv->driContextPriv->driverPrivate);
+
+ radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+
+ if (RADEON_DEBUG & DEBUG_IOCTL) {
+ fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
+ radeon->sarea->pfCurrentPage);
+ }
+
+ r300Flush(radeon->glCtx);
+ LOCK_HARDWARE(radeon);
+
+ if (!dPriv->numClipRects) {
+ UNLOCK_HARDWARE(radeon);
+ usleep(10000); /* throttle invisible client 10ms */
+ return;
+ }
+
+ /* Need to do this for the perf box placement:
+ */
+ {
+ drm_clip_rect_t *box = dPriv->pClipRects;
+ drm_clip_rect_t *b = radeon->sarea->boxes;
+ b[0] = box[0];
+ radeon->sarea->nbox = 1;
+ }
+
+ /* Throttle the frame rate -- only allow a few pending swap buffers
+	 * requests at a time.
+ */
+ radeonWaitForFrameCompletion(radeon);
+ UNLOCK_HARDWARE(radeon);
+ driWaitForVBlank(dPriv, &radeon->vbl_seq, radeon->vblank_flags,
+ &missed_target);
+ if (missed_target) {
+ radeon->swap_missed_count++;
+ (void)(*dri_interface->getUST) (&radeon->swap_missed_ust);
+ }
+ LOCK_HARDWARE(radeon);
+
+ ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_FLIP);
+
+ UNLOCK_HARDWARE(radeon);
+
+ if (ret) {
+ fprintf(stderr, "DRM_RADEON_FLIP: return = %d\n", ret);
+ exit(1);
+ }
+
+ radeon->swap_count++;
+ (void)(*dri_interface->getUST) (&radeon->swap_ust);
+
+ driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer,
+ radeon->sarea->pfCurrentPage);
+
+ if (radeon->sarea->pfCurrentPage == 1) {
+ radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
+ radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
+ } else {
+ radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
+ radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
+ }
+
+ if (IS_R300_CLASS(radeon->radeonScreen)) {
+ r300ContextPtr r300 = (r300ContextPtr)radeon;
+ R300_STATECHANGE(r300, cb);
+ r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset +
+ r300->radeon.radeonScreen->fbLocation;
+ r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
+
+ if (r300->radeon.radeonScreen->cpp == 4)
+ r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+ else
+ r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+
+ if (r300->radeon.sarea->tiling_enabled)
+ r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+ }
+}
+
+void radeonWaitForIdleLocked(radeonContextPtr radeon)
+{
+ int ret;
+ int i = 0;
+
+ do {
+ ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
+ if (ret)
+ DO_USLEEP(1);
+ } while (ret && ++i < 100);
+
+ if (ret < 0) {
+ UNLOCK_HARDWARE(radeon);
+ fprintf(stderr, "Error: R300 timed out... exiting\n");
+ exit(-1);
+ }
+}
+
+static void radeonWaitForIdle(radeonContextPtr radeon)
+{
+ LOCK_HARDWARE(radeon);
+ radeonWaitForIdleLocked(radeon);
+ UNLOCK_HARDWARE(radeon);
+}
+
+void radeonFlush(GLcontext * ctx)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+
+ if (IS_R300_CLASS(radeon->radeonScreen))
+ r300Flush(ctx);
+}
+
+
+/* Make sure all commands have been sent to the hardware and have
+ * completed processing.
+ */
+void radeonFinish(GLcontext * ctx)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+
+ radeonFlush(ctx);
+
+ if (radeon->do_irqs) {
+ LOCK_HARDWARE(radeon);
+ radeonEmitIrqLocked(radeon);
+ UNLOCK_HARDWARE(radeon);
+ radeonWaitIrq(radeon);
+ } else
+ radeonWaitForIdle(radeon);
+}
diff --git a/r300/radeon_ioctl.h b/r300/radeon_ioctl.h
new file mode 100644
index 0000000..3a80d36
--- /dev/null
+++ b/r300/radeon_ioctl.h
@@ -0,0 +1,57 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __RADEON_IOCTL_H__
+#define __RADEON_IOCTL_H__
+
+#include "simple_list.h"
+#include "radeon_dri.h"
+#include "radeon_lock.h"
+
+#include "xf86drm.h"
+#include "drm.h"
+#if 0
+#include "r200_context.h"
+#endif
+#include "radeon_drm.h"
+
+extern void radeonCopyBuffer(const __DRIdrawablePrivate * drawable,
+ const drm_clip_rect_t * rect);
+extern void radeonPageFlip(const __DRIdrawablePrivate * drawable);
+extern void radeonFlush(GLcontext * ctx);
+extern void radeonFinish(GLcontext * ctx);
+extern void radeonWaitForIdleLocked(radeonContextPtr radeon);
+extern uint32_t radeonGetAge(radeonContextPtr radeon);
+
+#endif /* __RADEON_IOCTL_H__ */
diff --git a/r300/radeon_lock.c b/r300/radeon_lock.c
new file mode 100644
index 0000000..bc3c2d6
--- /dev/null
+++ b/r300/radeon_lock.c
@@ -0,0 +1,137 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+ VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Gareth Hughes <gareth@valinux.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Kevin E. Martin <martin@valinux.com>
+ */
+
+#include "radeon_lock.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "r300_context.h"
+#include "r300_state.h"
+
+#include "framebuffer.h"
+
+#include "drirenderbuffer.h"
+
+#if DEBUG_LOCKING
+char *prevLockFile = NULL;
+int prevLockLine = 0;
+#endif
+
+/* Turn on/off page flipping according to the flags in the sarea:
+ */
+void radeonUpdatePageFlipping(radeonContextPtr rmesa)
+{
+ int use_back;
+
+ rmesa->doPageFlip = rmesa->sarea->pfState;
+ if (rmesa->glCtx->WinSysDrawBuffer) {
+ driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+ rmesa->sarea->pfCurrentPage);
+ r300UpdateDrawBuffer(rmesa->glCtx);
+ }
+
+ use_back = rmesa->glCtx->DrawBuffer ?
+ (rmesa->glCtx->DrawBuffer->_ColorDrawBufferMask[0] ==
+ BUFFER_BIT_BACK_LEFT) : 1;
+ use_back ^= (rmesa->sarea->pfCurrentPage == 1);
+
+ if (use_back) {
+ rmesa->state.color.drawOffset =
+ rmesa->radeonScreen->backOffset;
+ rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch;
+ } else {
+ rmesa->state.color.drawOffset =
+ rmesa->radeonScreen->frontOffset;
+ rmesa->state.color.drawPitch =
+ rmesa->radeonScreen->frontPitch;
+ }
+}
+
+/* Update the hardware state. This is called if another context has
+ * grabbed the hardware lock, which includes the X server. This
+ * function also updates the driver's window state after the X server
+ * moves, resizes or restacks a window -- the change will be reflected
+ * in the drawable position and clip rects. Since the X server grabs
+ * the hardware lock when it changes the window state, this routine will
+ * automatically be called after such a change.
+ */
+void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
+{
+ __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
+ __DRIdrawablePrivate *const readable = rmesa->dri.readable;
+ __DRIscreenPrivate *sPriv = rmesa->dri.screen;
+ drm_radeon_sarea_t *sarea = rmesa->sarea;
+ r300ContextPtr r300 = (r300ContextPtr) rmesa;
+
+ assert(drawable != NULL);
+
+ drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
+
+ /* The window might have moved, so we might need to get new clip
+ * rects.
+ *
+ * NOTE: This releases and regrabs the hw lock to allow the X server
+ * to respond to the DRI protocol request for new drawable info.
+ * Since the hardware state depends on having the latest drawable
+ * clip rects, all state checking must be done _after_ this call.
+ */
+ DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
+ if (drawable != readable) {
+ DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
+ }
+
+ if (rmesa->lastStamp != drawable->lastStamp) {
+ radeonUpdatePageFlipping(rmesa);
+ radeonSetCliprects(rmesa);
+ r300UpdateViewportOffset(rmesa->glCtx);
+ driUpdateFramebufferSize(rmesa->glCtx, drawable);
+ }
+
+ if (sarea->ctx_owner != rmesa->dri.hwContext) {
+ int i;
+
+ sarea->ctx_owner = rmesa->dri.hwContext;
+ for (i = 0; i < r300->nr_heaps; i++) {
+ DRI_AGE_TEXTURES(r300->texture_heaps[i]);
+ }
+ }
+
+ rmesa->lost_context = GL_TRUE;
+}
diff --git a/r300/radeon_lock.h b/r300/radeon_lock.h
new file mode 100644
index 0000000..c47adc9
--- /dev/null
+++ b/r300/radeon_lock.h
@@ -0,0 +1,118 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+ VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Gareth Hughes <gareth@valinux.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Kevin E. Martin <martin@valinux.com>
+ */
+
+#ifndef __RADEON_LOCK_H__
+#define __RADEON_LOCK_H__
+
+#if 0
+#include "r200_ioctl.h"
+#endif
+#include "radeon_context.h"
+
+extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
+extern void radeonUpdatePageFlipping(radeonContextPtr rmesa);
+
+/* Turn DEBUG_LOCKING on to find locking conflicts.
+ */
+#define DEBUG_LOCKING 0
+
+#if DEBUG_LOCKING
+extern char *prevLockFile;
+extern int prevLockLine;
+
+#define DEBUG_LOCK() \
+ do { \
+ prevLockFile = (__FILE__); \
+ prevLockLine = (__LINE__); \
+ } while (0)
+
+#define DEBUG_RESET() \
+ do { \
+ prevLockFile = 0; \
+ prevLockLine = 0; \
+ } while (0)
+
+#define DEBUG_CHECK_LOCK() \
+ do { \
+ if (prevLockFile) { \
+ fprintf(stderr, \
+ "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \
+ prevLockFile, prevLockLine, __FILE__, __LINE__); \
+ exit(1); \
+ } \
+ } while (0)
+
+#else
+
+#define DEBUG_LOCK()
+#define DEBUG_RESET()
+#define DEBUG_CHECK_LOCK()
+
+#endif
+
+/*
+ * !!! We may want to separate locks from locks with validation. This
+ * could be used to improve performance for commands that do not do any
+ * drawing !!!
+ */
+
+/* Lock the hardware and validate our state.
+ */
+#define LOCK_HARDWARE( rmesa ) \
+ do { \
+ char __ret = 0; \
+ DEBUG_CHECK_LOCK(); \
+ DRM_CAS((rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \
+ (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret); \
+ if (__ret) \
+ radeonGetLock((rmesa), 0); \
+ DEBUG_LOCK(); \
+ } while (0)
+
+#define UNLOCK_HARDWARE( rmesa ) \
+ do { \
+ DRM_UNLOCK((rmesa)->dri.fd, \
+ (rmesa)->dri.hwLock, \
+ (rmesa)->dri.hwContext); \
+ DEBUG_RESET(); \
+ } while (0)
+
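+/* Illustrative sketch only (not part of the driver): code that touches
+ * the hardware or the SAREA is expected to be bracketed by
+ * LOCK_HARDWARE/UNLOCK_HARDWARE, keeping the locked region short.
+ */
+#if 0
+static void example_locked_access(radeonContextPtr rmesa)
+{
+	LOCK_HARDWARE(rmesa);
+	/* ... read/write SAREA fields or submit commands here ... */
+	UNLOCK_HARDWARE(rmesa);
+}
+#endif
+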
+#endif /* __RADEON_LOCK_H__ */
diff --git a/r300/radeon_span.c b/r300/radeon_span.c
new file mode 100644
index 0000000..eae09d6
--- /dev/null
+++ b/r300/radeon_span.c
@@ -0,0 +1,321 @@
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+ VA Linux Systems Inc., Fremont, California.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Kevin E. Martin <martin@valinux.com>
+ * Gareth Hughes <gareth@valinux.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "glheader.h"
+#include "swrast/swrast.h"
+
+#include "r300_state.h"
+#include "radeon_ioctl.h"
+#include "r300_ioctl.h"
+#include "radeon_span.h"
+
+#include "drirenderbuffer.h"
+
+#define DBG 0
+
+/*
+ * Note that all information needed to access pixels in a renderbuffer
+ * should be obtained through the gl_renderbuffer parameter, not per-context
+ * information.
+ */
+#define LOCAL_VARS \
+ driRenderbuffer *drb = (driRenderbuffer *) rb; \
+ const __DRIdrawablePrivate *dPriv = drb->dPriv; \
+ const GLuint bottom = dPriv->h - 1; \
+ GLubyte *buf = (GLubyte *) drb->flippedData \
+ + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \
+ GLuint p; \
+ (void) p;
+
+#define LOCAL_DEPTH_VARS \
+ driRenderbuffer *drb = (driRenderbuffer *) rb; \
+ const __DRIdrawablePrivate *dPriv = drb->dPriv; \
+ const GLuint bottom = dPriv->h - 1; \
+ GLuint xo = dPriv->x; \
+ GLuint yo = dPriv->y; \
+ GLubyte *buf = (GLubyte *) drb->Base.Data;
+
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+
+#define Y_FLIP(Y) (bottom - (Y))
+
+#define HW_LOCK()
+
+#define HW_UNLOCK()
+
+/* ================================================================
+ * Color buffer
+ */
+
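+/* Each inclusion of spantmp2.h expands into a complete set of span and
+ * pixel read/write functions; TAG()/TAG2() build the names of the
+ * generated functions and GET_PTR computes the address of a pixel in
+ * the (flipped) buffer.
+ */
+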
+/* 16 bit, RGB565 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x) radeon##x##_RGB565
+#define TAG2(x,y) radeon##x##_RGB565##y
+#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+#include "spantmp2.h"
+
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x) radeon##x##_ARGB8888
+#define TAG2(x,y) radeon##x##_ARGB8888##y
+#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+#include "spantmp2.h"
+
+/* ================================================================
+ * Depth buffer
+ */
+
+/* The Radeon family has depth tiling on all the time, so we have to convert
+ * the x,y coordinates into the memory bus address (mba) in the same
+ * manner as the engine. In each case, the linear block address (ba)
+ * is calculated, and then wired with x and y to produce the final
+ * memory address.
+ * The chip will do address translation on its own if the surface registers
+ * are set up correctly, although that alone is not quite enough to get it
+ * working with HyperZ as well.
+ */
+
+static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
+{
+ GLuint pitch = drb->pitch;
+ if (drb->depthHasSurface) {
+ return 4 * (x + y * pitch);
+ } else {
+ GLuint ba, address = 0; /* a[0..1] = 0 */
+
+#ifdef COMPILE_R300
+ ba = (y / 8) * (pitch / 8) + (x / 8);
+#else
+ ba = (y / 16) * (pitch / 16) + (x / 16);
+#endif
+
+ address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */
+ address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */
+ address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */
+ address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
+
+ address |= (y & 0x8) << 7; /* a[10] = y[3] */
+ address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */
+ address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
+
+ return address;
+ }
+}
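+
+/* For illustration (R300 tiled path, hypothetical 256-pixel pitch):
+ * x = 9, y = 5 gives ba = (5/8)*(256/8) + 9/8 = 1 and a final address
+ * of 0x9a4, rather than the (y*pitch + x)*4 = 0x1424 a linear layout
+ * would give.
+ */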
+
+static INLINE GLuint
+radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
+{
+ GLuint pitch = drb->pitch;
+ if (drb->depthHasSurface) {
+ return 2 * (x + y * pitch);
+ } else {
+ GLuint ba, address = 0; /* a[0] = 0 */
+
+ ba = (y / 16) * (pitch / 32) + (x / 32);
+
+ address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */
+ address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */
+ address |= (x & 0x8) << 4; /* a[7] = x[3] */
+ address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
+ address |= (y & 0x8) << 7; /* a[10] = y[3] */
+ address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */
+ address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
+
+ return address;
+ }
+}
+
+/* 16-bit depth buffer functions
+ */
+#define WRITE_DEPTH( _x, _y, d ) \
+ *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
+
+#define READ_DEPTH( d, _x, _y ) \
+ d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
+
+#define TAG(x) radeon##x##_z16
+#include "depthtmp.h"
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions
+ *
+ * Careful: It looks like the R300 uses ZZZS byte order while the R200
+ * uses SZZZ for 24 bit depth, 8 bit stencil mode.
+ */
+#ifdef COMPILE_R300
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+ GLuint tmp = *(GLuint *)(buf + offset); \
+ tmp &= 0x000000ff; \
+ tmp |= ((d << 8) & 0xffffff00); \
+ *(GLuint *)(buf + offset) = tmp; \
+} while (0)
+#else
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+ GLuint tmp = *(GLuint *)(buf + offset); \
+ tmp &= 0xff000000; \
+ tmp |= ((d) & 0x00ffffff); \
+ *(GLuint *)(buf + offset) = tmp; \
+} while (0)
+#endif
+
+#ifdef COMPILE_R300
+#define READ_DEPTH( d, _x, _y ) \
+ do { \
+ d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
+ _y + yo )) & 0xffffff00) >> 8; \
+ }while(0)
+#else
+#define READ_DEPTH( d, _x, _y ) \
+ d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
+ _y + yo )) & 0x00ffffff;
+#endif
+
+#define TAG(x) radeon##x##_z24_s8
+#include "depthtmp.h"
+
+/* ================================================================
+ * Stencil buffer
+ */
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions
+ */
+#ifdef COMPILE_R300
+#define WRITE_STENCIL( _x, _y, d ) \
+do { \
+ GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+ GLuint tmp = *(GLuint *)(buf + offset); \
+ tmp &= 0xffffff00; \
+ tmp |= (d) & 0xff; \
+ *(GLuint *)(buf + offset) = tmp; \
+} while (0)
+#else
+#define WRITE_STENCIL( _x, _y, d ) \
+do { \
+ GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+ GLuint tmp = *(GLuint *)(buf + offset); \
+ tmp &= 0x00ffffff; \
+ tmp |= (((d) & 0xff) << 24); \
+ *(GLuint *)(buf + offset) = tmp; \
+} while (0)
+#endif
+
+#ifdef COMPILE_R300
+#define READ_STENCIL( d, _x, _y ) \
+do { \
+ GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+ GLuint tmp = *(GLuint *)(buf + offset); \
+ d = tmp & 0x000000ff; \
+} while (0)
+#else
+#define READ_STENCIL( d, _x, _y ) \
+do { \
+ GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+ GLuint tmp = *(GLuint *)(buf + offset); \
+ d = (tmp & 0xff000000) >> 24; \
+} while (0)
+#endif
+
+#define TAG(x) radeon##x##_z24_s8
+#include "stenciltmp.h"
+
+/* Move locking out to get reasonable span performance (10x better
+ * than doing this in HW_LOCK above). WaitForIdle() is the main
+ * culprit.
+ */
+
+static void radeonSpanRenderStart(GLcontext * ctx)
+{
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+#ifdef COMPILE_R300
+ r300ContextPtr r300 = (r300ContextPtr) rmesa;
+ R300_FIREVERTICES(r300);
+#else
+ RADEON_FIREVERTICES(rmesa);
+#endif
+ LOCK_HARDWARE(rmesa);
+ radeonWaitForIdleLocked(rmesa);
+}
+
+static void radeonSpanRenderFinish(GLcontext * ctx)
+{
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ _swrast_flush(ctx);
+ UNLOCK_HARDWARE(rmesa);
+}
+
+void radeonInitSpanFuncs(GLcontext * ctx)
+{
+ struct swrast_device_driver *swdd =
+ _swrast_GetDeviceDriverReference(ctx);
+ swdd->SpanRenderStart = radeonSpanRenderStart;
+ swdd->SpanRenderFinish = radeonSpanRenderFinish;
+}
+
+/**
+ * Plug in the Get/Put routines for the given driRenderbuffer.
+ */
+void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
+{
+ if (drb->Base.InternalFormat == GL_RGBA) {
+ if (vis->redBits == 5 && vis->greenBits == 6
+ && vis->blueBits == 5) {
+ radeonInitPointers_RGB565(&drb->Base);
+ } else {
+ radeonInitPointers_ARGB8888(&drb->Base);
+ }
+ } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
+ radeonInitDepthPointers_z16(&drb->Base);
+ } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
+ radeonInitDepthPointers_z24_s8(&drb->Base);
+ } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+ radeonInitStencilPointers_z24_s8(&drb->Base);
+ }
+}
diff --git a/r300/radeon_state.c b/r300/radeon_state.c
new file mode 100644
index 0000000..82bfd95
--- /dev/null
+++ b/r300/radeon_state.c
@@ -0,0 +1,243 @@
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "api_arrayelt.h"
+#include "enums.h"
+#include "colormac.h"
+#include "light.h"
+
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "r300_ioctl.h"
+#include "framebuffer.h"
+
+/* =============================================================
+ * Scissoring
+ */
+
+static GLboolean intersect_rect(drm_clip_rect_t * out,
+ drm_clip_rect_t * a, drm_clip_rect_t * b)
+{
+ *out = *a;
+ if (b->x1 > out->x1)
+ out->x1 = b->x1;
+ if (b->y1 > out->y1)
+ out->y1 = b->y1;
+ if (b->x2 < out->x2)
+ out->x2 = b->x2;
+ if (b->y2 < out->y2)
+ out->y2 = b->y2;
+ if (out->x1 >= out->x2)
+ return GL_FALSE;
+ if (out->y1 >= out->y2)
+ return GL_FALSE;
+ return GL_TRUE;
+}
+
+void radeonRecalcScissorRects(radeonContextPtr radeon)
+{
+ drm_clip_rect_t *out;
+ int i;
+
+ /* Grow cliprect store?
+ */
+ if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
+ while (radeon->state.scissor.numAllocedClipRects <
+ radeon->numClipRects) {
+ radeon->state.scissor.numAllocedClipRects += 1; /* zero case */
+ radeon->state.scissor.numAllocedClipRects *= 2;
+ }
+
+ if (radeon->state.scissor.pClipRects)
+ FREE(radeon->state.scissor.pClipRects);
+
+ radeon->state.scissor.pClipRects =
+ MALLOC(radeon->state.scissor.numAllocedClipRects *
+ sizeof(drm_clip_rect_t));
+
+ if (radeon->state.scissor.pClipRects == NULL) {
+ radeon->state.scissor.numAllocedClipRects = 0;
+ return;
+ }
+ }
+
+ out = radeon->state.scissor.pClipRects;
+ radeon->state.scissor.numClipRects = 0;
+
+ for (i = 0; i < radeon->numClipRects; i++) {
+ if (intersect_rect(out,
+ &radeon->pClipRects[i],
+ &radeon->state.scissor.rect)) {
+ radeon->state.scissor.numClipRects++;
+ out++;
+ }
+ }
+}
+
+void radeonUpdateScissor(GLcontext* ctx)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+
+ if (radeon->dri.drawable) {
+ __DRIdrawablePrivate *dPriv = radeon->dri.drawable;
+ int x1 = dPriv->x + ctx->Scissor.X;
+ int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height);
+
+ radeon->state.scissor.rect.x1 = x1;
+ radeon->state.scissor.rect.y1 = y1;
+ radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width - 1;
+ radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height - 1;
+
+ radeonRecalcScissorRects(radeon);
+ }
+}
+
+static void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+ if (ctx->Scissor.Enabled) {
+ /* We don't pipeline cliprect changes */
+ r300Flush(ctx);
+ radeonUpdateScissor(ctx);
+ }
+}
+
+
+/**
+ * Update cliprects and scissors.
+ */
+void radeonSetCliprects(radeonContextPtr radeon)
+{
+ __DRIdrawablePrivate *const drawable = radeon->dri.drawable;
+ __DRIdrawablePrivate *const readable = radeon->dri.readable;
+ GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate;
+ GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate;
+
+ if (draw_fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) {
+ /* Can't ignore 2d windows if we are page flipping. */
+ if (drawable->numBackClipRects == 0 || radeon->doPageFlip ||
+ radeon->sarea->pfCurrentPage == 1) {
+ radeon->numClipRects = drawable->numClipRects;
+ radeon->pClipRects = drawable->pClipRects;
+ } else {
+ radeon->numClipRects = drawable->numBackClipRects;
+ radeon->pClipRects = drawable->pBackClipRects;
+ }
+ } else {
+		/* front buffer (or none, or multiple buffers) */
+ radeon->numClipRects = drawable->numClipRects;
+ radeon->pClipRects = drawable->pClipRects;
+ }
+
+ if ((draw_fb->Width != drawable->w) ||
+ (draw_fb->Height != drawable->h)) {
+ _mesa_resize_framebuffer(radeon->glCtx, draw_fb,
+ drawable->w, drawable->h);
+ draw_fb->Initialized = GL_TRUE;
+ }
+
+ if (drawable != readable) {
+ if ((read_fb->Width != readable->w) ||
+ (read_fb->Height != readable->h)) {
+ _mesa_resize_framebuffer(radeon->glCtx, read_fb,
+ readable->w, readable->h);
+ read_fb->Initialized = GL_TRUE;
+ }
+ }
+
+ if (radeon->state.scissor.enabled)
+ radeonRecalcScissorRects(radeon);
+
+ radeon->lastStamp = drawable->lastStamp;
+}
+
+
+/**
+ * Handle common enable bits.
+ * Called as a fallback by r200Enable/r300Enable.
+ */
+void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+
+ switch(cap) {
+ case GL_SCISSOR_TEST:
+ /* We don't pipeline cliprect & scissor changes */
+ r300Flush(ctx);
+
+ radeon->state.scissor.enabled = state;
+ radeonUpdateScissor(ctx);
+ break;
+
+ default:
+ return;
+ }
+}
+
+
+/**
+ * Initialize default state.
+ * This function is called once at context init time from
+ * r200InitState/r300InitState
+ */
+void radeonInitState(radeonContextPtr radeon)
+{
+ radeon->Fallback = 0;
+
+ if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) {
+ radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
+ radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
+ } else {
+ radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
+ radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
+ }
+}
+
+
+/**
+ * Initialize common state functions.
+ * Called by r200InitStateFuncs/r300InitStateFuncs
+ */
+void radeonInitStateFuncs(struct dd_function_table *functions)
+{
+ functions->Scissor = radeonScissor;
+}
diff --git a/r300/radeon_state.h b/r300/radeon_state.h
new file mode 100644
index 0000000..821cb40
--- /dev/null
+++ b/r300/radeon_state.h
@@ -0,0 +1,43 @@
+/*
+Copyright (C) 2004 Nicolai Haehnle. All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __RADEON_STATE_H__
+#define __RADEON_STATE_H__
+
+extern void radeonRecalcScissorRects(radeonContextPtr radeon);
+extern void radeonSetCliprects(radeonContextPtr radeon);
+extern void radeonUpdateScissor(GLcontext* ctx);
+
+extern void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state);
+
+extern void radeonInitState(radeonContextPtr radeon);
+extern void radeonInitStateFuncs(struct dd_function_table* functions);
+
+#endif