summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuc Verhaegen <libv@skynet.be>2010-03-14 07:16:11 +0100
committerLuc Verhaegen <libv@skynet.be>2010-03-14 07:16:11 +0100
commit7cf4bebfd2c646b649d14e31c40098fec0d2278b (patch)
tree083117c8b3567cd09471d486d53c972fb9d203e9
parent50d4922305e925896a71e705c438ededbaedb80f (diff)
Import radeon, r200 and r300 dri drivers from Mesa 7.1.0.7.1.0
-rw-r--r--configure.ac6
-rw-r--r--r200/r200_cmdbuf.c1
-rw-r--r--r200/r200_context.c29
-rw-r--r--r200/r200_context.h8
-rw-r--r--r200/r200_fragshader.c10
-rw-r--r--r200/r200_ioctl.c33
-rw-r--r--r200/r200_ioctl.h17
-rw-r--r--r200/r200_lock.c1
-rw-r--r--r200/r200_lock.h1
-rw-r--r--r200/r200_maos.h1
-rw-r--r--r200/r200_maos_arrays.c1
-rw-r--r--r200/r200_pixel.c7
-rw-r--r--r200/r200_pixel.h1
-rw-r--r--r200/r200_reg.h1
-rw-r--r--r200/r200_sanity.c1
-rw-r--r--r200/r200_span.c5
-rw-r--r--r200/r200_span.h1
-rw-r--r--r200/r200_state.c29
-rw-r--r--r200/r200_state.h1
-rw-r--r--r200/r200_state_init.c1
-rw-r--r--r200/r200_swtcl.c1
-rw-r--r--r200/r200_swtcl.h1
-rw-r--r--r200/r200_tcl.c1
-rw-r--r--r200/r200_tcl.h1
-rw-r--r--r200/r200_tex.c71
-rw-r--r--r200/r200_tex.h1
-rw-r--r--r200/r200_texmem.c1
-rw-r--r--r200/r200_texstate.c11
-rw-r--r--r200/r200_vertprog.c39
-rw-r--r--r300/Makefile.am11
-rw-r--r--r300/r300_cmdbuf.c355
-rw-r--r--r300/r300_cmdbuf.h6
-rw-r--r--r300/r300_context.c40
-rw-r--r--r300/r300_context.h437
-rw-r--r--r300/r300_emit.c482
-rw-r--r--r300/r300_emit.h74
-rw-r--r--r300/r300_fragprog.c2450
-rw-r--r--r300/r300_fragprog.h132
-rw-r--r--r300/r300_fragprog_emit.c344
-rw-r--r--r300/r300_fragprog_swizzle.c227
-rw-r--r--r300/r300_fragprog_swizzle.h42
-rw-r--r--r300/r300_ioctl.c345
-rw-r--r--r300/r300_ioctl.h1
-rw-r--r--r300/r300_program.h150
-rw-r--r--r300/r300_reg.h2987
-rw-r--r--r300/r300_render.c241
-rw-r--r--r300/r300_shader.c50
-rw-r--r--r300/r300_state.c1836
-rw-r--r--r300/r300_state.h26
-rw-r--r--r300/r300_swtcl.c697
-rw-r--r--r300/r300_swtcl.h45
-rw-r--r--r300/r300_tex.c406
-rw-r--r--r300/r300_tex.h2
-rw-r--r--r300/r300_texmem.c31
-rw-r--r--r300/r300_texstate.c434
-rw-r--r--r300/r300_vertprog.c1684
-rw-r--r--r300/r300_vertprog.h92
-rw-r--r--r300/r500_fragprog.c700
-rw-r--r--r300/r500_fragprog.h62
-rw-r--r--r300/r500_fragprog_emit.c327
-rw-r--r--r300/radeon_context.c21
-rw-r--r--r300/radeon_context.h15
-rw-r--r--r300/radeon_ioctl.c28
-rw-r--r--r300/radeon_ioctl.h4
-rw-r--r--r300/radeon_lock.c4
-rw-r--r--r300/radeon_lock.h3
-rw-r--r--r300/radeon_nqssadce.c284
-rw-r--r--r300/radeon_nqssadce.h96
-rw-r--r--r300/radeon_program.c128
-rw-r--r--r300/radeon_program.h99
-rw-r--r--r300/radeon_program_alu.c658
-rw-r--r--r300/radeon_program_alu.h53
-rw-r--r--r300/radeon_program_pair.c1001
-rw-r--r--r300/radeon_program_pair.h126
-rw-r--r--r300/radeon_span.c28
-rw-r--r--r300/radeon_state.c6
-rw-r--r--radeon/Makefile.am2
-rw-r--r--radeon/radeon_chipset.h114
-rw-r--r--radeon/radeon_context.c44
-rw-r--r--radeon/radeon_context.h11
-rw-r--r--radeon/radeon_ioctl.c28
-rw-r--r--radeon/radeon_ioctl.h10
-rw-r--r--radeon/radeon_screen.c523
-rw-r--r--radeon/radeon_screen.h4
-rw-r--r--radeon/radeon_span.c4
-rw-r--r--radeon/radeon_state.c24
-rw-r--r--radeon/radeon_tex.h4
-rw-r--r--radeon/radeon_texmem.c2
-rw-r--r--radeon/radeon_texstate.c83
89 files changed, 12023 insertions, 6382 deletions
diff --git a/configure.ac b/configure.ac
index c6e6dfe..46dce4c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
# Process this file with autoconf to produce a configure script
AC_PREREQ(2.57)
-AC_INIT([mesa-dri-radeon], 7.0.3, [], mesa-dri-radeon)
+AC_INIT([mesa-dri-radeon], 7.1.0, [], mesa-dri-radeon)
AM_INIT_AUTOMAKE([dist-bzip2])
@@ -16,8 +16,8 @@ AC_PROG_CC
AC_HEADER_STDC
PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0])
-PKG_CHECK_MODULES([DRI], [libmesadri >= 7.0.3 libmesadri < 7.1.0
- libmesadricommon >= 7.0.3 libmesadricommon < 7.1.0])
+PKG_CHECK_MODULES([DRI], [libmesadri >= 7.1.0 libmesadri < 7.3.0
+ libmesadricommon >= 7.1.0 libmesadricommon < 7.3.0])
AC_OUTPUT([
Makefile
diff --git a/r200/r200_cmdbuf.c b/r200/r200_cmdbuf.c
index 2920cea..c1d51e8 100644
--- a/r200/r200_cmdbuf.c
+++ b/r200/r200_cmdbuf.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_cmdbuf.c,v 1.1 2002/10/30 12:51:51 alanh Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_context.c b/r200/r200_context.c
index 786a298..c567349 100644
--- a/r200/r200_context.c
+++ b/r200/r200_context.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_context.c,v 1.3 2003/05/06 23:52:08 daenzer Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
@@ -70,6 +69,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define need_GL_ATI_fragment_shader
#define need_GL_EXT_blend_minmax
#define need_GL_EXT_fog_coord
+#define need_GL_EXT_multi_draw_arrays
#define need_GL_EXT_secondary_color
#define need_GL_EXT_blend_equation_separate
#define need_GL_EXT_blend_func_separate
@@ -133,6 +133,7 @@ const struct dri_extension card_extensions[] =
{ "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions },
{ "GL_EXT_blend_subtract", NULL },
{ "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+ { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions },
{ "GL_EXT_secondary_color", GL_EXT_secondary_color_functions },
{ "GL_EXT_stencil_wrap", NULL },
{ "GL_EXT_texture_edge_clamp", NULL },
@@ -278,14 +279,14 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
"def_max_anisotropy");
if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
- if ( sPriv->drmMinor < 13 )
+ if ( sPriv->drm_version.minor < 13 )
fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
- "disabling.\n",sPriv->drmMinor );
+ "disabling.\n", sPriv->drm_version.minor );
else
rmesa->using_hyperz = GL_TRUE;
}
- if ( sPriv->drmMinor >= 15 )
+ if ( sPriv->drm_version.minor >= 15 )
rmesa->texmicrotile = GL_TRUE;
/* Init default driver functions then plug in our R200-specific functions
@@ -318,7 +319,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
rmesa->dri.hwContext = driContextPriv->hHWContext;
rmesa->dri.hwLock = &sPriv->pSAREA->lock;
rmesa->dri.fd = sPriv->fd;
- rmesa->dri.drmMinor = sPriv->drmMinor;
+ rmesa->dri.drmMinor = sPriv->drm_version.minor;
rmesa->r200Screen = screen;
rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
@@ -500,13 +501,10 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
fthrottle_mode,
rmesa->r200Screen->irq);
- rmesa->vblank_flags = (rmesa->r200Screen->irq != 0)
- ? driGetDefaultVBlankFlags(&rmesa->optionCache) : VBLANK_FLAG_NO_IRQ;
-
rmesa->prefer_gart_client_texturing =
(getenv("R200_GART_CLIENT_TEXTURES") != 0);
- (*dri_interface->getUST)( & rmesa->swap_ust );
+ (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
#if DO_DEBUG
@@ -667,15 +665,18 @@ r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
if (R200_DEBUG & DEBUG_DRI)
fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)newCtx->glCtx);
- if ( newCtx->dri.drawable != driDrawPriv ) {
- driDrawableInitVBlank( driDrawPriv, newCtx->vblank_flags,
- &newCtx->vbl_seq );
- }
-
newCtx->dri.readable = driReadPriv;
if ( newCtx->dri.drawable != driDrawPriv ||
newCtx->lastStamp != driDrawPriv->lastStamp ) {
+ if (driDrawPriv->swap_interval == (unsigned)-1) {
+ driDrawPriv->vblFlags = (newCtx->r200Screen->irq != 0)
+ ? driGetDefaultVBlankFlags(&newCtx->optionCache)
+ : VBLANK_FLAG_NO_IRQ;
+
+ driDrawableInitVBlank( driDrawPriv );
+ }
+
newCtx->dri.drawable = driDrawPriv;
r200SetCliprects(newCtx);
diff --git a/r200/r200_context.h b/r200/r200_context.h
index a06a2f5..ee478e7 100644
--- a/r200/r200_context.h
+++ b/r200/r200_context.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_context.h,v 1.2 2002/12/16 16:18:54 dawes Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
@@ -894,11 +893,8 @@ struct r200_context {
GLuint TexGenCompSel;
GLmatrix tmpmat;
- /* VBI / buffer swap
+ /* buffer swap
*/
- GLuint vbl_seq;
- GLuint vblank_flags;
-
int64_t swap_ust;
int64_t swap_missed_ust;
@@ -931,7 +927,7 @@ struct r200_context {
#define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx))
-static __inline GLuint r200PackColor( GLuint cpp,
+static INLINE GLuint r200PackColor( GLuint cpp,
GLubyte r, GLubyte g,
GLubyte b, GLubyte a )
{
diff --git a/r200/r200_fragshader.c b/r200/r200_fragshader.c
index 5dd3ada..d514b28 100644
--- a/r200/r200_fragshader.c
+++ b/r200/r200_fragshader.c
@@ -24,13 +24,13 @@
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "tnl/t_context.h"
-#include "atifragshader.h"
-#include "program.h"
+#include "shader/atifragshader.h"
+#include "shader/program.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_tex.h"
diff --git a/r200/r200_ioctl.c b/r200/r200_ioctl.c
index 463bd64..20482a4 100644
--- a/r200/r200_ioctl.c
+++ b/r200/r200_ioctl.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_ioctl.c,v 1.4 2002/12/17 00:32:56 dawes Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
@@ -420,13 +419,14 @@ static void r200WaitForFrameCompletion( r200ContextPtr rmesa )
/* Copy the back color buffer to the front color buffer.
*/
-void r200CopyBuffer( const __DRIdrawablePrivate *dPriv,
+void r200CopyBuffer( __DRIdrawablePrivate *dPriv,
const drm_clip_rect_t *rect)
{
r200ContextPtr rmesa;
GLint nbox, i, ret;
GLboolean missed_target;
int64_t ust;
+ __DRIscreenPrivate *psp = dPriv->driScreenPriv;
assert(dPriv);
assert(dPriv->driContextPriv);
@@ -450,7 +450,7 @@ void r200CopyBuffer( const __DRIdrawablePrivate *dPriv,
if (!rect)
{
UNLOCK_HARDWARE( rmesa );
- driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
+ driWaitForVBlank( dPriv, & missed_target );
LOCK_HARDWARE( rmesa );
}
@@ -477,16 +477,18 @@ void r200CopyBuffer( const __DRIdrawablePrivate *dPriv,
if (rect->y2 < b->y2)
b->y2 = rect->y2;
- if (b->x1 < b->x2 && b->y1 < b->y2)
- b++;
+ if (b->x1 >= b->x2 || b->y1 >= b->y2)
+ continue;
}
- else
- b++;
+ b++;
n++;
}
rmesa->sarea->nbox = n;
+ if (!n)
+ continue;
+
ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
if ( ret ) {
@@ -502,7 +504,7 @@ void r200CopyBuffer( const __DRIdrawablePrivate *dPriv,
rmesa->hw.all_dirty = GL_TRUE;
rmesa->swap_count++;
- (*dri_interface->getUST)( & ust );
+ (*psp->systemTime->getUST)( & ust );
if ( missed_target ) {
rmesa->swap_missed_count++;
rmesa->swap_missed_ust = ust - rmesa->swap_ust;
@@ -514,11 +516,12 @@ void r200CopyBuffer( const __DRIdrawablePrivate *dPriv,
}
}
-void r200PageFlip( const __DRIdrawablePrivate *dPriv )
+void r200PageFlip( __DRIdrawablePrivate *dPriv )
{
r200ContextPtr rmesa;
GLint ret;
GLboolean missed_target;
+ __DRIscreenPrivate *psp = dPriv->driScreenPriv;
assert(dPriv);
assert(dPriv->driContextPriv);
@@ -554,10 +557,10 @@ void r200PageFlip( const __DRIdrawablePrivate *dPriv )
*/
r200WaitForFrameCompletion( rmesa );
UNLOCK_HARDWARE( rmesa );
- driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
+ driWaitForVBlank( dPriv, & missed_target );
if ( missed_target ) {
rmesa->swap_missed_count++;
- (void) (*dri_interface->getUST)( & rmesa->swap_missed_ust );
+ (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
}
LOCK_HARDWARE( rmesa );
@@ -571,7 +574,7 @@ void r200PageFlip( const __DRIdrawablePrivate *dPriv )
}
rmesa->swap_count++;
- (void) (*dri_interface->getUST)( & rmesa->swap_ust );
+ (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
#if 000
if ( rmesa->sarea->pfCurrentPage == 1 ) {
@@ -858,7 +861,7 @@ void r200Finish( GLcontext *ctx )
* the kernel data structures, and the current context to get the
* device fd.
*/
-void *r200AllocateMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLsizei size,
+void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
GLfloat readfreq, GLfloat writefreq,
GLfloat priority)
{
@@ -900,7 +903,7 @@ void *r200AllocateMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLsizei size,
/* Called via glXFreeMemoryMESA() */
-void r200FreeMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLvoid *pointer)
+void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer)
{
GET_CURRENT_CONTEXT(ctx);
r200ContextPtr rmesa;
@@ -937,7 +940,7 @@ void r200FreeMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLvoid *pointer)
}
/* Called via glXGetMemoryOffsetMESA() */
-GLuint r200GetMemoryOffsetMESA(__DRInativeDisplay *dpy, int scrn, const GLvoid *pointer)
+GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer)
{
GET_CURRENT_CONTEXT(ctx);
r200ContextPtr rmesa;
diff --git a/r200/r200_ioctl.h b/r200/r200_ioctl.h
index f537527..31431b7 100644
--- a/r200/r200_ioctl.h
+++ b/r200/r200_ioctl.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_ioctl.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
@@ -90,19 +89,19 @@ extern void r200ReleaseDmaRegion( r200ContextPtr rmesa,
struct r200_dma_region *region,
const char *caller );
-extern void r200CopyBuffer( const __DRIdrawablePrivate *drawable,
+extern void r200CopyBuffer( __DRIdrawablePrivate *drawable,
const drm_clip_rect_t *rect);
-extern void r200PageFlip( const __DRIdrawablePrivate *drawable );
+extern void r200PageFlip( __DRIdrawablePrivate *drawable );
extern void r200Flush( GLcontext *ctx );
extern void r200Finish( GLcontext *ctx );
extern void r200WaitForIdleLocked( r200ContextPtr rmesa );
extern void r200WaitForVBlank( r200ContextPtr rmesa );
extern void r200InitIoctlFuncs( struct dd_function_table *functions );
-extern void *r200AllocateMemoryMESA( __DRInativeDisplay *dpy, int scrn, GLsizei size, GLfloat readfreq,
+extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq,
GLfloat writefreq, GLfloat priority );
-extern void r200FreeMemoryMESA( __DRInativeDisplay *dpy, int scrn, GLvoid *pointer );
-extern GLuint r200GetMemoryOffsetMESA( __DRInativeDisplay *dpy, int scrn, const GLvoid *pointer );
+extern void r200FreeMemoryMESA( __DRIscreen *screen, GLvoid *pointer );
+extern GLuint r200GetMemoryOffsetMESA( __DRIscreen *screen, const GLvoid *pointer );
extern GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
GLint size );
@@ -138,7 +137,7 @@ do { \
memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \
rmesa->hw.ATOM.cmd_size * 4)
-static __inline int R200_DB_STATECHANGE(
+static INLINE int R200_DB_STATECHANGE(
r200ContextPtr rmesa,
struct r200_state_atom *atom )
{
@@ -184,7 +183,7 @@ do { \
* and hang on to the lock until the critical section is finished and we flush
* the buffer again and unlock.
*/
-static __inline void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes )
+static INLINE void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes )
{
if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
r200FlushCmdBuf( rmesa, __FUNCTION__ );
@@ -193,7 +192,7 @@ static __inline void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes )
/* Alloc space in the command buffer
*/
-static __inline char *r200AllocCmdBuf( r200ContextPtr rmesa,
+static INLINE char *r200AllocCmdBuf( r200ContextPtr rmesa,
int bytes, const char *where )
{
char * head;
diff --git a/r200/r200_lock.c b/r200/r200_lock.c
index b050dd7..f89b526 100644
--- a/r200/r200_lock.c
+++ b/r200/r200_lock.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_lock.c,v 1.1 2002/10/30 12:51:52 alanh Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_lock.h b/r200/r200_lock.h
index e4c3a7e..4ff9890 100644
--- a/r200/r200_lock.h
+++ b/r200/r200_lock.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_lock.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_maos.h b/r200/r200_maos.h
index 4998f67..d3ed06d 100644
--- a/r200/r200_maos.h
+++ b/r200/r200_maos.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_maos.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_maos_arrays.c b/r200/r200_maos_arrays.c
index 3162b50..7bc05e2 100644
--- a/r200/r200_maos_arrays.c
+++ b/r200/r200_maos_arrays.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_maos_arrays.c,v 1.3 2003/02/23 23:59:01 dawes Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_pixel.c b/r200/r200_pixel.c
index 7b060f9..46d2307 100644
--- a/r200/r200_pixel.c
+++ b/r200/r200_pixel.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_pixel.c,v 1.2 2002/12/16 16:18:54 dawes Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
@@ -295,7 +294,7 @@ static void do_draw_pix( GLcontext *ctx,
r200ContextPtr rmesa = R200_CONTEXT(ctx);
__DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
drm_clip_rect_t *box = dPriv->pClipRects;
- struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0][0];
+ struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0];
driRenderbuffer *drb = (driRenderbuffer *) rb;
int nbox = dPriv->numClipRects;
int i;
@@ -383,13 +382,13 @@ r200TryDrawPixels( GLcontext *ctx,
GLint pitch = unpack->RowLength ? unpack->RowLength : width;
GLuint planemask;
GLuint cpp = rmesa->r200Screen->cpp;
- GLint size = width * pitch * cpp;
+ GLint size = height * pitch * cpp;
if (R200_DEBUG & DEBUG_PIXEL)
fprintf(stderr, "%s\n", __FUNCTION__);
/* check that we're drawing to exactly one color buffer */
- if (ctx->DrawBuffer->_NumColorDrawBuffers[0] != 1)
+ if (ctx->DrawBuffer->_NumColorDrawBuffers != 1)
return GL_FALSE;
switch (format) {
diff --git a/r200/r200_pixel.h b/r200/r200_pixel.h
index 8f3923b..e62aa05 100644
--- a/r200/r200_pixel.h
+++ b/r200/r200_pixel.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_pixel.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_reg.h b/r200/r200_reg.h
index a88ea4c..5ce287f 100644
--- a/r200/r200_reg.h
+++ b/r200/r200_reg.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_reg.h,v 1.2 2002/12/16 16:18:54 dawes Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_sanity.c b/r200/r200_sanity.c
index 3f2a866..00d2f65 100644
--- a/r200/r200_sanity.c
+++ b/r200/r200_sanity.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_sanity.c,v 1.1 2002/10/30 12:51:52 alanh Exp $ */
/**************************************************************************
Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
diff --git a/r200/r200_span.c b/r200/r200_span.c
index 6e99dfe..3d7a000 100644
--- a/r200/r200_span.c
+++ b/r200/r200_span.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_span.c,v 1.1 2002/10/30 12:51:52 alanh Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
@@ -173,6 +172,7 @@ r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
/* 16-bit depth buffer functions
*/
+#define VALUE_TYPE GLushort
#define WRITE_DEPTH( _x, _y, d ) \
*(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d;
@@ -186,6 +186,7 @@ r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
/* 24 bit depth, 8 bit stencil depthbuffer functions
*/
+#define VALUE_TYPE GLuint
#define WRITE_DEPTH( _x, _y, d ) \
do { \
@@ -256,7 +257,7 @@ static void r200SpanRenderStart( GLcontext *ctx )
{
int p;
driRenderbuffer *drb =
- (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0][0];
+ (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
volatile int *buf =
(volatile int *)(rmesa->dri.screen->pFB + drb->offset);
p = *buf;
diff --git a/r200/r200_span.h b/r200/r200_span.h
index 5e7d3e4..bae5644 100644
--- a/r200/r200_span.h
+++ b/r200/r200_span.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_span.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_state.c b/r200/r200_state.c
index 16726d7..cf37517 100644
--- a/r200/r200_state.c
+++ b/r200/r200_state.c
@@ -1,4 +1,3 @@
-/* $XFree86$ */
/**************************************************************************
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
@@ -773,9 +772,11 @@ static void r200LineWidth( GLcontext *ctx, GLfloat widthf )
R200_STATECHANGE( rmesa, set );
/* Line width is stored in U6.4 format.
+ * Same min/max limits for AA, non-AA lines.
*/
rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= ~0xffff;
- rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)(ctx->Line._Width * 16.0);
+ rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)
+ (CLAMP(widthf, ctx->Const.MinLineWidth, ctx->Const.MaxLineWidth) * 16.0);
if ( widthf > 1.0 ) {
rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_WIDELINE_ENABLE;
@@ -1858,8 +1859,7 @@ void r200SetCliprects( r200ContextPtr rmesa )
GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
- if (draw_fb->_ColorDrawBufferMask[0]
- == BUFFER_BIT_BACK_LEFT) {
+ if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BIT_BACK_LEFT) {
/* Can't ignore 2d windows if we are page flipping.
*/
if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
@@ -1909,17 +1909,18 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
R200_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */
- /*
- * _ColorDrawBufferMask is easier to cope with than <mode>.
- * Check for software fallback, update cliprects.
- */
- switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
- case BUFFER_BIT_FRONT_LEFT:
- case BUFFER_BIT_BACK_LEFT:
+ if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+ /* 0 (GL_NONE) buffers or multiple color drawing buffers */
+ FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
+ return;
+ }
+
+ switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
+ case BUFFER_FRONT_LEFT:
+ case BUFFER_BACK_LEFT:
FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
break;
default:
- /* 0 (GL_NONE) buffers or multiple color drawing buffers */
FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
return;
}
@@ -2444,11 +2445,11 @@ r200UpdateDrawBuffer(GLcontext *ctx)
struct gl_framebuffer *fb = ctx->DrawBuffer;
driRenderbuffer *drb;
- if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
+ if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
/* draw to front */
drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
}
- else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) {
+ else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
/* draw to back */
drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
}
diff --git a/r200/r200_state.h b/r200/r200_state.h
index f34090b..a917163 100644
--- a/r200/r200_state.h
+++ b/r200/r200_state.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_state.h,v 1.2 2002/11/05 17:46:08 tsi Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_state_init.c b/r200/r200_state_init.c
index b40d0bd..0c36cef 100644
--- a/r200/r200_state_init.c
+++ b/r200/r200_state_init.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_state_init.c,v 1.4 2003/02/22 06:21:11 dawes Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_swtcl.c b/r200/r200_swtcl.c
index 25d229d..a1ea019 100644
--- a/r200/r200_swtcl.c
+++ b/r200/r200_swtcl.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_swtcl.c,v 1.5 2003/05/06 23:52:08 daenzer Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_swtcl.h b/r200/r200_swtcl.h
index ccf8179..7458c54 100644
--- a/r200/r200_swtcl.h
+++ b/r200/r200_swtcl.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_swtcl.h,v 1.3 2003/05/06 23:52:08 daenzer Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_tcl.c b/r200/r200_tcl.c
index 1ff0cf9..78347d3 100644
--- a/r200/r200_tcl.c
+++ b/r200/r200_tcl.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tcl.c,v 1.2 2002/12/16 16:18:55 dawes Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_tcl.h b/r200/r200_tcl.h
index ac5bc11..f191ddc 100644
--- a/r200/r200_tcl.h
+++ b/r200/r200_tcl.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tcl.h,v 1.2 2002/12/16 16:18:55 dawes Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_tex.c b/r200/r200_tex.c
index 6c6450c..24b9b3b 100644
--- a/r200/r200_tex.c
+++ b/r200/r200_tex.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tex.c,v 1.2 2002/11/05 17:46:08 tsi Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
@@ -103,37 +102,39 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
_mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
}
- switch ( twrap ) {
- case GL_REPEAT:
- t->pp_txfilter |= R200_CLAMP_T_WRAP;
- break;
- case GL_CLAMP:
- t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL;
- is_clamp = GL_TRUE;
- break;
- case GL_CLAMP_TO_EDGE:
- t->pp_txfilter |= R200_CLAMP_T_CLAMP_LAST;
- break;
- case GL_CLAMP_TO_BORDER:
- t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL;
- is_clamp_to_border = GL_TRUE;
- break;
- case GL_MIRRORED_REPEAT:
- t->pp_txfilter |= R200_CLAMP_T_MIRROR;
- break;
- case GL_MIRROR_CLAMP_EXT:
- t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL;
- is_clamp = GL_TRUE;
- break;
- case GL_MIRROR_CLAMP_TO_EDGE_EXT:
- t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_LAST;
- break;
- case GL_MIRROR_CLAMP_TO_BORDER_EXT:
- t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL;
- is_clamp_to_border = GL_TRUE;
- break;
- default:
- _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
+ if (t->base.tObj->Target != GL_TEXTURE_1D) {
+ switch ( twrap ) {
+ case GL_REPEAT:
+ t->pp_txfilter |= R200_CLAMP_T_WRAP;
+ break;
+ case GL_CLAMP:
+ t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL;
+ is_clamp = GL_TRUE;
+ break;
+ case GL_CLAMP_TO_EDGE:
+ t->pp_txfilter |= R200_CLAMP_T_CLAMP_LAST;
+ break;
+ case GL_CLAMP_TO_BORDER:
+ t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL;
+ is_clamp_to_border = GL_TRUE;
+ break;
+ case GL_MIRRORED_REPEAT:
+ t->pp_txfilter |= R200_CLAMP_T_MIRROR;
+ break;
+ case GL_MIRROR_CLAMP_EXT:
+ t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL;
+ is_clamp = GL_TRUE;
+ break;
+ case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+ t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_LAST;
+ break;
+ case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+ t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL;
+ is_clamp_to_border = GL_TRUE;
+ break;
+ default:
+ _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
+ }
}
t->pp_txformat_x &= ~R200_CLAMP_Q_MASK;
@@ -182,7 +183,7 @@ static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
{
t->pp_txfilter &= ~R200_MAX_ANISO_MASK;
- if ( max == 1.0 ) {
+ if ( max <= 1.0 ) {
t->pp_txfilter |= R200_MAX_ANISO_1_TO_1;
} else if ( max <= 2.0 ) {
t->pp_txfilter |= R200_MAX_ANISO_2_TO_1;
@@ -483,7 +484,7 @@ r200ValidateClientStorage( GLcontext *ctx, GLenum target,
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
- if (0)
+ if ( R200_DEBUG & DEBUG_TEXTURE )
fprintf(stderr, "intformat %s format %s type %s\n",
_mesa_lookup_enum_by_nr( internalFormat ),
_mesa_lookup_enum_by_nr( format ),
@@ -549,7 +550,7 @@ r200ValidateClientStorage( GLcontext *ctx, GLenum target,
format, type);
- if (0)
+ if ( R200_DEBUG & DEBUG_TEXTURE )
fprintf(stderr, "%s: srcRowStride %d/%x\n",
__FUNCTION__, srcRowStride, srcRowStride);
diff --git a/r200/r200_tex.h b/r200/r200_tex.h
index 68e9a0e..10ff8e8 100644
--- a/r200/r200_tex.h
+++ b/r200/r200_tex.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tex.h,v 1.1 2002/10/30 12:51:53 alanh Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
diff --git a/r200/r200_texmem.c b/r200/r200_texmem.c
index 9daafcf..183c4ca 100644
--- a/r200/r200_texmem.c
+++ b/r200/r200_texmem.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_texmem.c,v 1.5 2002/12/17 00:32:56 dawes Exp $ */
/**************************************************************************
Copyright (C) Tungsten Graphics 2002. All Rights Reserved.
diff --git a/r200/r200_texstate.c b/r200/r200_texstate.c
index d12c3bc..4edf304 100644
--- a/r200/r200_texstate.c
+++ b/r200/r200_texstate.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_texstate.c,v 1.3 2003/02/15 22:18:47 dawes Exp $ */
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
@@ -980,9 +979,7 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
unsigned long long offset, GLint depth, GLuint pitch)
{
- r200ContextPtr rmesa =
- (r200ContextPtr) ((__DRIcontextPrivate *) pDRICtx->private)->
- driverPrivate;
+ r200ContextPtr rmesa = pDRICtx->driverPrivate;
struct gl_texture_object *tObj =
_mesa_lookup_texture(rmesa->glCtx, texname);
r200TexObjPtr t;
@@ -1818,6 +1815,12 @@ void r200UpdateTextureState( GLcontext *ctx )
GLboolean ok;
GLuint dbg;
+ /* NOTE: must not manipulate rmesa->state.texture.unit[].unitneeded or
+ rmesa->state.envneeded before a R200_STATECHANGE (or R200_NEWPRIM) since
+ we use these to determine if we want to emit the corresponding state
+ atoms. */
+ R200_NEWPRIM( rmesa );
+
if (ctx->ATIFragmentShader._Enabled) {
GLuint i;
for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
diff --git a/r200/r200_vertprog.c b/r200/r200_vertprog.c
index 6089d61..562992f 100644
--- a/r200/r200_vertprog.c
+++ b/r200/r200_vertprog.c
@@ -30,10 +30,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* Aapo Tahkola <aet@rasterburn.org>
* Roland Scheidegger <rscheidegger_lists@hispeed.ch>
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-#include "program.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
#include "shader/prog_instruction.h"
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
@@ -154,7 +154,7 @@ static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_p
return GL_TRUE;
}
-static __inline unsigned long t_dst_mask(GLuint mask)
+static INLINE unsigned long t_dst_mask(GLuint mask)
{
/* WRITEMASK_* is equivalent to VSF_FLAG_* */
return mask & VSF_FLAG_ALL;
@@ -215,6 +215,7 @@ static unsigned long t_src_class(enum register_file file)
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
case PROGRAM_STATE_VAR:
return VSF_IN_CLASS_PARAM;
/*
@@ -228,7 +229,7 @@ static unsigned long t_src_class(enum register_file file)
}
}
-static __inline unsigned long t_swizzle(GLubyte swizzle)
+static INLINE unsigned long t_swizzle(GLubyte swizzle)
{
/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
return swizzle;
@@ -408,6 +409,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte
int fog_temp_i = 0;
int free_inputs;
int array_count = 0;
+ int u_temp_used;
vp->native = GL_FALSE;
vp->translated = GL_TRUE;
@@ -744,9 +746,16 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte
goto next;
case OPCODE_MAD:
+ /* only 2 read ports into temp memory thus may need the macro op MAD_2
+ instead (requiring 2 clocks) if all inputs are in temp memory
+ (and, only if they actually reference 3 distinct temps) */
hw_op=(src[0].File == PROGRAM_TEMPORARY &&
src[1].File == PROGRAM_TEMPORARY &&
- src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
+ src[2].File == PROGRAM_TEMPORARY &&
+ (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
+ (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
+ (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
+ R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
t_dst_mask(dst.WriteMask));
@@ -874,8 +883,11 @@ else {
case OPCODE_XPD:
/* mul r0, r1.yzxw, r2.zxyw
mad r0, -r2.yzxw, r1.zxyw, r0
- NOTE: might need MAD_2
*/
+ hw_op=(src[0].File == PROGRAM_TEMPORARY &&
+ src[1].File == PROGRAM_TEMPORARY &&
+ (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
+ R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
@@ -901,7 +913,7 @@ else {
o_inst++;
u_temp_i--;
- o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst),
+ o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
t_dst_mask(dst.WriteMask));
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
@@ -1051,14 +1063,15 @@ else {
dofogfix = 0;
}
+ u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
if (mesa_vp->Base.NumNativeTemporaries <
- (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) {
+ (mesa_vp->Base.NumTemporaries + u_temp_used)) {
mesa_vp->Base.NumNativeTemporaries =
- mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i);
+ mesa_vp->Base.NumTemporaries + u_temp_used;
}
- if (u_temp_i < mesa_vp->Base.NumTemporaries) {
+ if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
if (R200_DEBUG & DEBUG_FALLBACKS) {
- fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
+ fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
}
return GL_FALSE;
}
diff --git a/r300/Makefile.am b/r300/Makefile.am
index 0992115..a1fa2ef 100644
--- a/r300/Makefile.am
+++ b/r300/Makefile.am
@@ -24,7 +24,16 @@ r300_dri_la_SOURCES = \
r300_texmem.c \
r300_tex.c \
r300_texstate.c \
+ radeon_program.c \
+ radeon_program_alu.c \
+ radeon_program_pair.c \
+ radeon_nqssadce.c \
r300_vertprog.c \
r300_fragprog.c \
+ r300_fragprog_swizzle.c \
+ r300_fragprog_emit.c \
+ r500_fragprog.c \
+ r500_fragprog_emit.c \
r300_shader.c \
- r300_emit.c
+ r300_emit.c \
+ r300_swtcl.c
diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c
index 3befa58..c069660 100644
--- a/r300/r300_cmdbuf.c
+++ b/r300/r300_cmdbuf.c
@@ -133,13 +133,15 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat
int i;
int dwords = (*state->check) (r300, state);
- fprintf(stderr, " emit %s/%d/%d\n", state->name, dwords,
+ fprintf(stderr, " emit %s %d/%d\n", state->name, dwords,
state->cmd_size);
- if (RADEON_DEBUG & DEBUG_VERBOSE)
- for (i = 0; i < dwords; i++)
- fprintf(stderr, " %s[%d]: %08X\n",
+ if (RADEON_DEBUG & DEBUG_VERBOSE) {
+ for (i = 0; i < dwords; i++) {
+ fprintf(stderr, " %s[%d]: %08x\n",
state->name, i, state->cmd[i]);
+ }
+ }
}
/**
@@ -148,35 +150,21 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat
* The caller must have ensured that there is enough space in the command
* buffer.
*/
-static __inline__ void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
+static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
{
struct r300_state_atom *atom;
uint32_t *dest;
+ int dwords;
dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
- foreach(atom, &r300->hw.atomlist) {
- if ((atom->dirty || r300->hw.all_dirty) == dirty) {
- int dwords = (*atom->check) (r300, atom);
-
- if (dwords)
- r300PrintStateAtom(r300, atom);
- else
- fprintf(stderr,
- " skip state %s\n",
- atom->name);
- }
- }
- }
-
/* Emit WAIT */
*dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
dest++;
r300->cmdbuf.count_used++;
/* Emit cache flush */
- *dest = cmdpacket0(R300_TX_CNTL, 1);
+ *dest = cmdpacket0(R300_TX_INVALTAGS, 1);
dest++;
r300->cmdbuf.count_used++;
@@ -193,13 +181,20 @@ static __inline__ void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
foreach(atom, &r300->hw.atomlist) {
if ((atom->dirty || r300->hw.all_dirty) == dirty) {
- int dwords = (*atom->check) (r300, atom);
-
+ dwords = (*atom->check) (r300, atom);
if (dwords) {
+ if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
+ r300PrintStateAtom(r300, atom);
+ }
memcpy(dest, atom->cmd, dwords * 4);
dest += dwords;
r300->cmdbuf.count_used += dwords;
atom->dirty = GL_FALSE;
+ } else {
+ if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
+ fprintf(stderr, " skip state %s\n",
+ atom->name);
+ }
}
}
}
@@ -245,22 +240,43 @@ void r300EmitState(r300ContextPtr r300)
r300->hw.all_dirty = GL_FALSE;
}
-#define CHECK( NM, COUNT ) \
-static int check_##NM( r300ContextPtr r300, \
- struct r300_state_atom* atom ) \
-{ \
- (void) atom; (void) r300; \
- return (COUNT); \
-}
-
#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count)
#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
+#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
+
+static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
+{
+ return atom->cmd_size;
+}
+
+static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom)
+{
+ int cnt;
+ cnt = packet0_count(atom->cmd);
+ return cnt ? cnt + 1 : 0;
+}
+
+static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom)
+{
+ int cnt;
+ cnt = vpu_count(atom->cmd);
+ return cnt ? (cnt * 4) + 1 : 0;
+}
+
+static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom)
+{
+ int cnt;
+ cnt = r500fp_count(atom->cmd);
+ return cnt ? (cnt * 6) + 1 : 0;
+}
+
+static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
+{
+ int cnt;
+ cnt = r500fp_count(atom->cmd);
+ return cnt ? (cnt * 4) + 1 : 0;
+}
-CHECK(always, atom->cmd_size)
- CHECK(variable, packet0_count(atom->cmd) ? (1 + packet0_count(atom->cmd)) : 0)
- CHECK(vpu, vpu_count(atom->cmd) ? (1 + vpu_count(atom->cmd) * 4) : 0)
-#undef packet0_count
-#undef vpu_count
#define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \
do { \
r300->hw.ATOM.cmd_size = (SZ); \
@@ -280,10 +296,15 @@ void r300InitCmdBuf(r300ContextPtr r300)
{
int size, mtu;
int has_tcl = 1;
+ int is_r500 = 0;
+ int i;
if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
has_tcl = 0;
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ is_r500 = 1;
+
r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */
mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
@@ -298,30 +319,41 @@ void r300InitCmdBuf(r300ContextPtr r300)
/* Initialize state atoms */
ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6);
- ALLOC_STATE(vap_cntl, always, 2, 0);
- r300->hw.vap_cntl.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1);
+ ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0);
+ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1);
+ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
+ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1);
+ if (is_r500) {
+ ALLOC_STATE(vap_index_offset, always, 2, 0);
+ r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1);
+ r300->hw.vap_index_offset.cmd[1] = 0;
+ }
ALLOC_STATE(vte, always, 3, 0);
r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2);
- ALLOC_STATE(unk2134, always, 3, 0);
- r300->hw.unk2134.cmd[0] = cmdpacket0(0x2134, 2);
+ ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
+ r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(R300_VAP_VF_MAX_VTX_INDX, 2);
ALLOC_STATE(vap_cntl_status, always, 2, 0);
r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1);
ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
- cmdpacket0(R300_VAP_INPUT_ROUTE_0_0, 1);
+ cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1);
ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
- cmdpacket0(R300_VAP_INPUT_ROUTE_1_0, 1);
+ cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
- r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_INPUT_CNTL_0, 2);
- ALLOC_STATE(unk21DC, always, 2, 0);
- r300->hw.unk21DC.cmd[0] = cmdpacket0(0x21DC, 1);
- ALLOC_STATE(vap_clip_cntl, always, 2, 0);
- r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1);
- ALLOC_STATE(unk2220, always, 5, 0);
- r300->hw.unk2220.cmd[0] = cmdpacket0(0x2220, 4);
- ALLOC_STATE(unk2288, always, 2, 0);
- r300->hw.unk2288.cmd[0] = cmdpacket0(R300_VAP_UNKNOWN_2288, 1);
+ r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2);
+ ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
+ r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
+
+ if (has_tcl) {
+ ALLOC_STATE(vap_clip_cntl, always, 2, 0);
+ r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1);
+ ALLOC_STATE(vap_clip, always, 5, 0);
+ r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4);
+ ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
+ r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1);
+ }
+
ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
r300->hw.vof.cmd[R300_VOF_CMD_0] =
cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2);
@@ -329,7 +361,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
if (has_tcl) {
ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
r300->hw.pvs.cmd[R300_PVS_CMD_0] =
- cmdpacket0(R300_VAP_PVS_CNTL_1, 3);
+ cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3);
}
ALLOC_STATE(gb_enable, always, 2, 0);
@@ -338,130 +370,183 @@ void r300InitCmdBuf(r300ContextPtr r300)
r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5);
ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1);
- ALLOC_STATE(unk4200, always, 5, 0);
- r300->hw.unk4200.cmd[0] = cmdpacket0(0x4200, 4);
- ALLOC_STATE(unk4214, always, 2, 0);
- r300->hw.unk4214.cmd[0] = cmdpacket0(0x4214, 1);
+ ALLOC_STATE(ga_point_s0, always, 5, 0);
+ r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4);
+ ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
+ r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1);
ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
- r300->hw.ps.cmd[0] = cmdpacket0(R300_RE_POINTSIZE, 1);
- ALLOC_STATE(unk4230, always, 4, 0);
- r300->hw.unk4230.cmd[0] = cmdpacket0(0x4230, 3);
+ r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1);
+ ALLOC_STATE(ga_point_minmax, always, 4, 0);
+ r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3);
ALLOC_STATE(lcntl, always, 2, 0);
- r300->hw.lcntl.cmd[0] = cmdpacket0(R300_RE_LINE_CNT, 1);
- ALLOC_STATE(unk4260, always, 4, 0);
- r300->hw.unk4260.cmd[0] = cmdpacket0(0x4260, 3);
+ r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1);
+ ALLOC_STATE(ga_line_stipple, always, 4, 0);
+ r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3);
ALLOC_STATE(shade, always, 5, 0);
- r300->hw.shade.cmd[0] = cmdpacket0(R300_RE_SHADE, 4);
+ r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4);
ALLOC_STATE(polygon_mode, always, 4, 0);
- r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3);
+ r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3);
ALLOC_STATE(fogp, always, 3, 0);
- r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2);
+ r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2);
ALLOC_STATE(zbias_cntl, always, 2, 0);
- r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_RE_ZBIAS_CNTL, 1);
+ r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1);
ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
- cmdpacket0(R300_RE_ZBIAS_T_FACTOR, 4);
+ cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
ALLOC_STATE(occlusion_cntl, always, 2, 0);
- r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1);
+ r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1);
ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
- r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_RE_CULL_CNTL, 1);
- ALLOC_STATE(unk42C0, always, 3, 0);
- r300->hw.unk42C0.cmd[0] = cmdpacket0(0x42C0, 2);
+ r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1);
+ ALLOC_STATE(su_depth_scale, always, 3, 0);
+ r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2);
ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
- r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_CNTL_0, 2);
- ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0);
- r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_INTERP_0, 8);
- ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, 1);
- ALLOC_STATE(unk43A4, always, 3, 0);
- r300->hw.unk43A4.cmd[0] = cmdpacket0(0x43A4, 2);
- ALLOC_STATE(unk43E8, always, 2, 0);
- r300->hw.unk43E8.cmd[0] = cmdpacket0(0x43E8, 1);
- ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
- r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3);
- r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4);
- ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
- r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0);
- ALLOC_STATE(unk46A4, always, 6, 0);
- r300->hw.unk46A4.cmd[0] = cmdpacket0(0x46A4, 5);
- ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
- r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1);
- ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
- r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1);
- ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
- r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1);
- ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
- r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1);
+ r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2);
+ if (is_r500) {
+ ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16);
+ for (i = 0; i < 8; i++) {
+ r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] =
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
+ }
+ ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1);
+ } else {
+ ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8);
+ ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1);
+ }
+ ALLOC_STATE(sc_hyperz, always, 3, 0);
+ r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2);
+ ALLOC_STATE(sc_screendoor, always, 2, 0);
+ r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1);
+ ALLOC_STATE(us_out_fmt, always, 6, 0);
+ r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5);
+
+ if (is_r500) {
+ ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0);
+ r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2);
+ r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO;
+ r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3);
+ r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1);
+ r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */
+
+ ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
+ r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0);
+ ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
+ r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0);
+ } else {
+ ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
+ r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3);
+ r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4);
+ ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
+ r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0);
+
+ ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
+ r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1);
+ ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
+ r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1);
+ ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
+ r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1);
+ ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
+ r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1);
+ ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
+ r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0);
+ }
ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
- r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_RE_FOG_STATE, 1);
+ r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1);
ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
- r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FOG_COLOR_R, 3);
+ r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3);
ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
- r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_PP_ALPHA_TEST, 2);
- ALLOC_STATE(unk4BD8, always, 2, 0);
- r300->hw.unk4BD8.cmd[0] = cmdpacket0(0x4BD8, 1);
- ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
- r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0);
- ALLOC_STATE(unk4E00, always, 2, 0);
- r300->hw.unk4E00.cmd[0] = cmdpacket0(0x4E00, 1);
+ r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2);
+ ALLOC_STATE(fg_depth_src, always, 2, 0);
+ r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1);
+ ALLOC_STATE(rb3d_cctl, always, 2, 0);
+ r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1);
ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2);
ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
- r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(R300_RB3D_COLORMASK, 1);
- ALLOC_STATE(blend_color, always, 4, 0);
- r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 3);
+ r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(RB3D_COLOR_CHANNEL_MASK, 1);
+ if (is_r500) {
+ ALLOC_STATE(blend_color, always, 3, 0);
+ r300->hw.blend_color.cmd[0] = cmdpacket0(R500_RB3D_CONSTANT_COLOR_AR, 2);
+ } else {
+ ALLOC_STATE(blend_color, always, 2, 0);
+ r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 1);
+ }
+ ALLOC_STATE(rop, always, 2, 0);
+ r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1);
ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
- ALLOC_STATE(unk4E50, always, 10, 0);
- r300->hw.unk4E50.cmd[0] = cmdpacket0(0x4E50, 9);
- ALLOC_STATE(unk4E88, always, 2, 0);
- r300->hw.unk4E88.cmd[0] = cmdpacket0(0x4E88, 1);
- ALLOC_STATE(unk4EA0, always, 3, 0);
- r300->hw.unk4EA0.cmd[0] = cmdpacket0(0x4EA0, 2);
+ ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
+ r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9);
+ ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
+ r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1);
+ ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
r300->hw.zs.cmd[R300_ZS_CMD_0] =
- cmdpacket0(R300_RB3D_ZSTENCIL_CNTL_0, 3);
+ cmdpacket0(R300_ZB_CNTL, 3);
ALLOC_STATE(zstencil_format, always, 5, 0);
r300->hw.zstencil_format.cmd[0] =
- cmdpacket0(R300_RB3D_ZSTENCIL_FORMAT, 4);
+ cmdpacket0(R300_ZB_FORMAT, 4);
ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
- r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_RB3D_DEPTHOFFSET, 2);
- ALLOC_STATE(unk4F28, always, 2, 0);
- r300->hw.unk4F28.cmd[0] = cmdpacket0(0x4F28, 1);
+ r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2);
+ ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
+ r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1);
ALLOC_STATE(unk4F30, always, 3, 0);
r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2);
- ALLOC_STATE(unk4F44, always, 2, 0);
- r300->hw.unk4F44.cmd[0] = cmdpacket0(0x4F44, 1);
- ALLOC_STATE(unk4F54, always, 2, 0);
- r300->hw.unk4F54.cmd[0] = cmdpacket0(0x4F54, 1);
+ ALLOC_STATE(zb_hiz_offset, always, 2, 0);
+ r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1);
+ ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
+ r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1);
/* VPU only on TCL */
if (has_tcl) {
int i;
ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
r300->hw.vpi.cmd[R300_VPI_CMD_0] =
- cmdvpu(R300_PVS_UPLOAD_PROGRAM, 0);
+ cmdvpu(R300_PVS_CODE_START, 0);
- ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
- r300->hw.vpp.cmd[R300_VPP_CMD_0] =
- cmdvpu(R300_PVS_UPLOAD_PARAMETERS, 0);
+ if (is_r500) {
+ ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+ r300->hw.vpp.cmd[R300_VPP_CMD_0] =
+ cmdvpu(R500_PVS_CONST_START, 0);
- ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
- r300->hw.vps.cmd[R300_VPS_CMD_0] =
- cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1);
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+ r300->hw.vps.cmd[R300_VPS_CMD_0] =
+ cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1);
- for (i = 0; i < 6; i++) {
- ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
- r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
- cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1);
+ for (i = 0; i < 6; i++) {
+ ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+ r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
+ cmdvpu(R500_PVS_UCP_START + i, 1);
+ }
+ } else {
+ ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+ r300->hw.vpp.cmd[R300_VPP_CMD_0] =
+ cmdvpu(R300_PVS_CONST_START, 0);
+
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+ r300->hw.vps.cmd[R300_VPS_CMD_0] =
+ cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1);
+
+ for (i = 0; i < 6; i++) {
+ ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+ r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
+ cmdvpu(R300_PVS_UCP_START + i, 1);
+ }
}
}
/* Textures */
ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_FILTER_0, 0);
+ cmdpacket0(R300_TX_FILTER0_0, 0);
ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
@@ -475,7 +560,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
cmdpacket0(R300_TX_FORMAT_0, 0);
ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
- r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_PITCH_0, 0);
+ r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0);
ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
diff --git a/r300/r300_cmdbuf.h b/r300/r300_cmdbuf.h
index bfb2eda..a8eaa58 100644
--- a/r300/r300_cmdbuf.h
+++ b/r300/r300_cmdbuf.h
@@ -52,7 +52,7 @@ extern void r300DestroyCmdBuf(r300ContextPtr r300);
*
* \param dwords The number of dwords we need to be free on the command buffer
*/
-static __inline__ void r300EnsureCmdBufSpace(r300ContextPtr r300,
+static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300,
int dwords, const char *caller)
{
assert(dwords < r300->cmdbuf.size);
@@ -68,7 +68,7 @@ static __inline__ void r300EnsureCmdBufSpace(r300ContextPtr r300,
* causes state reemission after a flush. This is necessary to ensure
* correct hardware state after an unlock.
*/
-static __inline__ uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
+static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
int dwords, const char *caller)
{
uint32_t *ptr;
@@ -80,7 +80,7 @@ static __inline__ uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
return ptr;
}
-static __inline__ uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
+static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
int dwords, const char *caller)
{
uint32_t *ptr;
diff --git a/r300/r300_context.c b/r300/r300_context.c
index 9ea14ab..fcf571d 100644
--- a/r300/r300_context.c
+++ b/r300/r300_context.c
@@ -63,6 +63,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_ioctl.h"
#include "r300_tex.h"
#include "r300_emit.h"
+#include "r300_swtcl.h"
#ifdef USER_BUFFERS
#include "r300_mem.h"
@@ -78,11 +79,13 @@ int hw_tcl_on = 1;
#define need_GL_EXT_stencil_two_side
#define need_GL_ARB_multisample
+#define need_GL_ARB_point_parameters
#define need_GL_ARB_texture_compression
#define need_GL_ARB_vertex_buffer_object
#define need_GL_ARB_vertex_program
#define need_GL_EXT_blend_minmax
//#define need_GL_EXT_fog_coord
+#define need_GL_EXT_multi_draw_arrays
#define need_GL_EXT_secondary_color
#define need_GL_EXT_blend_equation_separate
#define need_GL_EXT_blend_func_separate
@@ -92,8 +95,13 @@ int hw_tcl_on = 1;
const struct dri_extension card_extensions[] = {
/* *INDENT-OFF* */
+ {"GL_ARB_depth_texture", NULL},
+ {"GL_ARB_fragment_program", NULL},
{"GL_ARB_multisample", GL_ARB_multisample_functions},
{"GL_ARB_multitexture", NULL},
+ {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
+ {"GL_ARB_shadow", NULL},
+ {"GL_ARB_shadow_ambient", NULL},
{"GL_ARB_texture_border_clamp", NULL},
{"GL_ARB_texture_compression", GL_ARB_texture_compression_functions},
{"GL_ARB_texture_cube_map", NULL},
@@ -104,14 +112,15 @@ const struct dri_extension card_extensions[] = {
{"GL_ARB_texture_mirrored_repeat", NULL},
{"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
{"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
- {"GL_ARB_fragment_program", NULL},
{"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
{"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
{"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
{"GL_EXT_blend_subtract", NULL},
// {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+ {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions},
{"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
{"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
+ {"GL_EXT_shadow_funcs", NULL},
{"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions},
{"GL_EXT_stencil_wrap", NULL},
{"GL_EXT_texture_edge_clamp", NULL},
@@ -129,6 +138,7 @@ const struct dri_extension card_extensions[] = {
{"GL_NV_blend_square", NULL},
{"GL_NV_vertex_program", GL_NV_vertex_program_functions},
{"GL_SGIS_generate_mipmap", NULL},
+ {"GL_SGIX_depth_texture", NULL},
{NULL, NULL}
/* *INDENT-ON* */
};
@@ -272,6 +282,12 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
MIN2(ctx->Const.MaxTextureImageUnits,
ctx->Const.MaxTextureCoordUnits);
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+ ctx->Const.MaxTextureLodBias = 16.0;
+
+ if (screen->chip_family >= CHIP_FAMILY_RV515) {
+ ctx->Const.MaxTextureLevels = 13;
+ ctx->Const.MaxTextureRectSize = 4096;
+ }
ctx->Const.MinPointSize = 1.0;
ctx->Const.MinPointSizeAA = 1.0;
@@ -317,15 +333,17 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
_tnl_allow_vertex_fog(ctx, GL_TRUE);
/* currently bogus data */
- ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
- ctx->Const.VertexProgram.MaxNativeInstructions =
- VSF_MAX_FRAGMENT_LENGTH / 4;
- ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */
- ctx->Const.VertexProgram.MaxTemps = 32;
- ctx->Const.VertexProgram.MaxNativeTemps =
- /*VSF_MAX_FRAGMENT_TEMPS */ 32;
- ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */
- ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+ if (screen->chip_flags & RADEON_CHIPSET_TCL) {
+ ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
+ ctx->Const.VertexProgram.MaxNativeInstructions =
+ VSF_MAX_FRAGMENT_LENGTH / 4;
+ ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */
+ ctx->Const.VertexProgram.MaxTemps = 32;
+ ctx->Const.VertexProgram.MaxNativeTemps =
+ /*VSF_MAX_FRAGMENT_TEMPS */ 32;
+ ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */
+ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+ }
ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS;
ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */
@@ -363,6 +381,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
radeonInitSpanFuncs(ctx);
r300InitCmdBuf(r300);
r300InitState(r300);
+ if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
+ r300InitSwtcl(ctx);
TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
diff --git a/r300/r300_context.h b/r300/r300_context.h
index 6b0a588..d2017f8 100644
--- a/r300/r300_context.h
+++ b/r300/r300_context.h
@@ -49,8 +49,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define USER_BUFFERS
-//#define OPTIMIZE_ELTS
-
struct r300_context;
typedef struct r300_context r300ContextRec;
typedef struct r300_context *r300ContextPtr;
@@ -75,12 +73,12 @@ typedef struct r300_context *r300ContextPtr;
}
#include "r300_vertprog.h"
-#include "r300_fragprog.h"
+#include "r500_fragprog.h"
/**
* This function takes a float and packs it into a uint32_t
*/
-static __inline__ uint32_t r300PackFloat32(float fl)
+static INLINE uint32_t r300PackFloat32(float fl)
{
union {
float fl;
@@ -97,7 +95,7 @@ static __inline__ uint32_t r300PackFloat32(float fl)
* But it works for most things. I'll fix it later if someone
* else with a better clue doesn't
*/
-static __inline__ uint32_t r300PackFloat24(float f)
+static INLINE uint32_t r300PackFloat24(float f)
{
float mantissa;
int exponent;
@@ -149,7 +147,6 @@ struct r300_dma_region {
int aos_offset; /* address in GART memory */
int aos_stride; /* distance between elements, in dwords */
int aos_size; /* number of components (1-4) */
- int aos_reg; /* VAP register assignment */
};
struct r300_dma {
@@ -181,13 +178,6 @@ struct r300_tex_obj {
GLuint bufAddr; /* Offset to start of locally
shared texture block */
- GLuint dirty_state; /* Flags (1 per texunit) for
- whether or not this texobj
- has dirty hardware state
- (pp_*) that needs to be
- brought into the
- texunit. */
-
drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
/* Six, for the cube faces */
@@ -333,15 +323,17 @@ struct r300_state_atom {
#define R300_RI_INTERP_7 8
#define R300_RI_CMDSIZE 9
+#define R500_RI_CMDSIZE 17
+
#define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */
-#define R300_RR_ROUTE_0 1
-#define R300_RR_ROUTE_1 2
-#define R300_RR_ROUTE_2 3
-#define R300_RR_ROUTE_3 4
-#define R300_RR_ROUTE_4 5
-#define R300_RR_ROUTE_5 6
-#define R300_RR_ROUTE_6 7
-#define R300_RR_ROUTE_7 8
+#define R300_RR_INST_0 1
+#define R300_RR_INST_1 2
+#define R300_RR_INST_2 3
+#define R300_RR_INST_3 4
+#define R300_RR_INST_4 5
+#define R300_RR_INST_5 6
+#define R300_RR_INST_6 7
+#define R300_RR_INST_7 8
#define R300_RR_CMDSIZE 9
#define R300_FP_CMD_0 0
@@ -355,6 +347,17 @@ struct r300_state_atom {
#define R300_FP_NODE3 8
#define R300_FP_CMDSIZE 9
+#define R500_FP_CMD_0 0
+#define R500_FP_CNTL 1
+#define R500_FP_PIXSIZE 2
+#define R500_FP_CMD_1 3
+#define R500_FP_CODE_ADDR 4
+#define R500_FP_CODE_RANGE 5
+#define R500_FP_CODE_OFFSET 6
+#define R500_FP_CMD_2 7
+#define R500_FP_FC_CNTL 8
+#define R500_FP_CMDSIZE 9
+
#define R300_FPT_CMD_0 0
#define R300_FPT_INSTR_0 1
#define R300_FPT_CMDSIZE 65
@@ -362,10 +365,14 @@ struct r300_state_atom {
#define R300_FPI_CMD_0 0
#define R300_FPI_INSTR_0 1
#define R300_FPI_CMDSIZE 65
+/* R500 has space for 512 instructions - 6 dwords per instruction */
+#define R500_FPI_CMDSIZE (512*6+1)
#define R300_FPP_CMD_0 0
#define R300_FPP_PARAM_0 1
#define R300_FPP_CMDSIZE (32*4+1)
+/* R500 has spcae for 256 constants - 4 dwords per constant */
+#define R500_FPP_CMDSIZE (256*4+1)
#define R300_FOGS_CMD_0 0
#define R300_FOGS_STATE 1
@@ -413,6 +420,12 @@ struct r300_state_atom {
#define R300_ZB_PITCH 2
#define R300_ZB_CMDSIZE 3
+#define R300_VAP_CNTL_FLUSH 0
+#define R300_VAP_CNTL_FLUSH_1 1
+#define R300_VAP_CNTL_CMD 2
+#define R300_VAP_CNTL_INSTR 3
+#define R300_VAP_CNTL_SIZE 4
+
#define R300_VPI_CMD_0 0
#define R300_VPI_INSTR_0 1
#define R300_VPI_CMDSIZE 1025 /* 256 16 byte instructions */
@@ -454,63 +467,67 @@ struct r300_hw_state {
struct r300_state_atom vpt; /* viewport (1D98) */
struct r300_state_atom vap_cntl;
+ struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */
struct r300_state_atom vof; /* VAP output format register 0x2090 */
struct r300_state_atom vte; /* (20B0) */
- struct r300_state_atom unk2134; /* (2134) */
+ struct r300_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */
struct r300_state_atom vap_cntl_status;
struct r300_state_atom vir[2]; /* vap input route (2150/21E0) */
struct r300_state_atom vic; /* vap input control (2180) */
- struct r300_state_atom unk21DC; /* (21DC) */
+ struct r300_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
struct r300_state_atom vap_clip_cntl;
- struct r300_state_atom unk2220; /* (2220) */
- struct r300_state_atom unk2288; /* (2288) */
+ struct r300_state_atom vap_clip;
+ struct r300_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */
struct r300_state_atom pvs; /* pvs_cntl (22D0) */
struct r300_state_atom gb_enable; /* (4008) */
struct r300_state_atom gb_misc; /* Multisampling position shifts ? (4010) */
- struct r300_state_atom unk4200; /* (4200) */
- struct r300_state_atom unk4214; /* (4214) */
+ struct r300_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
+ struct r300_state_atom ga_triangle_stipple; /* (4214) */
struct r300_state_atom ps; /* pointsize (421C) */
- struct r300_state_atom unk4230; /* (4230) */
+ struct r300_state_atom ga_point_minmax; /* (4230) */
struct r300_state_atom lcntl; /* line control */
- struct r300_state_atom unk4260; /* (4260) */
+ struct r300_state_atom ga_line_stipple; /* (4260) */
struct r300_state_atom shade;
struct r300_state_atom polygon_mode;
struct r300_state_atom fogp; /* fog parameters (4294) */
- struct r300_state_atom unk429C; /* (429C) */
+ struct r300_state_atom ga_soft_reset; /* (429C) */
struct r300_state_atom zbias_cntl;
struct r300_state_atom zbs; /* zbias (42A4) */
struct r300_state_atom occlusion_cntl;
struct r300_state_atom cul; /* cull cntl (42B8) */
- struct r300_state_atom unk42C0; /* (42C0) */
+ struct r300_state_atom su_depth_scale; /* (42C0) */
struct r300_state_atom rc; /* rs control (4300) */
struct r300_state_atom ri; /* rs interpolators (4310) */
struct r300_state_atom rr; /* rs route (4330) */
- struct r300_state_atom unk43A4; /* (43A4) */
- struct r300_state_atom unk43E8; /* (43E8) */
+ struct r300_state_atom sc_hyperz; /* (43A4) */
+ struct r300_state_atom sc_screendoor; /* (43E8) */
struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */
struct r300_state_atom fpt; /* texi - (4620) */
- struct r300_state_atom unk46A4; /* (46A4) */
+ struct r300_state_atom us_out_fmt; /* (46A4) */
+ struct r300_state_atom r500fp; /* r500 fp instructions */
+ struct r300_state_atom r500fp_const; /* r500 fp constants */
struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */
struct r300_state_atom fogs; /* fog state (4BC0) */
struct r300_state_atom fogc; /* fog color (4BC8) */
struct r300_state_atom at; /* alpha test (4BD4) */
- struct r300_state_atom unk4BD8; /* (4BD8) */
+ struct r300_state_atom fg_depth_src; /* (4BD8) */
struct r300_state_atom fpp; /* 0x4C00 and following */
- struct r300_state_atom unk4E00; /* (4E00) */
+ struct r300_state_atom rb3d_cctl; /* (4E00) */
struct r300_state_atom bld; /* blending (4E04) */
struct r300_state_atom cmk; /* colormask (4E0C) */
struct r300_state_atom blend_color; /* constant blend color */
+ struct r300_state_atom rop; /* ropcntl */
struct r300_state_atom cb; /* colorbuffer (4E28) */
- struct r300_state_atom unk4E50; /* (4E50) */
- struct r300_state_atom unk4E88; /* (4E88) */
- struct r300_state_atom unk4EA0; /* (4E88) I saw it only written on RV350 hardware.. */
+ struct r300_state_atom rb3d_dither_ctl; /* (4E50) */
+ struct r300_state_atom rb3d_aaresolve_ctl; /* (4E88) */
+ struct r300_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */
struct r300_state_atom zs; /* zstencil control (4F00) */
struct r300_state_atom zstencil_format;
struct r300_state_atom zb; /* z buffer (4F20) */
- struct r300_state_atom unk4F28; /* (4F28) */
+ struct r300_state_atom zb_depthclearvalue; /* (4F28) */
struct r300_state_atom unk4F30; /* (4F30) */
- struct r300_state_atom unk4F44; /* (4F44) */
- struct r300_state_atom unk4F54; /* (4F54) */
+ struct r300_state_atom zb_hiz_offset; /* (4F44) */
+ struct r300_state_atom zb_hiz_pitch; /* (4F54) */
struct r300_state_atom vpi; /* vp instructions */
struct r300_state_atom vpp; /* vp parameters */
@@ -557,9 +574,7 @@ struct r300_depthbuffer_state {
};
struct r300_stencilbuffer_state {
- GLuint clear;
GLboolean hw_stencil;
-
};
/* Vertex shader state */
@@ -579,38 +594,21 @@ struct r300_vertex_shader_fragment {
union {
GLuint d[VSF_MAX_FRAGMENT_LENGTH];
float f[VSF_MAX_FRAGMENT_LENGTH];
- VERTEX_SHADER_INSTRUCTION i[VSF_MAX_FRAGMENT_LENGTH / 4];
+ GLuint i[VSF_MAX_FRAGMENT_LENGTH];
} body;
};
-#define VSF_DEST_PROGRAM 0x0
-#define VSF_DEST_MATRIX0 0x200
-#define VSF_DEST_MATRIX1 0x204
-#define VSF_DEST_MATRIX2 0x208
-#define VSF_DEST_VECTOR0 0x20c
-#define VSF_DEST_VECTOR1 0x20d
-#define VSF_DEST_UNKNOWN1 0x400
-#define VSF_DEST_UNKNOWN2 0x406
-
struct r300_vertex_shader_state {
struct r300_vertex_shader_fragment program;
-
- struct r300_vertex_shader_fragment unknown1;
- struct r300_vertex_shader_fragment unknown2;
-
- int program_start;
- int unknown_ptr1; /* pointer within program space */
- int program_end;
-
- int param_offset;
- int param_count;
-
- int unknown_ptr2; /* pointer within program space */
- int unknown_ptr3; /* pointer within program space */
};
extern int hw_tcl_on;
+#define COLOR_IS_RGBA
+#define TAG(x) r300##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current)
#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp)
@@ -624,6 +622,7 @@ extern int hw_tcl_on;
struct r300_vertex_program_key {
GLuint InputsRead;
GLuint OutputsWritten;
+ GLuint OutputsAdded;
};
struct r300_vertex_program {
@@ -655,102 +654,55 @@ struct r300_vertex_program_cont {
#define PFS_NUM_TEMP_REGS 32
#define PFS_NUM_CONST_REGS 16
-/* Mapping Mesa registers to R300 temporaries */
-struct reg_acc {
- int reg; /* Assigned hw temp */
- unsigned int refcount; /* Number of uses by mesa program */
-};
+struct r300_pfs_compile_state;
-/**
- * Describe the current lifetime information for an R300 temporary
- */
-struct reg_lifetime {
- /* Index of the first slot where this register is free in the sense
- that it can be used as a new destination register.
- This is -1 if the register has been assigned to a Mesa register
- and the last access to the register has not yet been emitted */
- int free;
-
- /* Index of the first slot where this register is currently reserved.
- This is used to stop e.g. a scalar operation from being moved
- before the allocation time of a register that was first allocated
- for a vector operation. */
- int reserved;
-
- /* Index of the first slot in which the register can be used as a
- source without losing the value that is written by the last
- emitted instruction that writes to the register */
- int vector_valid;
- int scalar_valid;
-
- /* Index to the slot where the register was last read.
- This is also the first slot in which the register may be written again */
- int vector_lastread;
- int scalar_lastread;
-};
/**
- * Store usage information about an ALU instruction slot during the
- * compilation of a fragment program.
+ * Stores state that influences the compilation of a fragment program.
*/
-#define SLOT_SRC_VECTOR (1<<0)
-#define SLOT_SRC_SCALAR (1<<3)
-#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR)
-#define SLOT_OP_VECTOR (1<<16)
-#define SLOT_OP_SCALAR (1<<17)
-#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR)
-
-struct r300_pfs_compile_slot {
- /* Bitmask indicating which parts of the slot are used, using SLOT_ constants
- defined above */
- unsigned int used;
-
- /* Selected sources */
- int vsrc[3];
- int ssrc[3];
+struct r300_fragment_program_external_state {
+ struct {
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is:
+ * 0 - GL_LUMINANCE
+ * 1 - GL_INTENSITY
+ * 2 - GL_ALPHA
+ * depending on the depth texture mode.
+ */
+ GLuint depth_texture_mode : 2;
+
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is (texture_compare_func - GL_NEVER).
+ * [e.g. if compare function is GL_LEQUAL, this field is 3]
+ *
+ * Otherwise, this field is 0.
+ */
+ GLuint texture_compare_func : 3;
+ } unit[16];
};
-/**
- * Store information during compilation of fragment programs.
- */
-struct r300_pfs_compile_state {
- int nrslots; /* number of ALU slots used so far */
- /* Track which (parts of) slots are already filled with instructions */
- struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST];
-
- /* Track the validity of R300 temporaries */
- struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS];
-
- /* Used to map Mesa's inputs/temps onto hardware temps */
- int temp_in_use;
- struct reg_acc temps[PFS_NUM_TEMP_REGS];
- struct reg_acc inputs[32]; /* don't actually need 32... */
-
- /* Track usage of hardware temps, for register allocation,
- * indirection detection, etc. */
- GLuint used_in_node;
- GLuint dest_in_node;
+struct r300_fragment_program_node {
+ int tex_offset; /**< first tex instruction */
+ int tex_end; /**< last tex instruction, relative to tex_offset */
+ int alu_offset; /**< first ALU instruction */
+ int alu_end; /**< last ALU instruction, relative to alu_offset */
+ int flags;
};
/**
- * Store everything about a fragment program that is needed
- * to render with that program.
+ * Stores an R300 fragment program in its compiled-to-hardware form.
*/
-struct r300_fragment_program {
- struct gl_fragment_program mesa_program;
-
- GLcontext *ctx;
- GLboolean translated;
- GLboolean error;
- struct r300_pfs_compile_state *cs;
-
+struct r300_fragment_program_code {
struct {
- int length;
+ int length; /**< total # of texture instructions used */
GLuint inst[PFS_MAX_TEX_INST];
} tex;
struct {
+ int length; /**< total # of ALU instructions used */
struct {
GLuint inst0;
GLuint inst1;
@@ -759,89 +711,121 @@ struct r300_fragment_program {
} inst[PFS_MAX_ALU_INST];
} alu;
- struct {
- int tex_offset;
- int tex_end;
- int alu_offset;
- int alu_end;
- int flags;
- } node[4];
+ struct r300_fragment_program_node node[4];
int cur_node;
int first_node_has_tex;
- int alu_offset;
- int alu_end;
- int tex_offset;
- int tex_end;
-
- /* Hardware constants.
- * Contains a pointer to the value. The destination of the pointer
- * is supposed to be updated when GL state changes.
- * Typically, this is either a pointer into
- * gl_program_parameter_list::ParameterValues, or a pointer to a
- * global constant (e.g. for sin/cos-approximation)
+ /**
+ * Remember which program register a given hardware constant
+ * belongs to.
*/
- const GLfloat *constant[PFS_NUM_CONST_REGS];
+ struct prog_src_register constant[PFS_NUM_CONST_REGS];
int const_nr;
int max_temp_idx;
+};
+
+/**
+ * Store everything about a fragment program that is needed
+ * to render with that program.
+ */
+struct r300_fragment_program {
+ struct gl_fragment_program mesa_program;
+
+ GLboolean translated;
+ GLboolean error;
+
+ struct r300_fragment_program_external_state state;
+ struct r300_fragment_program_code code;
+ GLboolean WritesDepth;
GLuint optimization;
};
-#define R300_MAX_AOS_ARRAYS 16
+struct r500_pfs_compile_state;
-#define AOS_FORMAT_USHORT 0
-#define AOS_FORMAT_FLOAT 1
-#define AOS_FORMAT_UBYTE 2
-#define AOS_FORMAT_FLOAT_COLOR 3
+struct r500_fragment_program_external_state {
+ struct {
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is:
+ * 0 - GL_LUMINANCE
+ * 1 - GL_INTENSITY
+ * 2 - GL_ALPHA
+ * depending on the depth texture mode.
+ */
+ GLuint depth_texture_mode : 2;
+
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is (texture_compare_func - GL_NEVER).
+ * [e.g. if compare function is GL_LEQUAL, this field is 3]
+ *
+ * Otherwise, this field is 0.
+ */
+ GLuint texture_compare_func : 3;
+ } unit[16];
+};
-#define REG_COORDS 0
-#define REG_COLOR0 1
-#define REG_TEX0 2
+struct r500_fragment_program_code {
+ struct {
+ GLuint inst0;
+ GLuint inst1;
+ GLuint inst2;
+ GLuint inst3;
+ GLuint inst4;
+ GLuint inst5;
+ } inst[512];
+
+ int inst_offset;
+ int inst_end;
+
+ /**
+ * Remember which program register a given hardware constant
+ * belongs to.
+ */
+ struct prog_src_register constant[PFS_NUM_CONST_REGS];
+ int const_nr;
-struct dt {
- GLint size;
- GLenum type;
- GLsizei stride;
- void *data;
+ int max_temp_idx;
};
-struct radeon_vertex_buffer {
- int Count;
- void *Elts;
- int elt_size;
- int elt_min, elt_max; /* debug */
+struct r500_fragment_program {
+ struct gl_fragment_program mesa_program;
- struct dt AttribPtr[VERT_ATTRIB_MAX];
+ GLcontext *ctx;
+ GLboolean translated;
+ GLboolean error;
+
+ struct r500_fragment_program_external_state state;
+ struct r500_fragment_program_code code;
- const struct _mesa_prim *Primitive;
- GLuint PrimitiveCount;
- GLint LockFirst;
- GLsizei LockCount;
- int lock_uptodate;
+ GLboolean writes_depth;
+
+ GLuint optimization;
};
+#define R300_MAX_AOS_ARRAYS 16
+
+#define REG_COORDS 0
+#define REG_COLOR0 1
+#define REG_TEX0 2
+
struct r300_state {
struct r300_depthbuffer_state depth;
struct r300_texture_state texture;
int sw_tcl_inputs[VERT_ATTRIB_MAX];
struct r300_vertex_shader_state vertex_shader;
- struct r300_pfs_compile_state pfs_compile;
struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
int aos_count;
- struct radeon_vertex_buffer VB;
GLuint *Elts;
struct r300_dma_region elt_dma;
- DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for.
+ struct r300_dma_region swtcl_dma;
+ DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for.
They are the same as tnl->render_inputs for fixed pipeline */
- struct {
- int transform_offset; /* Transform matrix offset, -1 if none */
- } vap_param; /* vertex processor parameter allocation - tells where to write parameters */
-
struct r300_stencilbuffer_state stencil;
};
@@ -850,6 +834,62 @@ struct r300_state {
#define R300_FALLBACK_TCL 1
#define R300_FALLBACK_RAST 2
+/* r300_swtcl.c
+ */
+struct r300_swtcl_info {
+ GLuint RenderIndex;
+
+ /**
+ * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is
+ * installed in the Mesa state vector.
+ */
+ GLuint vertex_size;
+
+ /**
+ * Attributes instructing the Mesa TCL pipeline where / how to put vertex
+ * data in the hardware buffer.
+ */
+ struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+
+ /**
+ * Number of elements of \c ::vertex_attrs that are actually used.
+ */
+ GLuint vertex_attr_count;
+
+ /**
+ * Cached pointer to the buffer where Mesa will store vertex data.
+ */
+ GLubyte *verts;
+
+ /* Fallback rasterization functions
+ */
+ // r200_point_func draw_point;
+ // r200_line_func draw_line;
+ // r200_tri_func draw_tri;
+
+ GLuint hw_primitive;
+ GLenum render_primitive;
+ GLuint numverts;
+
+ /**
+ * Offset of the 4UB color data within a hardware (swtcl) vertex.
+ */
+ GLuint coloroffset;
+
+ /**
+ * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
+ */
+ GLuint specoffset;
+
+ /**
+ * Should Mesa project vertex data or will the hardware do it?
+ */
+ GLboolean needproj;
+
+ struct r300_dma_region indexed_verts;
+};
+
+
/**
* \brief R300 context structure.
*/
@@ -888,6 +928,9 @@ struct r300_context {
GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
GLboolean disable_lowimpact_fallback;
+
+ DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */
+ struct r300_swtcl_info swtcl;
};
struct r300_buffer_object {
diff --git a/r300/r300_emit.c b/r300/r300_emit.c
index 2c26069..2ea17ad 100644
--- a/r300/r300_emit.c
+++ b/r300/r300_emit.c
@@ -86,16 +86,15 @@ do { \
} while (0)
#endif
-static void r300EmitVec4(GLcontext * ctx,
- struct r300_dma_region *rvb,
+static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
GLvoid * data, int stride, int count)
{
int i;
int *out = (int *)(rvb->address + rvb->start);
if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
+ fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+ __FUNCTION__, count, stride, (void *)out, (void *)data);
if (stride == 4)
COPY_DWORDS(out, data, count);
@@ -107,16 +106,15 @@ static void r300EmitVec4(GLcontext * ctx,
}
}
-static void r300EmitVec8(GLcontext * ctx,
- struct r300_dma_region *rvb,
+static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
GLvoid * data, int stride, int count)
{
int i;
int *out = (int *)(rvb->address + rvb->start);
if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
+ fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+ __FUNCTION__, count, stride, (void *)out, (void *)data);
if (stride == 8)
COPY_DWORDS(out, data, count * 2);
@@ -129,8 +127,7 @@ static void r300EmitVec8(GLcontext * ctx,
}
}
-static void r300EmitVec12(GLcontext * ctx,
- struct r300_dma_region *rvb,
+static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
GLvoid * data, int stride, int count)
{
int i;
@@ -152,16 +149,15 @@ static void r300EmitVec12(GLcontext * ctx,
}
}
-static void r300EmitVec16(GLcontext * ctx,
- struct r300_dma_region *rvb,
+static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
GLvoid * data, int stride, int count)
{
int i;
int *out = (int *)(rvb->address + rvb->start);
if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d\n",
- __FUNCTION__, count, stride);
+ fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+ __FUNCTION__, count, stride, (void *)out, (void *)data);
if (stride == 16)
COPY_DWORDS(out, data, count * 4);
@@ -176,32 +172,22 @@ static void r300EmitVec16(GLcontext * ctx,
}
}
-static void r300EmitVec(GLcontext * ctx,
- struct r300_dma_region *rvb,
+static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
GLvoid * data, int size, int stride, int count)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d size %d stride %d\n",
- __FUNCTION__, count, size, stride);
-
- /* Gets triggered when playing with future_hw_tcl_on ... */
- //assert(!rvb->buf);
-
if (stride == 0) {
r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
count = 1;
rvb->aos_offset = GET_START(rvb);
rvb->aos_stride = 0;
} else {
- r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); /* alignment? */
+ r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
rvb->aos_offset = GET_START(rvb);
rvb->aos_stride = size;
}
- /* Emit the data
- */
switch (size) {
case 1:
r300EmitVec4(ctx, rvb, data, stride, count);
@@ -217,128 +203,76 @@ static void r300EmitVec(GLcontext * ctx,
break;
default:
assert(0);
- _mesa_exit(-1);
break;
}
-
}
-static GLuint t_type(struct dt *dt)
-{
- switch (dt->type) {
- case GL_UNSIGNED_BYTE:
- return AOS_FORMAT_UBYTE;
- case GL_SHORT:
- return AOS_FORMAT_USHORT;
- case GL_FLOAT:
- return AOS_FORMAT_FLOAT;
- default:
- assert(0);
- break;
- }
+#define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \
+ (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT)
- return AOS_FORMAT_FLOAT;
-}
-
-static GLuint t_vir0_size(struct dt *dt)
-{
- switch (dt->type) {
- case GL_UNSIGNED_BYTE:
- return 4;
- case GL_SHORT:
- return 7;
- case GL_FLOAT:
- return dt->size - 1;
- default:
- assert(0);
- break;
- }
-
- return 0;
-}
-
-static GLuint t_aos_size(struct dt *dt)
-{
- switch (dt->type) {
- case GL_UNSIGNED_BYTE:
- return 1;
- case GL_SHORT:
- return 2;
- case GL_FLOAT:
- return dt->size;
- default:
- assert(0);
- break;
- }
-
- return 0;
-}
-
-static GLuint t_vir0(uint32_t * dst, struct dt *dt, int *inputs,
- GLint * tab, GLuint nr)
+GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
+ int *inputs, GLint * tab, GLuint nr)
{
GLuint i, dw;
- for (i = 0; i + 1 < nr; i += 2) {
- dw = t_vir0_size(&dt[tab[i]]) | (inputs[tab[i]] << 8) |
- (t_type(&dt[tab[i]]) << 14);
- dw |=
- (t_vir0_size(&dt[tab[i + 1]]) |
- (inputs[tab[i + 1]] << 8) | (t_type(&dt[tab[i + 1]])
- << 14)) << 16;
-
- if (i + 2 == nr) {
- dw |= (1 << (13 + 16));
+ /* type, inputs, stop bit, size */
+ for (i = 0; i < nr; i += 2) {
+ /* make sure input is valid, would lockup the gpu */
+ assert(inputs[tab[i]] != -1);
+ dw = (R300_SIGNED | DW_SIZE(i));
+ if (i + 1 == nr) {
+ dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
+ } else {
+ assert(inputs[tab[i + 1]] != -1);
+ dw |= (R300_SIGNED |
+ DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT;
+ if (i + 2 == nr) {
+ dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
+ }
}
dst[i >> 1] = dw;
}
- if (nr & 1) {
- dw = t_vir0_size(&dt[tab[nr - 1]]) | (inputs[tab[nr - 1]]
- << 8) |
- (t_type(&dt[tab[nr - 1]]) << 14);
- dw |= 1 << 13;
-
- dst[nr >> 1] = dw;
- }
-
return (nr + 1) >> 1;
}
-static GLuint t_swizzle(int swizzle[4])
+static GLuint r300VAPInputRoute1Swizzle(int swizzle[4])
{
- return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
- (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
- (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
- (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
+ return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |
+ (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
+ (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
+ (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT);
}
-static GLuint t_vir1(uint32_t * dst, int swizzle[][4], GLuint nr)
+GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
{
- GLuint i;
+ GLuint i, dw;
- for (i = 0; i + 1 < nr; i += 2) {
- dst[i >> 1] = t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
- dst[i >> 1] |=
- (t_swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE)
- << 16;
+ for (i = 0; i < nr; i += 2) {
+ dw = (r300VAPInputRoute1Swizzle(swizzle[i]) |
+ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
+ R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT;
+ if (i + 1 < nr) {
+ dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) |
+ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
+ R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
+ }
+ dst[i >> 1] = dw;
}
- if (nr & 1)
- dst[nr >> 1] =
- t_swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE;
-
return (nr + 1) >> 1;
}
-static GLuint t_emit_size(struct dt *dt)
+GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
{
- return dt->size;
+ /* No idea what this value means. I have seen other values written to
+ * this register... */
+ return 0x5555;
}
-static GLuint t_vic(GLcontext * ctx, GLuint InputsRead)
+GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
GLuint i, vic_1 = 0;
if (InputsRead & (1 << VERT_ATTRIB_POS))
@@ -350,177 +284,188 @@ static GLuint t_vic(GLcontext * ctx, GLuint InputsRead)
if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
vic_1 |= R300_INPUT_CNTL_COLOR;
- r300->state.texture.tc_count = 0;
+ rmesa->state.texture.tc_count = 0;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
- r300->state.texture.tc_count++;
+ rmesa->state.texture.tc_count++;
vic_1 |= R300_INPUT_CNTL_TC0 << i;
}
return vic_1;
}
+GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
+{
+ GLuint ret = 0;
+
+ if (OutputsWritten & (1 << VERT_RESULT_HPOS))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+
+ if (OutputsWritten & (1 << VERT_RESULT_COL0))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
+
+ if (OutputsWritten & (1 << VERT_RESULT_COL1))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
+
+ if (OutputsWritten & (1 << VERT_RESULT_BFC0)
+ || OutputsWritten & (1 << VERT_RESULT_BFC1))
+ ret |=
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
+
+#if 0
+ if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
+#endif
+
+ if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+
+ return ret;
+}
+
+GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
+{
+ GLuint i, ret = 0;
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
+ ret |= (4 << (3 * i));
+ }
+ }
+
+ return ret;
+}
+
/* Emit vertex data to GART memory
* Route inputs to the vertex processor
* This function should never return R300_FALLBACK_TCL when using software tcl.
*/
-
int r300EmitArrays(GLcontext * ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- r300ContextPtr r300 = rmesa;
- struct radeon_vertex_buffer *VB = &rmesa->state.VB;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
GLuint nr;
- GLuint count = VB->Count;
+ GLuint count = vb->Count;
GLuint i;
GLuint InputsRead = 0, OutputsWritten = 0;
int *inputs = NULL;
int vir_inputs[VERT_ATTRIB_MAX];
GLint tab[VERT_ATTRIB_MAX];
int swizzle[VERT_ATTRIB_MAX][4];
+ struct r300_vertex_program *prog =
+ (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
if (hw_tcl_on) {
- struct r300_vertex_program *prog =
- (struct r300_vertex_program *)
- CURRENT_VERTEX_SHADER(ctx);
inputs = prog->inputs;
- InputsRead = CURRENT_VERTEX_SHADER(ctx)->key.InputsRead;
- OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+ InputsRead = prog->key.InputsRead;
+ OutputsWritten = prog->key.OutputsWritten;
} else {
- DECLARE_RENDERINPUTS(inputs_bitset);
- inputs = r300->state.sw_tcl_inputs;
+ inputs = rmesa->state.sw_tcl_inputs;
- RENDERINPUTS_COPY(inputs_bitset,
- TNL_CONTEXT(ctx)->render_inputs_bitset);
+ DECLARE_RENDERINPUTS(render_inputs_bitset);
+ RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
- assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_POS));
- InputsRead |= 1 << VERT_ATTRIB_POS;
- OutputsWritten |= 1 << VERT_RESULT_HPOS;
+ vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr;
- assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_NORMAL)
- == 0);
+ assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
+ assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
+ //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
- assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR0));
- InputsRead |= 1 << VERT_ATTRIB_COLOR0;
- OutputsWritten |= 1 << VERT_RESULT_COL0;
+ if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
+ InputsRead |= 1 << VERT_ATTRIB_POS;
+ OutputsWritten |= 1 << VERT_RESULT_HPOS;
+ }
- if (RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR1)) {
+ if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
+ InputsRead |= 1 << VERT_ATTRIB_COLOR0;
+ OutputsWritten |= 1 << VERT_RESULT_COL0;
+ }
+
+ if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) {
InputsRead |= 1 << VERT_ATTRIB_COLOR1;
OutputsWritten |= 1 << VERT_RESULT_COL1;
}
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
- if (RENDERINPUTS_TEST
- (inputs_bitset, _TNL_ATTRIB_TEX(i))) {
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) {
InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
}
+ }
- for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
- if (InputsRead & (1 << i))
+ for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (InputsRead & (1 << i)) {
inputs[i] = nr++;
- else
+ } else {
inputs[i] = -1;
-
- if (!
- (r300->radeon.radeonScreen->
- chip_flags & RADEON_CHIPSET_TCL)) {
- /* Fixed, apply to vir0 only */
- memcpy(vir_inputs, inputs,
- VERT_ATTRIB_MAX * sizeof(int));
- inputs = vir_inputs;
-
- if (InputsRead & VERT_ATTRIB_POS)
- inputs[VERT_ATTRIB_POS] = 0;
-
- if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
- inputs[VERT_ATTRIB_COLOR0] = 2;
-
- if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
- inputs[VERT_ATTRIB_COLOR1] = 3;
-
- for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
- if (InputsRead & (1 << i))
- inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+ }
}
- RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset,
- inputs_bitset);
+ /* Fixed, apply to vir0 only */
+ memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int));
+ inputs = vir_inputs;
+ if (InputsRead & VERT_ATTRIB_POS)
+ inputs[VERT_ATTRIB_POS] = 0;
+ if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
+ inputs[VERT_ATTRIB_COLOR0] = 2;
+ if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
+ inputs[VERT_ATTRIB_COLOR1] = 3;
+ for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
+ if (InputsRead & (1 << i))
+ inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+
+ RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
}
+
assert(InputsRead);
assert(OutputsWritten);
- for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
- if (InputsRead & (1 << i))
+ for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (InputsRead & (1 << i)) {
tab[nr++] = i;
+ }
+ }
- if (nr > R300_MAX_AOS_ARRAYS)
+ if (nr > R300_MAX_AOS_ARRAYS) {
return R300_FALLBACK_TCL;
+ }
for (i = 0; i < nr; i++) {
- int ci;
- int comp_size, fix, found = 0;
+ int ci, fix, found = 0;
swizzle[i][0] = SWIZZLE_ZERO;
swizzle[i][1] = SWIZZLE_ZERO;
swizzle[i][2] = SWIZZLE_ZERO;
swizzle[i][3] = SWIZZLE_ONE;
- for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++)
+ for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
swizzle[i][ci] = ci;
-
-#if MESA_BIG_ENDIAN
-#define SWAP_INT(a, b) do { \
- int __temp; \
- __temp = a;\
- a = b; \
- b = __temp; \
-} while (0)
-
- if (VB->AttribPtr[tab[i]].type == GL_UNSIGNED_BYTE) {
- SWAP_INT(swizzle[i][0], swizzle[i][3]);
- SWAP_INT(swizzle[i][1], swizzle[i][2]);
}
-#endif /* MESA_BIG_ENDIAN */
- if (r300IsGartMemory(rmesa, VB->AttribPtr[tab[i]].data,
- /*(count-1)*stride */ 4)) {
- if (VB->AttribPtr[tab[i]].stride % 4)
+ if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
+ if (vb->AttribPtr[tab[i]]->stride % 4) {
return R300_FALLBACK_TCL;
-
- rmesa->state.aos[i].address =
- VB->AttribPtr[tab[i]].data;
+ }
+ rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
rmesa->state.aos[i].start = 0;
- rmesa->state.aos[i].aos_offset =
- r300GartOffsetFromVirtual(rmesa,
- VB->
- AttribPtr[tab[i]].data);
- rmesa->state.aos[i].aos_stride =
- VB->AttribPtr[tab[i]].stride / 4;
-
- rmesa->state.aos[i].aos_size =
- t_emit_size(&VB->AttribPtr[tab[i]]);
+ rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
+ rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
+ rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
} else {
- /* TODO: r300EmitVec can only handle 4 byte vectors */
- if (VB->AttribPtr[tab[i]].type != GL_FLOAT)
- return R300_FALLBACK_TCL;
-
r300EmitVec(ctx, &rmesa->state.aos[i],
- VB->AttribPtr[tab[i]].data,
- t_emit_size(&VB->AttribPtr[tab[i]]),
- VB->AttribPtr[tab[i]].stride, count);
+ vb->AttribPtr[tab[i]]->data,
+ vb->AttribPtr[tab[i]]->size,
+ vb->AttribPtr[tab[i]]->stride, count);
}
- rmesa->state.aos[i].aos_size =
- t_aos_size(&VB->AttribPtr[tab[i]]);
-
- comp_size = _mesa_sizeof_type(VB->AttribPtr[tab[i]].type);
+ rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
- for (fix = 0; fix <= 4 - VB->AttribPtr[tab[i]].size; fix++) {
- if ((rmesa->state.aos[i].aos_offset -
- comp_size * fix) % 4)
+ for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
+ if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
continue;
-
+ }
found = 1;
break;
}
@@ -529,70 +474,41 @@ int r300EmitArrays(GLcontext * ctx)
if (fix > 0) {
WARN_ONCE("Feeling lucky?\n");
}
-
- rmesa->state.aos[i].aos_offset -= comp_size * fix;
-
- for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++)
+ rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
+ for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
swizzle[i][ci] += fix;
+ }
} else {
WARN_ONCE
("Cannot handle offset %x with stride %d, comp %d\n",
rmesa->state.aos[i].aos_offset,
rmesa->state.aos[i].aos_stride,
- VB->AttribPtr[tab[i]].size);
+ vb->AttribPtr[tab[i]]->size);
return R300_FALLBACK_TCL;
}
}
- /* setup INPUT_ROUTE */
- R300_STATECHANGE(r300, vir[0]);
- ((drm_r300_cmd_header_t *) r300->hw.vir[0].cmd)->packet0.count =
- t_vir0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0], VB->AttribPtr,
- inputs, tab, nr);
-
- R300_STATECHANGE(r300, vir[1]);
- ((drm_r300_cmd_header_t *) r300->hw.vir[1].cmd)->packet0.count =
- t_vir1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr);
-
- /* Set up input_cntl */
- /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
- R300_STATECHANGE(r300, vic);
- r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555; /* Hard coded value, no idea what it means */
- r300->hw.vic.cmd[R300_VIC_CNTL_1] = t_vic(ctx, InputsRead);
-
- /* Stage 3: VAP output */
-
- R300_STATECHANGE(r300, vof);
-
- r300->hw.vof.cmd[R300_VOF_CNTL_0] = 0;
- r300->hw.vof.cmd[R300_VOF_CNTL_1] = 0;
-
- if (OutputsWritten & (1 << VERT_RESULT_HPOS))
- r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
- R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
-
- if (OutputsWritten & (1 << VERT_RESULT_COL0))
- r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
-
- if (OutputsWritten & (1 << VERT_RESULT_COL1))
- r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
-
- /*if(OutputsWritten & (1 << VERT_RESULT_BFC0))
- r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
-
- if(OutputsWritten & (1 << VERT_RESULT_BFC1))
- r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */
- //if(OutputsWritten & (1 << VERT_RESULT_FOGC))
-
- if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
- r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
- R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
-
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
- if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i)))
- r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4 << (3 * i));
+ /* Setup INPUT_ROUTE. */
+ R300_STATECHANGE(rmesa, vir[0]);
+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
+ r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
+ vb->AttribPtr, inputs, tab, nr);
+ R300_STATECHANGE(rmesa, vir[1]);
+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
+ r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
+ nr);
+
+ /* Setup INPUT_CNTL. */
+ R300_STATECHANGE(rmesa, vic);
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+
+ /* Setup OUTPUT_VTX_FMT. */
+ R300_STATECHANGE(rmesa, vof);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_0] =
+ r300VAPOutputCntl0(ctx, OutputsWritten);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_1] =
+ r300VAPOutputCntl1(ctx, OutputsWritten);
rmesa->state.aos_count = nr;
@@ -625,3 +541,19 @@ void r300ReleaseArrays(GLcontext * ctx)
r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
}
}
+
+void r300EmitCacheFlush(r300ContextPtr rmesa)
+{
+ int cmd_reserved = 0;
+ int cmd_written = 0;
+
+ drm_radeon_cmd_header_t *cmd = NULL;
+
+ reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
+ e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+
+ reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
+ e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+}
diff --git a/r300/r300_emit.h b/r300/r300_emit.h
index 7be098f..5950539 100644
--- a/r300/r300_emit.h
+++ b/r300/r300_emit.h
@@ -44,22 +44,13 @@
#include "r300_cmdbuf.h"
#include "radeon_reg.h"
-/*
- * CP type-3 packets
- */
-#define RADEON_CP_PACKET3_UNK1B 0xC0001B00
-#define RADEON_CP_PACKET3_INDX_BUFFER 0xC0003300
-#define RADEON_CP_PACKET3_3D_DRAW_VBUF_2 0xC0003400
-#define RADEON_CP_PACKET3_3D_DRAW_IMMD_2 0xC0003500
-#define RADEON_CP_PACKET3_3D_DRAW_INDX_2 0xC0003600
-#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00
-#define RADEON_CP_PACKET3_3D_CLEAR_ZMASK 0xC0003202
-#define RADEON_CP_PACKET3_3D_CLEAR_CMASK 0xC0003802
-#define RADEON_CP_PACKET3_3D_CLEAR_HIZ 0xC0003702
-
+/* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up
+ * with DRM */
#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
+#define CP_PACKET3( pkt, n ) \
+ (RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
-static __inline__ uint32_t cmdpacket0(int reg, int count)
+static INLINE uint32_t cmdpacket0(int reg, int count)
{
drm_r300_cmd_header_t cmd;
@@ -71,7 +62,7 @@ static __inline__ uint32_t cmdpacket0(int reg, int count)
return cmd.u;
}
-static __inline__ uint32_t cmdvpu(int addr, int count)
+static INLINE uint32_t cmdvpu(int addr, int count)
{
drm_r300_cmd_header_t cmd;
@@ -83,7 +74,21 @@ static __inline__ uint32_t cmdvpu(int addr, int count)
return cmd.u;
}
-static __inline__ uint32_t cmdpacket3(int packet)
+static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.r500fp.cmd_type = R300_CMD_R500FP;
+ cmd.r500fp.count = count;
+ cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8;
+ cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0;
+ cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0;
+ cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF);
+
+ return cmd.u;
+}
+
+static INLINE uint32_t cmdpacket3(int packet)
{
drm_r300_cmd_header_t cmd;
@@ -93,7 +98,7 @@ static __inline__ uint32_t cmdpacket3(int packet)
return cmd.u;
}
-static __inline__ uint32_t cmdcpdelay(unsigned short count)
+static INLINE uint32_t cmdcpdelay(unsigned short count)
{
drm_r300_cmd_header_t cmd;
@@ -103,7 +108,7 @@ static __inline__ uint32_t cmdcpdelay(unsigned short count)
return cmd.u;
}
-static __inline__ uint32_t cmdwait(unsigned char flags)
+static INLINE uint32_t cmdwait(unsigned char flags)
{
drm_r300_cmd_header_t cmd;
@@ -113,7 +118,7 @@ static __inline__ uint32_t cmdwait(unsigned char flags)
return cmd.u;
}
-static __inline__ uint32_t cmdpacify(void)
+static INLINE uint32_t cmdpacify(void)
{
drm_r300_cmd_header_t cmd;
@@ -175,6 +180,19 @@ static __inline__ uint32_t cmdpacify(void)
cmd[0].i = cmdvpu((dest), _n/4); \
} while (0);
+#define r500fp_start_fragment(dest, length) \
+ do { \
+ int _n; \
+ _n = (length); \
+ cmd = (drm_radeon_cmd_header_t*) \
+ r300AllocCmdBuf(rmesa, \
+ (_n+1), \
+ __FUNCTION__); \
+ cmd_reserved = _n+1; \
+ cmd_written =1; \
+ cmd[0].i = cmdr500fp((dest), _n/6, 0, 0); \
+ } while (0);
+
#define start_packet3(packet, count) \
{ \
int _n; \
@@ -200,7 +218,7 @@ static __inline__ uint32_t cmdpacify(void)
/**
* Must be sent to switch to 2d commands
*/
-void static inline end_3d(r300ContextPtr rmesa)
+void static INLINE end_3d(r300ContextPtr rmesa)
{
drm_radeon_cmd_header_t *cmd = NULL;
@@ -209,7 +227,7 @@ void static inline end_3d(r300ContextPtr rmesa)
cmd[0].header.cmd_type = R300_CMD_END3D;
}
-void static inline cp_delay(r300ContextPtr rmesa, unsigned short count)
+void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
{
drm_radeon_cmd_header_t *cmd = NULL;
@@ -218,7 +236,7 @@ void static inline cp_delay(r300ContextPtr rmesa, unsigned short count)
cmd[0].i = cmdcpdelay(count);
}
-void static inline cp_wait(r300ContextPtr rmesa, unsigned char flags)
+void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags)
{
drm_radeon_cmd_header_t *cmd = NULL;
@@ -234,5 +252,17 @@ void r300UseArrays(GLcontext * ctx);
#endif
extern void r300ReleaseArrays(GLcontext * ctx);
+extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
+extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
+
+extern void r300EmitCacheFlush(r300ContextPtr rmesa);
+
+extern GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
+ int *inputs, GLint * tab, GLuint nr);
+extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr);
+extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead);
+extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
+extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten);
+extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten);
#endif
diff --git a/r300/r300_fragprog.c b/r300/r300_fragprog.c
index cce8e68..453dda7 100644
--- a/r300/r300_fragprog.c
+++ b/r300/r300_fragprog.c
@@ -28,16 +28,12 @@
/**
* \file
*
- * \author Ben Skeggs <darktama@iinet.net.au>
+ * Fragment program compiler. Perform transformations on the intermediate
+ * representation until the program is in a form where we can translate
+ * it more or less directly into machine-readable form.
*
+ * \author Ben Skeggs <darktama@iinet.net.au>
* \author Jerome Glisse <j.glisse@gmail.com>
- *
- * \todo Depth write, WPOS/FOGC inputs
- *
- * \todo FogOption
- *
- * \todo Verify results of opcodes for accuracy, I've only checked them in
- * specific cases.
*/
#include "glheader.h"
@@ -49,1953 +45,233 @@
#include "r300_context.h"
#include "r300_fragprog.h"
-#include "r300_reg.h"
+#include "r300_fragprog_swizzle.h"
#include "r300_state.h"
-/*
- * Usefull macros and values
- */
-#define ERROR(fmt, args...) do { \
- fprintf(stderr, "%s::%s(): " fmt "\n", \
- __FILE__, __FUNCTION__, ##args); \
- fp->error = GL_TRUE; \
- } while(0)
-
-#define PFS_INVAL 0xFFFFFFFF
-#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
-
-#define SWIZZLE_XYZ 0
-#define SWIZZLE_XXX 1
-#define SWIZZLE_YYY 2
-#define SWIZZLE_ZZZ 3
-#define SWIZZLE_WWW 4
-#define SWIZZLE_YZX 5
-#define SWIZZLE_ZXY 6
-#define SWIZZLE_WZY 7
-#define SWIZZLE_111 8
-#define SWIZZLE_000 9
-#define SWIZZLE_HHH 10
-
-#define swizzle(r, x, y, z, w) do_swizzle(fp, r, \
- ((SWIZZLE_##x<<0)| \
- (SWIZZLE_##y<<3)| \
- (SWIZZLE_##z<<6)| \
- (SWIZZLE_##w<<9)), \
- 0)
-
-#define REG_TYPE_INPUT 0
-#define REG_TYPE_OUTPUT 1
-#define REG_TYPE_TEMP 2
-#define REG_TYPE_CONST 3
-
-#define REG_TYPE_SHIFT 0
-#define REG_INDEX_SHIFT 2
-#define REG_VSWZ_SHIFT 8
-#define REG_SSWZ_SHIFT 13
-#define REG_NEGV_SHIFT 18
-#define REG_NEGS_SHIFT 19
-#define REG_ABS_SHIFT 20
-#define REG_NO_USE_SHIFT 21 // Hack for refcounting
-#define REG_VALID_SHIFT 22 // Does the register contain a defined value?
-#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)?
-
-#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT)
-#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT)
-#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT)
-#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT)
-#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT)
-#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT)
-#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT)
-#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT)
-#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT)
-#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT)
-
-#define REG(type, index, vswz, sswz, nouse, valid, builtin) \
- (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \
- ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \
- ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \
- ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \
- ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \
- ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \
- ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
-#define REG_GET_TYPE(reg) \
- ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT)
-#define REG_GET_INDEX(reg) \
- ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT)
-#define REG_GET_VSWZ(reg) \
- ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT)
-#define REG_GET_SSWZ(reg) \
- ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT)
-#define REG_GET_NO_USE(reg) \
- ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT)
-#define REG_GET_VALID(reg) \
- ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT)
-#define REG_GET_BUILTIN(reg) \
- ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT)
-#define REG_SET_TYPE(reg, type) \
- reg = ((reg & ~REG_TYPE_MASK) | \
- ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK))
-#define REG_SET_INDEX(reg, index) \
- reg = ((reg & ~REG_INDEX_MASK) | \
- ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK))
-#define REG_SET_VSWZ(reg, vswz) \
- reg = ((reg & ~REG_VSWZ_MASK) | \
- ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK))
-#define REG_SET_SSWZ(reg, sswz) \
- reg = ((reg & ~REG_SSWZ_MASK) | \
- ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
-#define REG_SET_NO_USE(reg, nouse) \
- reg = ((reg & ~REG_NO_USE_MASK) | \
- ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK))
-#define REG_SET_VALID(reg, valid) \
- reg = ((reg & ~REG_VALID_MASK) | \
- ((valid << REG_VALID_SHIFT) & REG_VALID_MASK))
-#define REG_SET_BUILTIN(reg, builtin) \
- reg = ((reg & ~REG_BUILTIN_MASK) | \
- ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK))
-#define REG_ABS(reg) \
- reg = (reg | REG_ABS_MASK)
-#define REG_NEGV(reg) \
- reg = (reg | REG_NEGV_MASK)
-#define REG_NEGS(reg) \
- reg = (reg | REG_NEGS_MASK)
-
-/*
- * Datas structures for fragment program generation
- */
-
-/* description of r300 native hw instructions */
-static const struct {
- const char *name;
- int argc;
- int v_op;
- int s_op;
-} r300_fpop[] = {
- /* *INDENT-OFF* */
- {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD},
- {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4},
- {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4},
- {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN},
- {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX},
- {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP},
- {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC},
- {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2},
- {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2},
- {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP},
- {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ},
- {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL},
- {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL},
- /* *INDENT-ON* */
-};
-
-/* vector swizzles r300 can support natively, with a couple of
- * cases we handle specially
- *
- * REG_VSWZ/REG_SSWZ is an index into this table
- */
-
-/* mapping from SWIZZLE_* to r300 native values for scalar insns */
-#define SWIZZLE_HALF 6
-
-#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
- SWIZZLE_##y, \
- SWIZZLE_##z, \
- SWIZZLE_ZERO))
-/* native swizzles */
-static const struct r300_pfs_swizzle {
- GLuint hash; /* swizzle value this matches */
- GLuint base; /* base value for hw swizzle */
- GLuint stride; /* difference in base between arg0/1/2 */
- GLuint flags;
-} v_swiz[] = {
- /* *INDENT-OFF* */
- {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR},
- {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH},
- {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0},
- {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0},
- {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0},
- {PFS_INVAL, 0, 0, 0},
- /* *INDENT-ON* */
-};
-
-/* used during matching of non-native swizzles */
-#define SWZ_X_MASK (7 << 0)
-#define SWZ_Y_MASK (7 << 3)
-#define SWZ_Z_MASK (7 << 6)
-#define SWZ_W_MASK (7 << 9)
-static const struct {
- GLuint hash; /* used to mask matching swizzle components */
- int mask; /* actual outmask */
- int count; /* count of components matched */
-} s_mask[] = {
- /* *INDENT-OFF* */
- {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3},
- {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2},
- {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2},
- {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2},
- {SWZ_X_MASK, 1, 1},
- {SWZ_Y_MASK, 2, 1},
- {SWZ_Z_MASK, 4, 1},
- {PFS_INVAL, PFS_INVAL, PFS_INVAL}
- /* *INDENT-ON* */
-};
-
-static const struct {
- int base; /* hw value of swizzle */
- int stride; /* difference between SRC0/1/2 */
- GLuint flags;
-} s_swiz[] = {
- /* *INDENT-OFF* */
- {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR},
- {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR},
- {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR},
- {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR},
- {R300_FPI2_ARGA_ZERO, 0, 0},
- {R300_FPI2_ARGA_ONE, 0, 0},
- {R300_FPI2_ARGA_HALF, 0, 0}
- /* *INDENT-ON* */
-};
-
-/* boiler-plate reg, for convenience */
-static const GLuint undef = REG(REG_TYPE_TEMP,
- 0,
- SWIZZLE_XYZ,
- SWIZZLE_W,
- GL_FALSE,
- GL_FALSE,
- GL_FALSE);
-
-/* constant one source */
-static const GLuint pfs_one = REG(REG_TYPE_CONST,
- 0,
- SWIZZLE_111,
- SWIZZLE_ONE,
- GL_FALSE,
- GL_TRUE,
- GL_TRUE);
-
-/* constant half source */
-static const GLuint pfs_half = REG(REG_TYPE_CONST,
- 0,
- SWIZZLE_HHH,
- SWIZZLE_HALF,
- GL_FALSE,
- GL_TRUE,
- GL_TRUE);
-
-/* constant zero source */
-static const GLuint pfs_zero = REG(REG_TYPE_CONST,
- 0,
- SWIZZLE_000,
- SWIZZLE_ZERO,
- GL_FALSE,
- GL_TRUE,
- GL_TRUE);
+#include "radeon_nqssadce.h"
+#include "radeon_program_alu.h"
-/*
- * Common functions prototypes
- */
-static void dump_program(struct r300_fragment_program *fp);
-static void emit_arith(struct r300_fragment_program *fp, int op,
- GLuint dest, int mask,
- GLuint src0, GLuint src1, GLuint src2, int flags);
-/**
- * Get an R300 temporary that can be written to in the given slot.
- */
-static int get_hw_temp(struct r300_fragment_program *fp, int slot)
+static void reset_srcreg(struct prog_src_register* reg)
{
- COMPILE_STATE;
- int r;
-
- for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
- if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot)
- break;
- }
-
- if (r >= PFS_NUM_TEMP_REGS) {
- ERROR("Out of hardware temps\n");
- return 0;
- }
- // Reserved is used to avoid the following scenario:
- // R300 temporary X is first assigned to Mesa temporary Y during vector ops
- // R300 temporary X is then assigned to Mesa temporary Z for further vector ops
- // Then scalar ops on Mesa temporary Z are emitted and move back in time
- // to overwrite the value of temporary Y.
- // End scenario.
- cs->hwtemps[r].reserved = cs->hwtemps[r].free;
- cs->hwtemps[r].free = -1;
-
- // Reset to some value that won't mess things up when the user
- // tries to read from a temporary that hasn't been assigned a value yet.
- // In the normal case, vector_valid and scalar_valid should be set to
- // a sane value by the first emit that writes to this temporary.
- cs->hwtemps[r].vector_valid = 0;
- cs->hwtemps[r].scalar_valid = 0;
-
- if (r > fp->max_temp_idx)
- fp->max_temp_idx = r;
-
- return r;
+ _mesa_bzero(reg, sizeof(*reg));
+ reg->Swizzle = SWIZZLE_NOOP;
}
-/**
- * Get an R300 temporary that will act as a TEX destination register.
- */
-static int get_hw_temp_tex(struct r300_fragment_program *fp)
+static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
{
- COMPILE_STATE;
- int r;
-
- for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
- if (cs->used_in_node & (1 << r))
- continue;
-
- // Note: Be very careful here
- if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0)
- break;
- }
-
- if (r >= PFS_NUM_TEMP_REGS)
- return get_hw_temp(fp, 0); /* Will cause an indirection */
-
- cs->hwtemps[r].reserved = cs->hwtemps[r].free;
- cs->hwtemps[r].free = -1;
-
- // Reset to some value that won't mess things up when the user
- // tries to read from a temporary that hasn't been assigned a value yet.
- // In the normal case, vector_valid and scalar_valid should be set to
- // a sane value by the first emit that writes to this temporary.
- cs->hwtemps[r].vector_valid = cs->nrslots;
- cs->hwtemps[r].scalar_valid = cs->nrslots;
-
- if (r > fp->max_temp_idx)
- fp->max_temp_idx = r;
-
- return r;
-}
-
-/**
- * Mark the given hardware register as free.
- */
-static void free_hw_temp(struct r300_fragment_program *fp, int idx)
-{
- COMPILE_STATE;
-
- // Be very careful here. Consider sequences like
- // MAD r0, r1,r2,r3
- // TEX r4, ...
- // The TEX instruction may be moved in front of the MAD instruction
- // due to the way nodes work. We don't want to alias r1 and r4 in
- // this case.
- // I'm certain the register allocation could be further sanitized,
- // but it's tricky because of stuff that can happen inside emit_tex
- // and emit_arith.
- cs->hwtemps[idx].free = cs->nrslots + 1;
-}
-
-/**
- * Create a new Mesa temporary register.
- */
-static GLuint get_temp_reg(struct r300_fragment_program *fp)
-{
- COMPILE_STATE;
- GLuint r = undef;
- GLuint index;
-
- index = ffs(~cs->temp_in_use);
- if (!index) {
- ERROR("Out of program temps\n");
- return r;
- }
-
- cs->temp_in_use |= (1 << --index);
- cs->temps[index].refcount = 0xFFFFFFFF;
- cs->temps[index].reg = -1;
-
- REG_SET_TYPE(r, REG_TYPE_TEMP);
- REG_SET_INDEX(r, index);
- REG_SET_VALID(r, GL_TRUE);
- return r;
-}
-
-/**
- * Create a new Mesa temporary register that will act as the destination
- * register for a texture read.
- */
-static GLuint get_temp_reg_tex(struct r300_fragment_program *fp)
-{
- COMPILE_STATE;
- GLuint r = undef;
- GLuint index;
-
- index = ffs(~cs->temp_in_use);
- if (!index) {
- ERROR("Out of program temps\n");
- return r;
- }
-
- cs->temp_in_use |= (1 << --index);
- cs->temps[index].refcount = 0xFFFFFFFF;
- cs->temps[index].reg = get_hw_temp_tex(fp);
-
- REG_SET_TYPE(r, REG_TYPE_TEMP);
- REG_SET_INDEX(r, index);
- REG_SET_VALID(r, GL_TRUE);
- return r;
-}
-
-/**
- * Free a Mesa temporary and the associated R300 temporary.
- */
-static void free_temp(struct r300_fragment_program *fp, GLuint r)
-{
- COMPILE_STATE;
- GLuint index = REG_GET_INDEX(r);
-
- if (!(cs->temp_in_use & (1 << index)))
- return;
+ gl_state_index fail_value_tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
+ };
+ struct prog_src_register reg = { 0, };
- if (REG_GET_TYPE(r) == REG_TYPE_TEMP) {
- free_hw_temp(fp, cs->temps[index].reg);
- cs->temps[index].reg = -1;
- cs->temp_in_use &= ~(1 << index);
- } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) {
- free_hw_temp(fp, cs->inputs[index].reg);
- cs->inputs[index].reg = -1;
- }
+ fail_value_tokens[2] = tmu;
+ reg.File = PROGRAM_STATE_VAR;
+ reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
+ reg.Swizzle = SWIZZLE_WWWW;
+ return reg;
}
/**
- * Emit a hardware constant/parameter.
+ * Transform TEX, TXP, TXB, and KIL instructions in the following way:
+ * - premultiply texture coordinates for RECT
+ * - extract operand swizzles
+ * - introduce a temporary register when write masks are needed
*
- * \p cp Stable pointer to an array of 4 floats.
- * The pointer must be stable in the sense that it remains to be valid
- * and hold the contents of the constant/parameter throughout the lifetime
- * of the fragment program (actually, up until the next time the fragment
- * program is translated).
- */
-static GLuint emit_const4fv(struct r300_fragment_program *fp,
- const GLfloat * cp)
-{
- GLuint reg = undef;
- int index;
-
- for (index = 0; index < fp->const_nr; ++index) {
- if (fp->constant[index] == cp)
- break;
- }
-
- if (index >= fp->const_nr) {
- if (index >= PFS_NUM_CONST_REGS) {
- ERROR("Out of hw constants!\n");
- return reg;
- }
-
- fp->const_nr++;
- fp->constant[index] = cp;
- }
-
- REG_SET_TYPE(reg, REG_TYPE_CONST);
- REG_SET_INDEX(reg, index);
- REG_SET_VALID(reg, GL_TRUE);
- return reg;
-}
-
-static inline GLuint negate(GLuint r)
-{
- REG_NEGS(r);
- REG_NEGV(r);
- return r;
-}
-
-/* Hack, to prevent clobbering sources used multiple times when
- * emulating non-native instructions
+ * \todo If/when r5xx uses the radeon_program architecture, this can probably
+ * be reused.
*/
-static inline GLuint keep(GLuint r)
-{
- REG_SET_NO_USE(r, GL_TRUE);
- return r;
-}
-
-static inline GLuint absolute(GLuint r)
+static GLboolean transform_TEX(
+ struct radeon_transform_context *t,
+ struct prog_instruction* orig_inst, void* data)
{
- REG_ABS(r);
- return r;
-}
-
-static int swz_native(struct r300_fragment_program *fp,
- GLuint src, GLuint * r, GLuint arbneg)
-{
- /* Native swizzle, handle negation */
- src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT);
-
- if ((arbneg & 0x7) == 0x0) {
- src = src & ~REG_NEGV_MASK;
- *r = src;
- } else if ((arbneg & 0x7) == 0x7) {
- src |= REG_NEGV_MASK;
- *r = src;
- } else {
- if (!REG_GET_VALID(*r))
- *r = get_temp_reg(fp);
- src |= REG_NEGV_MASK;
- emit_arith(fp,
- PFS_OP_MAD,
- *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0);
- src = src & ~REG_NEGV_MASK;
- emit_arith(fp,
- PFS_OP_MAD,
- *r,
- (arbneg ^ 0x7) | WRITEMASK_W,
- src, pfs_one, pfs_zero, 0);
- }
-
- return 3;
-}
-
-static int swz_emit_partial(struct r300_fragment_program *fp,
- GLuint src,
- GLuint * r, int mask, int mc, GLuint arbneg)
-{
- GLuint tmp;
- GLuint wmask = 0;
-
- if (!REG_GET_VALID(*r))
- *r = get_temp_reg(fp);
-
- /* A partial match, VSWZ/mask define what parts of the
- * desired swizzle we match
- */
- if (mc + s_mask[mask].count == 3) {
- wmask = WRITEMASK_W;
- src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT;
- }
-
- tmp = arbneg & s_mask[mask].mask;
- if (tmp) {
- tmp = tmp ^ s_mask[mask].mask;
- if (tmp) {
- emit_arith(fp,
- PFS_OP_MAD,
- *r,
- arbneg & s_mask[mask].mask,
- keep(src) | REG_NEGV_MASK,
- pfs_one, pfs_zero, 0);
- if (!wmask) {
- REG_SET_NO_USE(src, GL_TRUE);
- } else {
- REG_SET_NO_USE(src, GL_FALSE);
- }
- emit_arith(fp,
- PFS_OP_MAD,
- *r, tmp | wmask, src, pfs_one, pfs_zero, 0);
- } else {
- if (!wmask) {
- REG_SET_NO_USE(src, GL_TRUE);
- } else {
- REG_SET_NO_USE(src, GL_FALSE);
- }
- emit_arith(fp,
- PFS_OP_MAD,
- *r,
- (arbneg & s_mask[mask].mask) | wmask,
- src | REG_NEGV_MASK, pfs_one, pfs_zero, 0);
- }
- } else {
- if (!wmask) {
- REG_SET_NO_USE(src, GL_TRUE);
- } else {
- REG_SET_NO_USE(src, GL_FALSE);
- }
- emit_arith(fp, PFS_OP_MAD,
- *r,
- s_mask[mask].mask | wmask,
- src, pfs_one, pfs_zero, 0);
- }
-
- return s_mask[mask].count;
-}
-
-static GLuint do_swizzle(struct r300_fragment_program *fp,
- GLuint src, GLuint arbswz, GLuint arbneg)
-{
- GLuint r = undef;
- GLuint vswz;
- int c_mask = 0;
- int v_match = 0;
-
- /* If swizzling from something without an XYZW native swizzle,
- * emit result to a temp, and do new swizzle from the temp.
- */
-#if 0
- if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) {
- GLuint temp = get_temp_reg(fp);
- emit_arith(fp,
- PFS_OP_MAD,
- temp, WRITEMASK_XYZW, src, pfs_one, pfs_zero, 0);
- src = temp;
- }
-#endif
-
- if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) {
- GLuint vsrcswz =
- (v_swiz[REG_GET_VSWZ(src)].
- hash & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK)) |
- REG_GET_SSWZ(src) << 9;
- GLint i;
-
- GLuint newswz = 0;
- GLuint offset;
- for (i = 0; i < 4; ++i) {
- offset = GET_SWZ(arbswz, i);
-
- newswz |=
- (offset <= 3) ? GET_SWZ(vsrcswz,
- offset) << i *
- 3 : offset << i * 3;
- }
-
- arbswz = newswz & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK);
- REG_SET_SSWZ(src, GET_SWZ(newswz, 3));
- } else {
- /* set scalar swizzling */
- REG_SET_SSWZ(src, GET_SWZ(arbswz, 3));
-
- }
- do {
- vswz = REG_GET_VSWZ(src);
- do {
- int chash;
-
- REG_SET_VSWZ(src, vswz);
- chash = v_swiz[REG_GET_VSWZ(src)].hash &
- s_mask[c_mask].hash;
-
- if (chash == (arbswz & s_mask[c_mask].hash)) {
- if (s_mask[c_mask].count == 3) {
- v_match += swz_native(fp,
- src, &r, arbneg);
- } else {
- v_match += swz_emit_partial(fp,
- src,
- &r,
- c_mask,
- v_match,
- arbneg);
- }
-
- if (v_match == 3)
- return r;
-
- /* Fill with something invalid.. all 0's was
- * wrong before, matched SWIZZLE_X. So all
- * 1's will be okay for now
- */
- arbswz |= (PFS_INVAL & s_mask[c_mask].hash);
- }
- } while (v_swiz[++vswz].hash != PFS_INVAL);
- REG_SET_VSWZ(src, SWIZZLE_XYZ);
- } while (s_mask[++c_mask].hash != PFS_INVAL);
-
- ERROR("should NEVER get here\n");
- return r;
-}
-
-static GLuint t_src(struct r300_fragment_program *fp,
- struct prog_src_register fpsrc)
-{
- GLuint r = undef;
-
- switch (fpsrc.File) {
- case PROGRAM_TEMPORARY:
- REG_SET_INDEX(r, fpsrc.Index);
- REG_SET_VALID(r, GL_TRUE);
- REG_SET_TYPE(r, REG_TYPE_TEMP);
- break;
- case PROGRAM_INPUT:
- REG_SET_INDEX(r, fpsrc.Index);
- REG_SET_VALID(r, GL_TRUE);
- REG_SET_TYPE(r, REG_TYPE_INPUT);
- break;
- case PROGRAM_LOCAL_PARAM:
- r = emit_const4fv(fp,
- fp->mesa_program.Base.LocalParams[fpsrc.
- Index]);
- break;
- case PROGRAM_ENV_PARAM:
- r = emit_const4fv(fp,
- fp->ctx->FragmentProgram.Parameters[fpsrc.
- Index]);
- break;
- case PROGRAM_STATE_VAR:
- case PROGRAM_NAMED_PARAM:
- r = emit_const4fv(fp,
- fp->mesa_program.Base.Parameters->
- ParameterValues[fpsrc.Index]);
- break;
- default:
- ERROR("unknown SrcReg->File %x\n", fpsrc.File);
- return r;
- }
-
- /* no point swizzling ONE/ZERO/HALF constants... */
- if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO)
- r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase);
- return r;
-}
-
-static GLuint t_scalar_src(struct r300_fragment_program *fp,
- struct prog_src_register fpsrc)
-{
- struct prog_src_register src = fpsrc;
- int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */
-
- src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9));
-
- return t_src(fp, src);
-}
-
-static GLuint t_dst(struct r300_fragment_program *fp,
- struct prog_dst_register dest)
-{
- GLuint r = undef;
-
- switch (dest.File) {
- case PROGRAM_TEMPORARY:
- REG_SET_INDEX(r, dest.Index);
- REG_SET_VALID(r, GL_TRUE);
- REG_SET_TYPE(r, REG_TYPE_TEMP);
- return r;
- case PROGRAM_OUTPUT:
- REG_SET_TYPE(r, REG_TYPE_OUTPUT);
- switch (dest.Index) {
- case FRAG_RESULT_COLR:
- case FRAG_RESULT_DEPR:
- REG_SET_INDEX(r, dest.Index);
- REG_SET_VALID(r, GL_TRUE);
- return r;
- default:
- ERROR("Bad DstReg->Index 0x%x\n", dest.Index);
- return r;
- }
- default:
- ERROR("Bad DstReg->File 0x%x\n", dest.File);
- return r;
- }
-}
-
-static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex)
-{
- COMPILE_STATE;
- int idx;
- int index = REG_GET_INDEX(src);
-
- switch (REG_GET_TYPE(src)) {
- case REG_TYPE_TEMP:
- /* NOTE: if reg==-1 here, a source is being read that
- * hasn't been written to. Undefined results.
- */
- if (cs->temps[index].reg == -1)
- cs->temps[index].reg = get_hw_temp(fp, cs->nrslots);
-
- idx = cs->temps[index].reg;
-
- if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0))
- free_temp(fp, src);
- break;
- case REG_TYPE_INPUT:
- idx = cs->inputs[index].reg;
-
- if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0))
- free_hw_temp(fp, cs->inputs[index].reg);
- break;
- case REG_TYPE_CONST:
- return (index | SRC_CONST);
- default:
- ERROR("Invalid type for source reg\n");
- return (0 | SRC_CONST);
- }
+ struct r300_fragment_program_compiler *compiler =
+ (struct r300_fragment_program_compiler*)data;
+ struct prog_instruction inst = *orig_inst;
+ struct prog_instruction* tgt;
+ GLboolean destredirect = GL_FALSE;
+
+ if (inst.Opcode != OPCODE_TEX &&
+ inst.Opcode != OPCODE_TXB &&
+ inst.Opcode != OPCODE_TXP &&
+ inst.Opcode != OPCODE_KIL)
+ return GL_FALSE;
- if (!tex)
- cs->used_in_node |= (1 << idx);
+ if (inst.Opcode != OPCODE_KIL &&
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
- return idx;
-}
+ if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
+ tgt = radeonAppendInstructions(t->Program, 1);
-static int t_hw_dst(struct r300_fragment_program *fp,
- GLuint dest, GLboolean tex, int slot)
-{
- COMPILE_STATE;
- int idx;
- GLuint index = REG_GET_INDEX(dest);
- assert(REG_GET_VALID(dest));
-
- switch (REG_GET_TYPE(dest)) {
- case REG_TYPE_TEMP:
- if (cs->temps[REG_GET_INDEX(dest)].reg == -1) {
- if (!tex) {
- cs->temps[index].reg = get_hw_temp(fp, slot);
+ tgt->Opcode = OPCODE_MOV;
+ tgt->DstReg = inst.DstReg;
+ if (comparefunc == GL_ALWAYS) {
+ tgt->SrcReg[0].File = PROGRAM_BUILTIN;
+ tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
} else {
- cs->temps[index].reg = get_hw_temp_tex(fp);
+ tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
}
+ return GL_TRUE;
}
- idx = cs->temps[index].reg;
-
- if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0))
- free_temp(fp, dest);
-
- cs->dest_in_node |= (1 << idx);
- cs->used_in_node |= (1 << idx);
- break;
- case REG_TYPE_OUTPUT:
- switch (index) {
- case FRAG_RESULT_COLR:
- fp->node[fp->cur_node].flags |=
- R300_PFS_NODE_OUTPUT_COLOR;
- break;
- case FRAG_RESULT_DEPR:
- fp->node[fp->cur_node].flags |=
- R300_PFS_NODE_OUTPUT_DEPTH;
- break;
- }
- return index;
- break;
- default:
- ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
- return 0;
- }
- return idx;
-}
-
-static void emit_nop(struct r300_fragment_program *fp)
-{
- COMPILE_STATE;
-
- if (cs->nrslots >= PFS_MAX_ALU_INST) {
- ERROR("Out of ALU instruction slots\n");
- return;
+ inst.DstReg.File = PROGRAM_TEMPORARY;
+ inst.DstReg.Index = radeonFindFreeTemporary(t);
+ inst.DstReg.WriteMask = WRITEMASK_XYZW;
}
- fp->alu.inst[cs->nrslots].inst0 = NOP_INST0;
- fp->alu.inst[cs->nrslots].inst1 = NOP_INST1;
- fp->alu.inst[cs->nrslots].inst2 = NOP_INST2;
- fp->alu.inst[cs->nrslots].inst3 = NOP_INST3;
- cs->nrslots++;
-}
-
-static void emit_tex(struct r300_fragment_program *fp,
- struct prog_instruction *fpi, int opcode)
-{
- COMPILE_STATE;
- GLuint coord = t_src(fp, fpi->SrcReg[0]);
- GLuint dest = undef, rdest = undef;
- GLuint din, uin;
- int unit = fpi->TexSrcUnit;
- int hwsrc, hwdest;
- GLuint tempreg = 0;
-
- uin = cs->used_in_node;
- din = cs->dest_in_node;
-
- /* Resolve source/dest to hardware registers */
- if (opcode != R300_FPITX_OP_KIL) {
- if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
- /**
- * Hardware uses [0..1]x[0..1] range for rectangle textures
- * instead of [0..Width]x[0..Height].
- * Add a scaling instruction.
- *
- * \todo Refactor this once we have proper rewriting/optimization
- * support for programs.
- */
- gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
- 0
- };
- int factor_index;
- GLuint factorreg;
-
- tokens[2] = unit;
- factor_index =
- _mesa_add_state_reference(fp->mesa_program.Base.
- Parameters, tokens);
- factorreg =
- emit_const4fv(fp,
- fp->mesa_program.Base.Parameters->
- ParameterValues[factor_index]);
- tempreg = keep(get_temp_reg(fp));
-
- emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
- coord, factorreg, pfs_zero, 0);
-
- /* Ensure correct node indirection */
- uin = cs->used_in_node;
- din = cs->dest_in_node;
-
- hwsrc = t_hw_src(fp, tempreg, GL_TRUE);
- } else {
- hwsrc = t_hw_src(fp, coord, GL_TRUE);
- }
-
- dest = t_dst(fp, fpi->DstReg);
- /* r300 doesn't seem to be able to do TEX->output reg */
- if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
- rdest = dest;
- dest = get_temp_reg_tex(fp);
- }
- hwdest =
- t_hw_dst(fp, dest, GL_TRUE,
- fp->node[fp->cur_node].alu_offset);
-
- /* Use a temp that hasn't been used in this node, rather
- * than causing an indirection
- */
- if (uin & (1 << hwdest)) {
- free_hw_temp(fp, hwdest);
- hwdest = get_hw_temp_tex(fp);
- cs->temps[REG_GET_INDEX(dest)].reg = hwdest;
- }
- } else {
- hwdest = 0;
- unit = 0;
- hwsrc = t_hw_src(fp, coord, GL_TRUE);
- }
-
- /* Indirection if source has been written in this node, or if the
- * dest has been read/written in this node
+ /* Hardware uses [0..1]x[0..1] range for rectangle textures
+ * instead of [0..Width]x[0..Height].
+ * Add a scaling instruction.
*/
- if ((REG_GET_TYPE(coord) != REG_TYPE_CONST &&
- (din & (1 << hwsrc))) || (uin & (1 << hwdest))) {
-
- /* Finish off current node */
- if (fp->node[fp->cur_node].alu_offset == cs->nrslots)
- emit_nop(fp);
-
- fp->node[fp->cur_node].alu_end =
- cs->nrslots - fp->node[fp->cur_node].alu_offset - 1;
- assert(fp->node[fp->cur_node].alu_end >= 0);
-
- if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) {
- ERROR("too many levels of texture indirection\n");
- return;
- }
-
- /* Start new node */
- fp->node[fp->cur_node].tex_offset = fp->tex.length;
- fp->node[fp->cur_node].alu_offset = cs->nrslots;
- fp->node[fp->cur_node].tex_end = -1;
- fp->node[fp->cur_node].alu_end = -1;
- fp->node[fp->cur_node].flags = 0;
- cs->used_in_node = 0;
- cs->dest_in_node = 0;
- }
+ if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
+ gl_state_index tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
+ 0
+ };
- if (fp->cur_node == 0)
- fp->first_node_has_tex = 1;
+ int tempreg = radeonFindFreeTemporary(t);
+ int factor_index;
- fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT)
- | (hwdest << R300_FPITX_DST_SHIFT)
- | (unit << R300_FPITX_IMAGE_SHIFT)
- /* not entirely sure about this */
- | (opcode << R300_FPITX_OPCODE_SHIFT);
+ tokens[2] = inst.TexSrcUnit;
+ factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
- cs->dest_in_node |= (1 << hwdest);
- if (REG_GET_TYPE(coord) != REG_TYPE_CONST)
- cs->used_in_node |= (1 << hwsrc);
+ tgt = radeonAppendInstructions(t->Program, 1);
- fp->node[fp->cur_node].tex_end++;
+ tgt->Opcode = OPCODE_MUL;
+ tgt->DstReg.File = PROGRAM_TEMPORARY;
+ tgt->DstReg.Index = tempreg;
+ tgt->SrcReg[0] = inst.SrcReg[0];
+ tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
+ tgt->SrcReg[1].Index = factor_index;
- /* Copy from temp to output if needed */
- if (REG_GET_VALID(rdest)) {
- emit_arith(fp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest,
- pfs_one, pfs_zero, 0);
- free_temp(fp, dest);
+ reset_srcreg(&inst.SrcReg[0]);
+ inst.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst.SrcReg[0].Index = tempreg;
}
- /* Free temp register */
- if (tempreg != 0)
- free_temp(fp, tempreg);
-}
-
-/**
- * Returns the first slot where we could possibly allow writing to dest,
- * according to register allocation.
- */
-static int get_earliest_allowed_write(struct r300_fragment_program *fp,
- GLuint dest, int mask)
-{
- COMPILE_STATE;
- int idx;
- int pos;
- GLuint index = REG_GET_INDEX(dest);
- assert(REG_GET_VALID(dest));
-
- switch (REG_GET_TYPE(dest)) {
- case REG_TYPE_TEMP:
- if (cs->temps[index].reg == -1)
- return 0;
-
- idx = cs->temps[index].reg;
- break;
- case REG_TYPE_OUTPUT:
- return 0;
- default:
- ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
- return 0;
- }
-
- pos = cs->hwtemps[idx].reserved;
- if (mask & WRITEMASK_XYZ) {
- if (pos < cs->hwtemps[idx].vector_lastread)
- pos = cs->hwtemps[idx].vector_lastread;
- }
- if (mask & WRITEMASK_W) {
- if (pos < cs->hwtemps[idx].scalar_lastread)
- pos = cs->hwtemps[idx].scalar_lastread;
- }
-
- return pos;
-}
+ if (inst.Opcode != OPCODE_KIL) {
+ if (inst.DstReg.File != PROGRAM_TEMPORARY ||
+ inst.DstReg.WriteMask != WRITEMASK_XYZW) {
+ int tempreg = radeonFindFreeTemporary(t);
-/**
- * Allocates a slot for an ALU instruction that can consist of
- * a vertex part or a scalar part or both.
- *
- * Sources from src (src[0] to src[argc-1]) are added to the slot in the
- * appropriate position (vector and/or scalar), and their positions are
- * recorded in the srcpos array.
- *
- * This function emits instruction code for the source fetch and the
- * argument selection. It does not emit instruction code for the
- * opcode or the destination selection.
- *
- * @return the index of the slot
- */
-static int find_and_prepare_slot(struct r300_fragment_program *fp,
- GLboolean emit_vop,
- GLboolean emit_sop,
- int argc, GLuint * src, GLuint dest, int mask)
-{
- COMPILE_STATE;
- int hwsrc[3];
- int srcpos[3];
- unsigned int used;
- int tempused;
- int tempvsrc[3];
- int tempssrc[3];
- int pos;
- int regnr;
- int i, j;
-
- // Determine instruction slots, whether sources are required on
- // vector or scalar side, and the smallest slot number where
- // all source registers are available
- used = 0;
- if (emit_vop)
- used |= SLOT_OP_VECTOR;
- if (emit_sop)
- used |= SLOT_OP_SCALAR;
-
- pos = get_earliest_allowed_write(fp, dest, mask);
-
- if (fp->node[fp->cur_node].alu_offset > pos)
- pos = fp->node[fp->cur_node].alu_offset;
- for (i = 0; i < argc; ++i) {
- if (!REG_GET_BUILTIN(src[i])) {
- if (emit_vop)
- used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i;
- if (emit_sop)
- used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i;
- }
-
- hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */
- regnr = hwsrc[i] & 31;
-
- if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) {
- if (used & (SLOT_SRC_VECTOR << i)) {
- if (cs->hwtemps[regnr].vector_valid > pos)
- pos = cs->hwtemps[regnr].vector_valid;
- }
- if (used & (SLOT_SRC_SCALAR << i)) {
- if (cs->hwtemps[regnr].scalar_valid > pos)
- pos = cs->hwtemps[regnr].scalar_valid;
- }
+ inst.DstReg.File = PROGRAM_TEMPORARY;
+ inst.DstReg.Index = tempreg;
+ inst.DstReg.WriteMask = WRITEMASK_XYZW;
+ destredirect = GL_TRUE;
}
}
- // Find a slot that fits
- for (;; ++pos) {
- if (cs->slot[pos].used & used & SLOT_OP_BOTH)
- continue;
-
- if (pos >= cs->nrslots) {
- if (cs->nrslots >= PFS_MAX_ALU_INST) {
- ERROR("Out of ALU instruction slots\n");
- return -1;
- }
-
- fp->alu.inst[pos].inst0 = NOP_INST0;
- fp->alu.inst[pos].inst1 = NOP_INST1;
- fp->alu.inst[pos].inst2 = NOP_INST2;
- fp->alu.inst[pos].inst3 = NOP_INST3;
-
- cs->nrslots++;
- }
- // Note: When we need both parts (vector and scalar) of a source,
- // we always try to put them into the same position. This makes the
- // code easier to read, and it is optimal (i.e. one doesn't gain
- // anything by splitting the parts).
- // It also avoids headaches with swizzles that access both parts (i.e WXY)
- tempused = cs->slot[pos].used;
- for (i = 0; i < 3; ++i) {
- tempvsrc[i] = cs->slot[pos].vsrc[i];
- tempssrc[i] = cs->slot[pos].ssrc[i];
- }
-
- for (i = 0; i < argc; ++i) {
- int flags = (used >> i) & SLOT_SRC_BOTH;
-
- if (!flags) {
- srcpos[i] = 0;
- continue;
- }
-
- for (j = 0; j < 3; ++j) {
- if ((tempused >> j) & flags & SLOT_SRC_VECTOR) {
- if (tempvsrc[j] != hwsrc[i])
- continue;
- }
-
- if ((tempused >> j) & flags & SLOT_SRC_SCALAR) {
- if (tempssrc[j] != hwsrc[i])
- continue;
- }
-
- break;
- }
-
- if (j == 3)
- break;
-
- srcpos[i] = j;
- tempused |= flags << j;
- if (flags & SLOT_SRC_VECTOR)
- tempvsrc[j] = hwsrc[i];
- if (flags & SLOT_SRC_SCALAR)
- tempssrc[j] = hwsrc[i];
- }
-
- if (i == argc)
- break;
- }
-
- // Found a slot, reserve it
- cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH);
- for (i = 0; i < 3; ++i) {
- cs->slot[pos].vsrc[i] = tempvsrc[i];
- cs->slot[pos].ssrc[i] = tempssrc[i];
- }
-
- for (i = 0; i < argc; ++i) {
- if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) {
- int regnr = hwsrc[i] & 31;
-
- if (used & (SLOT_SRC_VECTOR << i)) {
- if (cs->hwtemps[regnr].vector_lastread < pos)
- cs->hwtemps[regnr].vector_lastread =
- pos;
- }
- if (used & (SLOT_SRC_SCALAR << i)) {
- if (cs->hwtemps[regnr].scalar_lastread < pos)
- cs->hwtemps[regnr].scalar_lastread =
- pos;
- }
- }
- }
-
- // Emit the source fetch code
- fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK;
- fp->alu.inst[pos].inst1 |=
- ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) |
- (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) |
- (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT));
-
- fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK;
- fp->alu.inst[pos].inst3 |=
- ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) |
- (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) |
- (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT));
-
- // Emit the argument selection code
- if (emit_vop) {
- int swz[3];
-
- for (i = 0; i < 3; ++i) {
- if (i < argc) {
- swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base +
- (srcpos[i] *
- v_swiz[REG_GET_VSWZ(src[i])].
- stride)) | ((src[i] & REG_NEGV_MASK)
- ? ARG_NEG : 0) | ((src[i]
- &
- REG_ABS_MASK)
- ?
- ARG_ABS
- : 0);
- } else {
- swz[i] = R300_FPI0_ARGC_ZERO;
- }
- }
-
- fp->alu.inst[pos].inst0 &=
- ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK |
- R300_FPI0_ARG2C_MASK);
- fp->alu.inst[pos].inst0 |=
- (swz[0] << R300_FPI0_ARG0C_SHIFT) | (swz[1] <<
- R300_FPI0_ARG1C_SHIFT)
- | (swz[2] << R300_FPI0_ARG2C_SHIFT);
- }
-
- if (emit_sop) {
- int swz[3];
-
- for (i = 0; i < 3; ++i) {
- if (i < argc) {
- swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base +
- (srcpos[i] *
- s_swiz[REG_GET_SSWZ(src[i])].
- stride)) | ((src[i] & REG_NEGV_MASK)
- ? ARG_NEG : 0) | ((src[i]
- &
- REG_ABS_MASK)
- ?
- ARG_ABS
- : 0);
- } else {
- swz[i] = R300_FPI2_ARGA_ZERO;
- }
- }
-
- fp->alu.inst[pos].inst2 &=
- ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK |
- R300_FPI2_ARG2A_MASK);
- fp->alu.inst[pos].inst2 |=
- (swz[0] << R300_FPI2_ARG0A_SHIFT) | (swz[1] <<
- R300_FPI2_ARG1A_SHIFT)
- | (swz[2] << R300_FPI2_ARG2A_SHIFT);
- }
-
- return pos;
-}
-
-/**
- * Append an ALU instruction to the instruction list.
- */
-static void emit_arith(struct r300_fragment_program *fp,
- int op,
- GLuint dest,
- int mask,
- GLuint src0, GLuint src1, GLuint src2, int flags)
-{
- COMPILE_STATE;
- GLuint src[3] = { src0, src1, src2 };
- int hwdest;
- GLboolean emit_vop, emit_sop;
- int vop, sop, argc;
- int pos;
-
- vop = r300_fpop[op].v_op;
- sop = r300_fpop[op].s_op;
- argc = r300_fpop[op].argc;
-
- if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT &&
- REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) {
- if (mask & WRITEMASK_Z) {
- mask = WRITEMASK_W;
- } else {
- return;
- }
- }
-
- emit_vop = GL_FALSE;
- emit_sop = GL_FALSE;
- if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3)
- emit_vop = GL_TRUE;
- if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA)
- emit_sop = GL_TRUE;
-
- pos =
- find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest,
- mask);
- if (pos < 0)
- return;
-
- hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */
-
- if (flags & PFS_FLAG_SAT) {
- vop |= R300_FPI0_OUTC_SAT;
- sop |= R300_FPI2_OUTA_SAT;
- }
-
- /* Throw the pieces together and get FPI0/1 */
- if (emit_vop) {
- fp->alu.inst[pos].inst0 |= vop;
+ tgt = radeonAppendInstructions(t->Program, 1);
+ _mesa_copy_instructions(tgt, &inst, 1);
+
+ if (inst.Opcode != OPCODE_KIL &&
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
+ GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
+ int rcptemp = radeonFindFreeTemporary(t);
+ int pass, fail;
+
+ tgt = radeonAppendInstructions(t->Program, 3);
+
+ tgt[0].Opcode = OPCODE_RCP;
+ tgt[0].DstReg.File = PROGRAM_TEMPORARY;
+ tgt[0].DstReg.Index = rcptemp;
+ tgt[0].DstReg.WriteMask = WRITEMASK_W;
+ tgt[0].SrcReg[0] = inst.SrcReg[0];
+ tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ tgt[1].Opcode = OPCODE_MAD;
+ tgt[1].DstReg = inst.DstReg;
+ tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
+ tgt[1].SrcReg[0] = inst.SrcReg[0];
+ tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
+ tgt[1].SrcReg[1].Index = rcptemp;
+ tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
+ tgt[1].SrcReg[2].Index = inst.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
+ tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW;
+ else
+ tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW;
- fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT;
+ tgt[2].Opcode = OPCODE_CMP;
+ tgt[2].DstReg = orig_inst->DstReg;
+ tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
+ tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
- if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
- if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
- fp->alu.inst[pos].inst1 |=
- (mask & WRITEMASK_XYZ) <<
- R300_FPI1_DSTC_OUTPUT_MASK_SHIFT;
- } else
- assert(0);
+ if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ pass = 1;
+ fail = 2;
} else {
- fp->alu.inst[pos].inst1 |=
- (mask & WRITEMASK_XYZ) <<
- R300_FPI1_DSTC_REG_MASK_SHIFT;
-
- cs->hwtemps[hwdest].vector_valid = pos + 1;
+ pass = 2;
+ fail = 1;
}
- }
- /* And now FPI2/3 */
- if (emit_sop) {
- fp->alu.inst[pos].inst2 |= sop;
-
- if (mask & WRITEMASK_W) {
- if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
- if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
- fp->alu.inst[pos].inst3 |=
- (hwdest << R300_FPI3_DSTA_SHIFT) |
- R300_FPI3_DSTA_OUTPUT;
- } else if (REG_GET_INDEX(dest) ==
- FRAG_RESULT_DEPR) {
- fp->alu.inst[pos].inst3 |=
- R300_FPI3_DSTA_DEPTH;
- } else
- assert(0);
- } else {
- fp->alu.inst[pos].inst3 |=
- (hwdest << R300_FPI3_DSTA_SHIFT) |
- R300_FPI3_DSTA_REG;
+ tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
+ tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
+ tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
+ } else if (destredirect) {
+ tgt = radeonAppendInstructions(t->Program, 1);
- cs->hwtemps[hwdest].scalar_valid = pos + 1;
- }
- }
+ tgt->Opcode = OPCODE_MOV;
+ tgt->DstReg = orig_inst->DstReg;
+ tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
+ tgt->SrcReg[0].Index = inst.DstReg.Index;
}
- return;
+ return GL_TRUE;
}
-#if 0
-static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr)
+
+static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp)
{
struct gl_fragment_program *mp = &fp->mesa_program;
- GLuint r = undef;
-
- if (!(mp->Base.InputsRead & (1 << attr))) {
- ERROR("Attribute %d was not provided!\n", attr);
- return undef;
- }
- REG_SET_TYPE(r, REG_TYPE_INPUT);
- REG_SET_INDEX(r, attr);
- REG_SET_VALID(r, GL_TRUE);
- return r;
+ /* Ask Mesa nicely to fill in ParameterValues for us */
+ if (mp->Base.Parameters)
+ _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters);
}
-#endif
-
-static GLfloat SinCosConsts[2][4] = {
- {
- 1.273239545, // 4/PI
- -0.405284735, // -4/(PI*PI)
- 3.141592654, // PI
- 0.2225 // weight
- },
- {
- 0.75,
- 0.0,
- 0.159154943, // 1/(2*PI)
- 6.283185307 // 2*PI
- }
-};
+
/**
- * Emit a LIT instruction.
- * \p flags may be PFS_FLAG_SAT
+ * Transform the program to support fragment.position.
*
- * Definition of LIT (from ARB_fragment_program):
- * tmp = VectorLoad(op0);
- * if (tmp.x < 0) tmp.x = 0;
- * if (tmp.y < 0) tmp.y = 0;
- * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
- * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
- * result.x = 1.0;
- * result.y = tmp.x;
- * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
- * result.w = 1.0;
+ * Introduce a small fragment at the start of the program that will be
+ * the only code that directly reads the FRAG_ATTRIB_WPOS input.
+ * All other code pieces that reference that input will be rewritten
+ * to read from a newly allocated temporary.
*
- * The longest path of computation is the one leading to result.z,
- * consisting of 5 operations. This implementation of LIT takes
- * 5 slots. So unless there's some special undocumented opcode,
- * this implementation is potentially optimal. Unfortunately,
- * emit_arith is a bit too conservative because it doesn't understand
- * partial writes to the vector component.
+ * \todo if/when r5xx supports the radeon_program architecture, this is a
+ * likely candidate for code sharing.
*/
-static const GLfloat LitConst[4] =
- { 127.999999, 127.999999, 127.999999, -127.999999 };
-
-static void emit_lit(struct r300_fragment_program *fp,
- GLuint dest, int mask, GLuint src, int flags)
+static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
{
- COMPILE_STATE;
- GLuint cnst;
- int needTemporary;
- GLuint temp;
-
- cnst = emit_const4fv(fp, LitConst);
-
- needTemporary = 0;
- if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) {
- needTemporary = 1;
- } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
- // LIT is typically followed by DP3/DP4, so there's no point
- // in creating special code for this case
- needTemporary = 1;
- }
-
- if (needTemporary) {
- temp = keep(get_temp_reg(fp));
- } else {
- temp = keep(dest);
- }
-
- // Note: The order of emit_arith inside the slots is relevant,
- // because emit_arith only looks at scalar vs. vector when resolving
- // dependencies, and it does not consider individual vector components,
- // so swizzling between the two parts can create fake dependencies.
-
- // First slot
- emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY,
- keep(src), pfs_zero, undef, 0);
- emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0);
-
- // Second slot
- emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z,
- swizzle(temp, W, W, W, W), cnst, undef, 0);
- emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W,
- swizzle(temp, Y, Y, Y, Y), undef, undef, 0);
-
- // Third slot
- // If desired, we saturate the y result here.
- // This does not affect the use as a condition variable in the CMP later
- emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W,
- temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0);
- emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y,
- swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags);
-
- // Fourth slot
- emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X,
- pfs_one, pfs_one, pfs_zero, 0);
- emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0);
-
- // Fifth slot
- emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z,
- pfs_zero, swizzle(temp, W, W, W, W),
- negate(swizzle(temp, Y, Y, Y, Y)), flags);
- emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one,
- pfs_zero, 0);
-
- if (needTemporary) {
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- temp, pfs_one, pfs_zero, flags);
- free_temp(fp, temp);
- } else {
- // Decrease refcount of the destination
- t_hw_dst(fp, dest, GL_FALSE, cs->nrslots);
- }
-}
-
-static GLboolean parse_program(struct r300_fragment_program *fp)
-{
- struct gl_fragment_program *mp = &fp->mesa_program;
- const struct prog_instruction *inst = mp->Base.Instructions;
- struct prog_instruction *fpi;
- GLuint src[3], dest, temp[2];
- int flags, mask = 0;
- int const_sin[2];
-
- if (!inst || inst[0].Opcode == OPCODE_END) {
- ERROR("empty program?\n");
- return GL_FALSE;
- }
+ GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead;
- for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
- if (fpi->SaturateMode == SATURATE_ZERO_ONE)
- flags = PFS_FLAG_SAT;
- else
- flags = 0;
-
- if (fpi->Opcode != OPCODE_KIL) {
- dest = t_dst(fp, fpi->DstReg);
- mask = fpi->DstReg.WriteMask;
- }
-
- switch (fpi->Opcode) {
- case OPCODE_ABS:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- absolute(src[0]), pfs_one, pfs_zero, flags);
- break;
- case OPCODE_ADD:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- src[0], pfs_one, src[1], flags);
- break;
- case OPCODE_CMP:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- src[2] = t_src(fp, fpi->SrcReg[2]);
- /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c
- * r300 - if src2.c < 0.0 ? src1.c : src0.c
- */
- emit_arith(fp, PFS_OP_CMP, dest, mask,
- src[2], src[1], src[0], flags);
- break;
- case OPCODE_COS:
- /*
- * cos using a parabola (see SIN):
- * cos(x):
- * x = (x/(2*PI))+0.75
- * x = frac(x)
- * x = (x*2*PI)-PI
- * result = sin(x)
- */
- temp[0] = get_temp_reg(fp);
- const_sin[0] = emit_const4fv(fp, SinCosConsts[0]);
- const_sin[1] = emit_const4fv(fp, SinCosConsts[1]);
- src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
-
- /* add 0.5*PI and do range reduction */
-
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
- swizzle(src[0], X, X, X, X),
- swizzle(const_sin[1], Z, Z, Z, Z),
- swizzle(const_sin[1], X, X, X, X), 0);
-
- emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X,
- swizzle(temp[0], X, X, X, X),
- undef, undef, 0);
-
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI
- negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI
- 0);
-
- /* SIN */
-
- emit_arith(fp, PFS_OP_MAD, temp[0],
- WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
- Z, Z, Z,
- Z),
- const_sin[0], pfs_zero, 0);
-
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
- swizzle(temp[0], Y, Y, Y, Y),
- absolute(swizzle(temp[0], Z, Z, Z, Z)),
- swizzle(temp[0], X, X, X, X), 0);
-
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y,
- swizzle(temp[0], X, X, X, X),
- absolute(swizzle(temp[0], X, X, X, X)),
- negate(swizzle(temp[0], X, X, X, X)), 0);
-
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- swizzle(temp[0], Y, Y, Y, Y),
- swizzle(const_sin[0], W, W, W, W),
- swizzle(temp[0], X, X, X, X), flags);
-
- free_temp(fp, temp[0]);
- break;
- case OPCODE_DP3:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- emit_arith(fp, PFS_OP_DP3, dest, mask,
- src[0], src[1], undef, flags);
- break;
- case OPCODE_DP4:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- emit_arith(fp, PFS_OP_DP4, dest, mask,
- src[0], src[1], undef, flags);
- break;
- case OPCODE_DPH:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- /* src0.xyz1 -> temp
- * DP4 dest, temp, src1
- */
-#if 0
- temp[0] = get_temp_reg(fp);
- src[0].s_swz = SWIZZLE_ONE;
- emit_arith(fp, PFS_OP_MAD, temp[0], mask,
- src[0], pfs_one, pfs_zero, 0);
- emit_arith(fp, PFS_OP_DP4, dest, mask,
- temp[0], src[1], undef, flags);
- free_temp(fp, temp[0]);
-#else
- emit_arith(fp, PFS_OP_DP4, dest, mask,
- swizzle(src[0], X, Y, Z, ONE), src[1],
- undef, flags);
-#endif
- break;
- case OPCODE_DST:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- /* dest.y = src0.y * src1.y */
- if (mask & WRITEMASK_Y)
- emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y,
- keep(src[0]), keep(src[1]),
- pfs_zero, flags);
- /* dest.z = src0.z */
- if (mask & WRITEMASK_Z)
- emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z,
- src[0], pfs_one, pfs_zero, flags);
- /* result.x = 1.0
- * result.w = src1.w */
- if (mask & WRITEMASK_XW) {
- REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */
- emit_arith(fp, PFS_OP_MAD, dest,
- mask & WRITEMASK_XW,
- src[1], pfs_one, pfs_zero, flags);
- }
- break;
- case OPCODE_EX2:
- src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
- emit_arith(fp, PFS_OP_EX2, dest, mask,
- src[0], undef, undef, flags);
- break;
- case OPCODE_FLR:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- temp[0] = get_temp_reg(fp);
- /* FRC temp, src0
- * MAD dest, src0, 1.0, -temp
- */
- emit_arith(fp, PFS_OP_FRC, temp[0], mask,
- keep(src[0]), undef, undef, 0);
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- src[0], pfs_one, negate(temp[0]), flags);
- free_temp(fp, temp[0]);
- break;
- case OPCODE_FRC:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- emit_arith(fp, PFS_OP_FRC, dest, mask,
- src[0], undef, undef, flags);
- break;
- case OPCODE_KIL:
- emit_tex(fp, fpi, R300_FPITX_OP_KIL);
- break;
- case OPCODE_LG2:
- src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
- emit_arith(fp, PFS_OP_LG2, dest, mask,
- src[0], undef, undef, flags);
- break;
- case OPCODE_LIT:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- emit_lit(fp, dest, mask, src[0], flags);
- break;
- case OPCODE_LRP:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- src[2] = t_src(fp, fpi->SrcReg[2]);
- /* result = tmp0tmp1 + (1 - tmp0)tmp2
- * = tmp0tmp1 + tmp2 + (-tmp0)tmp2
- * MAD temp, -tmp0, tmp2, tmp2
- * MAD result, tmp0, tmp1, temp
- */
- temp[0] = get_temp_reg(fp);
- emit_arith(fp, PFS_OP_MAD, temp[0], mask,
- negate(keep(src[0])), keep(src[2]), src[2],
- 0);
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- src[0], src[1], temp[0], flags);
- free_temp(fp, temp[0]);
- break;
- case OPCODE_MAD:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- src[2] = t_src(fp, fpi->SrcReg[2]);
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- src[0], src[1], src[2], flags);
- break;
- case OPCODE_MAX:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- emit_arith(fp, PFS_OP_MAX, dest, mask,
- src[0], src[1], undef, flags);
- break;
- case OPCODE_MIN:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- emit_arith(fp, PFS_OP_MIN, dest, mask,
- src[0], src[1], undef, flags);
- break;
- case OPCODE_MOV:
- case OPCODE_SWZ:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- src[0], pfs_one, pfs_zero, flags);
- break;
- case OPCODE_MUL:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- src[0], src[1], pfs_zero, flags);
- break;
- case OPCODE_POW:
- src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
- src[1] = t_scalar_src(fp, fpi->SrcReg[1]);
- temp[0] = get_temp_reg(fp);
- emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W,
- src[0], undef, undef, 0);
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W,
- temp[0], src[1], pfs_zero, 0);
- emit_arith(fp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask,
- temp[0], undef, undef, 0);
- free_temp(fp, temp[0]);
- break;
- case OPCODE_RCP:
- src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
- emit_arith(fp, PFS_OP_RCP, dest, mask,
- src[0], undef, undef, flags);
- break;
- case OPCODE_RSQ:
- src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
- emit_arith(fp, PFS_OP_RSQ, dest, mask,
- absolute(src[0]), pfs_zero, pfs_zero, flags);
- break;
- case OPCODE_SCS:
- /*
- * scs using a parabola :
- * scs(x):
- * result.x = sin(-abs(x)+0.5*PI) (cos)
- * result.y = sin(x) (sin)
- *
- */
- temp[0] = get_temp_reg(fp);
- temp[1] = get_temp_reg(fp);
- const_sin[0] = emit_const4fv(fp, SinCosConsts[0]);
- const_sin[1] = emit_const4fv(fp, SinCosConsts[1]);
- src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
-
- /* x = -abs(x)+0.5*PI */
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI
- pfs_half,
- negate(abs
- (swizzle(keep(src[0]), X, X, X, X))),
- 0);
-
- /* C*x (sin) */
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W,
- swizzle(const_sin[0], Y, Y, Y, Y),
- swizzle(keep(src[0]), X, X, X, X),
- pfs_zero, 0);
-
- /* B*x, C*x (cos) */
- emit_arith(fp, PFS_OP_MAD, temp[0],
- WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
- Z, Z, Z,
- Z),
- const_sin[0], pfs_zero, 0);
-
- /* B*x (sin) */
- emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W,
- swizzle(const_sin[0], X, X, X, X),
- keep(src[0]), pfs_zero, 0);
-
- /* y = B*x + C*x*abs(x) (sin) */
- emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z,
- absolute(src[0]),
- swizzle(temp[0], W, W, W, W),
- swizzle(temp[1], W, W, W, W), 0);
-
- /* y = B*x + C*x*abs(x) (cos) */
- emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W,
- swizzle(temp[0], Y, Y, Y, Y),
- absolute(swizzle(temp[0], Z, Z, Z, Z)),
- swizzle(temp[0], X, X, X, X), 0);
-
- /* y*abs(y) - y (cos), y*abs(y) - y (sin) */
- emit_arith(fp, PFS_OP_MAD, temp[0],
- WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1],
- W, Z, Y,
- X),
- absolute(swizzle(temp[1], W, Z, Y, X)),
- negate(swizzle(temp[1], W, Z, Y, X)), 0);
-
- /* dest.xy = mad(temp.xy, P, temp2.wz) */
- emit_arith(fp, PFS_OP_MAD, dest,
- mask & (WRITEMASK_X | WRITEMASK_Y), temp[0],
- swizzle(const_sin[0], W, W, W, W),
- swizzle(temp[1], W, Z, Y, X), flags);
-
- free_temp(fp, temp[0]);
- free_temp(fp, temp[1]);
- break;
- case OPCODE_SGE:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- temp[0] = get_temp_reg(fp);
- /* temp = src0 - src1
- * dest.c = (temp.c < 0.0) ? 0 : 1
- */
- emit_arith(fp, PFS_OP_MAD, temp[0], mask,
- src[0], pfs_one, negate(src[1]), 0);
- emit_arith(fp, PFS_OP_CMP, dest, mask,
- pfs_one, pfs_zero, temp[0], 0);
- free_temp(fp, temp[0]);
- break;
- case OPCODE_SIN:
- /*
- * using a parabola:
- * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x)
- * extra precision is obtained by weighting against
- * itself squared.
- */
-
- temp[0] = get_temp_reg(fp);
- const_sin[0] = emit_const4fv(fp, SinCosConsts[0]);
- const_sin[1] = emit_const4fv(fp, SinCosConsts[1]);
- src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
-
- /* do range reduction */
-
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
- swizzle(keep(src[0]), X, X, X, X),
- swizzle(const_sin[1], Z, Z, Z, Z),
- pfs_half, 0);
-
- emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X,
- swizzle(temp[0], X, X, X, X),
- undef, undef, 0);
-
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI
- negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI
- 0);
-
- /* SIN */
-
- emit_arith(fp, PFS_OP_MAD, temp[0],
- WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
- Z, Z, Z,
- Z),
- const_sin[0], pfs_zero, 0);
-
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
- swizzle(temp[0], Y, Y, Y, Y),
- absolute(swizzle(temp[0], Z, Z, Z, Z)),
- swizzle(temp[0], X, X, X, X), 0);
-
- emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y,
- swizzle(temp[0], X, X, X, X),
- absolute(swizzle(temp[0], X, X, X, X)),
- negate(swizzle(temp[0], X, X, X, X)), 0);
-
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- swizzle(temp[0], Y, Y, Y, Y),
- swizzle(const_sin[0], W, W, W, W),
- swizzle(temp[0], X, X, X, X), flags);
-
- free_temp(fp, temp[0]);
- break;
- case OPCODE_SLT:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- temp[0] = get_temp_reg(fp);
- /* temp = src0 - src1
- * dest.c = (temp.c < 0.0) ? 1 : 0
- */
- emit_arith(fp, PFS_OP_MAD, temp[0], mask,
- src[0], pfs_one, negate(src[1]), 0);
- emit_arith(fp, PFS_OP_CMP, dest, mask,
- pfs_zero, pfs_one, temp[0], 0);
- free_temp(fp, temp[0]);
- break;
- case OPCODE_SUB:
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- emit_arith(fp, PFS_OP_MAD, dest, mask,
- src[0], pfs_one, negate(src[1]), flags);
- break;
- case OPCODE_TEX:
- emit_tex(fp, fpi, R300_FPITX_OP_TEX);
- break;
- case OPCODE_TXB:
- emit_tex(fp, fpi, R300_FPITX_OP_TXB);
- break;
- case OPCODE_TXP:
- emit_tex(fp, fpi, R300_FPITX_OP_TXP);
- break;
- case OPCODE_XPD:{
- src[0] = t_src(fp, fpi->SrcReg[0]);
- src[1] = t_src(fp, fpi->SrcReg[1]);
- temp[0] = get_temp_reg(fp);
- /* temp = src0.zxy * src1.yzx */
- emit_arith(fp, PFS_OP_MAD, temp[0],
- WRITEMASK_XYZ, swizzle(keep(src[0]),
- Z, X, Y, W),
- swizzle(keep(src[1]), Y, Z, X, W),
- pfs_zero, 0);
- /* dest.xyz = src0.yzx * src1.zxy - temp
- * dest.w = undefined
- * */
- emit_arith(fp, PFS_OP_MAD, dest,
- mask & WRITEMASK_XYZ, swizzle(src[0],
- Y, Z,
- X, W),
- swizzle(src[1], Z, X, Y, W),
- negate(temp[0]), flags);
- /* cleanup */
- free_temp(fp, temp[0]);
- break;
- }
- default:
- ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
- break;
- }
-
- if (fp->error)
- return GL_FALSE;
-
- }
-
- return GL_TRUE;
-}
+ if (!(InputsRead & FRAG_BIT_WPOS))
+ return;
-static void insert_wpos(struct gl_program *prog)
-{
static gl_state_index tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
};
struct prog_instruction *fpi;
GLuint window_index;
int i = 0;
- GLuint tempregi = prog->NumTemporaries;
- /* should do something else if no temps left... */
- prog->NumTemporaries++;
+ GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
- fpi = _mesa_alloc_instructions(prog->NumInstructions + 3);
- _mesa_init_instructions(fpi, prog->NumInstructions + 3);
+ _mesa_insert_instructions(compiler->program, 0, 3);
+ fpi = compiler->program->Instructions;
/* perspective divide */
fpi[i].Opcode = OPCODE_RCP;
@@ -2027,7 +303,7 @@ static void insert_wpos(struct gl_program *prog)
i++;
/* viewport transformation */
- window_index = _mesa_add_state_reference(prog->Parameters, tokens);
+ window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
fpi[i].Opcode = OPCODE_MAD;
@@ -2052,242 +328,182 @@ static void insert_wpos(struct gl_program *prog)
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
i++;
- _mesa_copy_instructions(&fpi[i], prog->Instructions,
- prog->NumInstructions);
-
- free(prog->Instructions);
-
- prog->Instructions = fpi;
-
- prog->NumInstructions += i;
- fpi = &prog->Instructions[prog->NumInstructions - 1];
-
- assert(fpi->Opcode == OPCODE_END);
-
- for (fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++) {
- for (i = 0; i < 3; i++)
- if (fpi->SrcReg[i].File == PROGRAM_INPUT &&
- fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS) {
- fpi->SrcReg[i].File = PROGRAM_TEMPORARY;
- fpi->SrcReg[i].Index = tempregi;
+ for (; i < compiler->program->NumInstructions; ++i) {
+ int reg;
+ for (reg = 0; reg < 3; reg++) {
+ if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
+ fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
+ fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
+ fpi[i].SrcReg[reg].Index = tempregi;
}
+ }
}
}
-/* - Init structures
- * - Determine what hwregs each input corresponds to
- */
-static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp)
+
+static void nqssadce_init(struct nqssadce_state* s)
{
- struct r300_pfs_compile_state *cs = NULL;
- struct gl_fragment_program *mp = &fp->mesa_program;
- struct prog_instruction *fpi;
- GLuint InputsRead = mp->Base.InputsRead;
- GLuint temps_used = 0; /* for fp->temps[] */
- int i, j;
-
- /* New compile, reset tracking data */
- fp->optimization =
- driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
- fp->translated = GL_FALSE;
- fp->error = GL_FALSE;
- fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
- fp->tex.length = 0;
- fp->cur_node = 0;
- fp->first_node_has_tex = 0;
- fp->const_nr = 0;
- fp->max_temp_idx = 0;
- fp->node[0].alu_end = -1;
- fp->node[0].tex_end = -1;
-
- _mesa_memset(cs, 0, sizeof(*fp->cs));
- for (i = 0; i < PFS_MAX_ALU_INST; i++) {
- for (j = 0; j < 3; j++) {
- cs->slot[i].vsrc[j] = SRC_CONST;
- cs->slot[i].ssrc[j] = SRC_CONST;
- }
- }
+ s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW;
+ s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W;
+}
- /* Work out what temps the Mesa inputs correspond to, this must match
- * what setup_rs_unit does, which shouldn't be a problem as rs_unit
- * configures itself based on the fragprog's InputsRead
- *
- * NOTE: this depends on get_hw_temp() allocating registers in order,
- * starting from register 0.
- */
- /* Texcoords come first */
- for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
- if (InputsRead & (FRAG_BIT_TEX0 << i)) {
- cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
- cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
- get_hw_temp(fp, 0);
- }
+static GLuint build_dtm(GLuint depthmode)
+{
+ switch(depthmode) {
+ default:
+ case GL_LUMINANCE: return 0;
+ case GL_INTENSITY: return 1;
+ case GL_ALPHA: return 2;
}
- InputsRead &= ~FRAG_BITS_TEX_ANY;
+}
- /* fragment position treated as a texcoord */
- if (InputsRead & FRAG_BIT_WPOS) {
- cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
- cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0);
- insert_wpos(&mp->Base);
- }
- InputsRead &= ~FRAG_BIT_WPOS;
+static GLuint build_func(GLuint comparefunc)
+{
+ return comparefunc - GL_NEVER;
+}
- /* Then primary colour */
- if (InputsRead & FRAG_BIT_COL0) {
- cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
- cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0);
- }
- InputsRead &= ~FRAG_BIT_COL0;
- /* Secondary color */
- if (InputsRead & FRAG_BIT_COL1) {
- cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
- cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0);
- }
- InputsRead &= ~FRAG_BIT_COL1;
-
- /* Anything else */
- if (InputsRead) {
- WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
- /* force read from hwreg 0 for now */
- for (i = 0; i < 32; i++)
- if (InputsRead & (1 << i))
- cs->inputs[i].reg = 0;
- }
+/**
+ * Collect all external state that is relevant for compiling the given
+ * fragment program.
+ */
+static void build_state(
+ r300ContextPtr r300,
+ struct r300_fragment_program *fp,
+ struct r300_fragment_program_external_state *state)
+{
+ int unit;
- /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
- * That way, we can free up the reg when it's no longer needed
- */
- if (!mp->Base.Instructions) {
- ERROR("No instructions found in program\n");
- return;
- }
+ _mesa_bzero(state, sizeof(*state));
- for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
- int idx;
-
- for (i = 0; i < 3; i++) {
- idx = fpi->SrcReg[i].Index;
- switch (fpi->SrcReg[i].File) {
- case PROGRAM_TEMPORARY:
- if (!(temps_used & (1 << idx))) {
- cs->temps[idx].reg = -1;
- cs->temps[idx].refcount = 1;
- temps_used |= (1 << idx);
- } else
- cs->temps[idx].refcount++;
- break;
- case PROGRAM_INPUT:
- cs->inputs[idx].refcount++;
- break;
- default:
- break;
- }
- }
+ for(unit = 0; unit < 16; ++unit) {
+ if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) {
+ struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
- idx = fpi->DstReg.Index;
- if (fpi->DstReg.File == PROGRAM_TEMPORARY) {
- if (!(temps_used & (1 << idx))) {
- cs->temps[idx].reg = -1;
- cs->temps[idx].refcount = 1;
- temps_used |= (1 << idx);
- } else
- cs->temps[idx].refcount++;
+ state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
+ state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
}
}
- cs->temp_in_use = temps_used;
}
-static void update_params(struct r300_fragment_program *fp)
-{
- struct gl_fragment_program *mp = &fp->mesa_program;
-
- /* Ask Mesa nicely to fill in ParameterValues for us */
- if (mp->Base.Parameters)
- _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
-}
void r300TranslateFragmentShader(r300ContextPtr r300,
struct r300_fragment_program *fp)
{
- struct r300_pfs_compile_state *cs = NULL;
+ struct r300_fragment_program_external_state state;
+
+ build_state(r300, fp, &state);
+ if (_mesa_memcmp(&fp->state, &state, sizeof(state))) {
+ /* TODO: cache compiled programs */
+ fp->translated = GL_FALSE;
+ _mesa_memcpy(&fp->state, &state, sizeof(state));
+ }
if (!fp->translated) {
+ struct r300_fragment_program_compiler compiler;
+
+ compiler.r300 = r300;
+ compiler.fp = fp;
+ compiler.code = &fp->code;
+ compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Fragment Program: Initial program:\n");
+ _mesa_print_program(compiler.program);
+ }
- init_program(r300, fp);
- cs = fp->cs;
+ insert_WPOS_trailer(&compiler);
+
+ struct radeon_program_transformation transformations[] = {
+ { &transform_TEX, &compiler },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(
+ r300->radeon.glCtx,
+ compiler.program,
+ 3, transformations);
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Fragment Program: After native rewrite:\n");
+ _mesa_print_program(compiler.program);
+ }
- if (parse_program(fp) == GL_FALSE) {
- dump_program(fp);
- return;
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r300FPIsNativeSwizzle,
+ .BuildSwizzle = &r300FPBuildSwizzle,
+ .RewriteDepthOut = GL_TRUE
+ };
+ radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Compiler: after NqSSA-DCE:\n");
+ _mesa_print_program(compiler.program);
}
- /* Finish off */
- fp->node[fp->cur_node].alu_end =
- cs->nrslots - fp->node[fp->cur_node].alu_offset - 1;
- if (fp->node[fp->cur_node].tex_end < 0)
- fp->node[fp->cur_node].tex_end = 0;
- fp->alu_offset = 0;
- fp->alu_end = cs->nrslots - 1;
- fp->tex_offset = 0;
- fp->tex_end = fp->tex.length ? fp->tex.length - 1 : 0;
- assert(fp->node[fp->cur_node].alu_end >= 0);
- assert(fp->alu_end >= 0);
-
- fp->translated = GL_TRUE;
- if (RADEON_DEBUG & DEBUG_PIXEL)
- dump_program(fp);
- r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
+ if (!r300FragmentProgramEmit(&compiler))
+ fp->error = GL_TRUE;
+
+ /* Subtle: Rescue any parameters that have been added during transformations */
+ _mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
+ fp->mesa_program.Base.Parameters = compiler.program->Parameters;
+ compiler.program->Parameters = 0;
+
+ _mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL);
+
+ if (!fp->error)
+ fp->translated = GL_TRUE;
+ if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
+ r300FragmentProgramDump(fp, &fp->code);
+ r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
}
- update_params(fp);
+ update_params(r300, fp);
}
/* just some random things... */
-static void dump_program(struct r300_fragment_program *fp)
+void r300FragmentProgramDump(
+ struct r300_fragment_program *fp,
+ struct r300_fragment_program_code *code)
{
int n, i, j;
static int pc = 0;
fprintf(stderr, "pc=%d*************************************\n", pc++);
- fprintf(stderr, "Mesa program:\n");
- fprintf(stderr, "-------------\n");
- _mesa_print_program(&fp->mesa_program.Base);
- fflush(stdout);
-
fprintf(stderr, "Hardware program\n");
fprintf(stderr, "----------------\n");
- for (n = 0; n < (fp->cur_node + 1); n++) {
+ for (n = 0; n < (code->cur_node + 1); n++) {
fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
- "alu_end: %d, tex_end: %d\n", n,
- fp->node[n].alu_offset,
- fp->node[n].tex_offset,
- fp->node[n].alu_end, fp->node[n].tex_end);
+ "alu_end: %d, tex_end: %d, flags: %08x\n", n,
+ code->node[n].alu_offset,
+ code->node[n].tex_offset,
+ code->node[n].alu_end, code->node[n].tex_end,
+ code->node[n].flags);
- if (fp->tex.length) {
+ if (n > 0 || code->first_node_has_tex) {
fprintf(stderr, " TEX:\n");
- for (i = fp->node[n].tex_offset;
- i <= fp->node[n].tex_offset + fp->node[n].tex_end;
+ for (i = code->node[n].tex_offset;
+ i <= code->node[n].tex_offset + code->node[n].tex_end;
++i) {
const char *instr;
- switch ((fp->tex.
- inst[i] >> R300_FPITX_OPCODE_SHIFT) &
+ switch ((code->tex.
+ inst[i] >> R300_TEX_INST_SHIFT) &
15) {
- case R300_FPITX_OP_TEX:
+ case R300_TEX_OP_LD:
instr = "TEX";
break;
- case R300_FPITX_OP_KIL:
+ case R300_TEX_OP_KIL:
instr = "KIL";
break;
- case R300_FPITX_OP_TXP:
+ case R300_TEX_OP_TXP:
instr = "TXP";
break;
- case R300_FPITX_OP_TXB:
+ case R300_TEX_OP_TXB:
instr = "TXB";
break;
default:
@@ -2297,22 +513,20 @@ static void dump_program(struct r300_fragment_program *fp)
fprintf(stderr,
" %s t%i, %c%i, texture[%i] (%08x)\n",
instr,
- (fp->tex.
- inst[i] >> R300_FPITX_DST_SHIFT) & 31,
- (fp->tex.
- inst[i] & R300_FPITX_SRC_CONST) ? 'c' :
+ (code->tex.
+ inst[i] >> R300_DST_ADDR_SHIFT) & 31,
't',
- (fp->tex.
- inst[i] >> R300_FPITX_SRC_SHIFT) & 31,
- (fp->tex.
- inst[i] & R300_FPITX_IMAGE_MASK) >>
- R300_FPITX_IMAGE_SHIFT,
- fp->tex.inst[i]);
+ (code->tex.
+ inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
+ (code->tex.
+ inst[i] & R300_TEX_ID_MASK) >>
+ R300_TEX_ID_SHIFT,
+ code->tex.inst[i]);
}
}
- for (i = fp->node[n].alu_offset;
- i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) {
+ for (i = code->node[n].alu_offset;
+ i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) {
char srcc[3][10], dstc[20];
char srca[3][10], dsta[20];
char argc[3][20];
@@ -2320,8 +534,8 @@ static void dump_program(struct r300_fragment_program *fp)
char flags[5], tmp[10];
for (j = 0; j < 3; ++j) {
- int regc = fp->alu.inst[i].inst1 >> (j * 6);
- int rega = fp->alu.inst[i].inst3 >> (j * 6);
+ int regc = code->alu.inst[i].inst1 >> (j * 6);
+ int rega = code->alu.inst[i].inst3 >> (j * 6);
sprintf(srcc[j], "%c%i",
(regc & 32) ? 'c' : 't', regc & 31);
@@ -2331,46 +545,46 @@ static void dump_program(struct r300_fragment_program *fp)
dstc[0] = 0;
sprintf(flags, "%s%s%s",
- (fp->alu.inst[i].
- inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "",
- (fp->alu.inst[i].
- inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "",
- (fp->alu.inst[i].
- inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : "");
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_REG_X) ? "x" : "",
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "",
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_REG_Z) ? "z" : "");
if (flags[0] != 0) {
sprintf(dstc, "t%i.%s ",
- (fp->alu.inst[i].
- inst1 >> R300_FPI1_DSTC_SHIFT) & 31,
+ (code->alu.inst[i].
+ inst1 >> R300_ALU_DSTC_SHIFT) & 31,
flags);
}
sprintf(flags, "%s%s%s",
- (fp->alu.inst[i].
- inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "",
- (fp->alu.inst[i].
- inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "",
- (fp->alu.inst[i].
- inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : "");
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
if (flags[0] != 0) {
sprintf(tmp, "o%i.%s",
- (fp->alu.inst[i].
- inst1 >> R300_FPI1_DSTC_SHIFT) & 31,
+ (code->alu.inst[i].
+ inst1 >> R300_ALU_DSTC_SHIFT) & 31,
flags);
strcat(dstc, tmp);
}
dsta[0] = 0;
- if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) {
+ if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) {
sprintf(dsta, "t%i.w ",
- (fp->alu.inst[i].
- inst3 >> R300_FPI3_DSTA_SHIFT) & 31);
+ (code->alu.inst[i].
+ inst3 >> R300_ALU_DSTA_SHIFT) & 31);
}
- if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) {
+ if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) {
sprintf(tmp, "o%i.w ",
- (fp->alu.inst[i].
- inst3 >> R300_FPI3_DSTA_SHIFT) & 31);
+ (code->alu.inst[i].
+ inst3 >> R300_ALU_DSTA_SHIFT) & 31);
strcat(dsta, tmp);
}
- if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) {
+ if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) {
strcat(dsta, "Z");
}
@@ -2378,31 +592,31 @@ static void dump_program(struct r300_fragment_program *fp)
"%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
" w: %3s %3s %3s -> %-20s (%08x)\n", i,
srcc[0], srcc[1], srcc[2], dstc,
- fp->alu.inst[i].inst1, srca[0], srca[1],
- srca[2], dsta, fp->alu.inst[i].inst3);
+ code->alu.inst[i].inst1, srca[0], srca[1],
+ srca[2], dsta, code->alu.inst[i].inst3);
for (j = 0; j < 3; ++j) {
- int regc = fp->alu.inst[i].inst0 >> (j * 7);
- int rega = fp->alu.inst[i].inst2 >> (j * 7);
+ int regc = code->alu.inst[i].inst0 >> (j * 7);
+ int rega = code->alu.inst[i].inst2 >> (j * 7);
int d;
char buf[20];
d = regc & 31;
if (d < 12) {
switch (d % 4) {
- case R300_FPI0_ARGC_SRC0C_XYZ:
+ case R300_ALU_ARGC_SRC0C_XYZ:
sprintf(buf, "%s.xyz",
srcc[d / 4]);
break;
- case R300_FPI0_ARGC_SRC0C_XXX:
+ case R300_ALU_ARGC_SRC0C_XXX:
sprintf(buf, "%s.xxx",
srcc[d / 4]);
break;
- case R300_FPI0_ARGC_SRC0C_YYY:
+ case R300_ALU_ARGC_SRC0C_YYY:
sprintf(buf, "%s.yyy",
srcc[d / 4]);
break;
- case R300_FPI0_ARGC_SRC0C_ZZZ:
+ case R300_ALU_ARGC_SRC0C_ZZZ:
sprintf(buf, "%s.zzz",
srcc[d / 4]);
break;
@@ -2465,8 +679,8 @@ static void dump_program(struct r300_fragment_program *fp)
fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
" w: %8s %8s %8s op: %08x\n",
argc[0], argc[1], argc[2],
- fp->alu.inst[i].inst0, arga[0], arga[1],
- arga[2], fp->alu.inst[i].inst2);
+ code->alu.inst[i].inst0, arga[0], arga[1],
+ arga[2], code->alu.inst[i].inst2);
}
}
}
diff --git a/r300/r300_fragprog.h b/r300/r300_fragprog.h
index 72fca77..b3a3cd2 100644
--- a/r300/r300_fragprog.h
+++ b/r300/r300_fragprog.h
@@ -40,65 +40,93 @@
#include "shader/prog_instruction.h"
#include "r300_context.h"
-
-typedef struct r300_fragment_program_swizzle {
- GLuint length;
- GLuint src[4];
- GLuint inst[8];
-} r300_fragment_program_swizzle_t;
-
-/* supported hw opcodes */
-#define PFS_OP_MAD 0
-#define PFS_OP_DP3 1
-#define PFS_OP_DP4 2
-#define PFS_OP_MIN 3
-#define PFS_OP_MAX 4
-#define PFS_OP_CMP 5
-#define PFS_OP_FRC 6
-#define PFS_OP_EX2 7
-#define PFS_OP_LG2 8
-#define PFS_OP_RCP 9
-#define PFS_OP_RSQ 10
-#define PFS_OP_REPL_ALPHA 11
-#define PFS_OP_CMPH 12
-#define MAX_PFS_OP 12
-
-#define PFS_FLAG_SAT (1 << 0)
-#define PFS_FLAG_ABS (1 << 1)
-
-#define ARG_NEG (1 << 5)
-#define ARG_ABS (1 << 6)
-#define ARG_MASK (127 << 0)
-#define ARG_STRIDE 7
-#define SRC_CONST (1 << 5)
-#define SRC_MASK (63 << 0)
-#define SRC_STRIDE 6
-
-#define NOP_INST0 ( \
- (R300_FPI0_OUTC_MAD) | \
- (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \
- (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \
- (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT))
-#define NOP_INST1 ( \
- ((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \
- ((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \
- ((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT))
-#define NOP_INST2 ( \
- (R300_FPI2_OUTA_MAD) | \
- (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \
- (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \
- (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT))
-#define NOP_INST3 ( \
- ((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \
- ((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \
- ((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT))
+#include "radeon_program.h"
#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
+#if 1
+
+/**
+ * Fragment program helper macros
+ */
+
+/* Produce unshifted source selectors */
+#define FP_TMP(idx) (idx)
+#define FP_CONST(idx) ((idx) | (1 << 5))
+
+/* Produce source/dest selector dword */
+#define FP_SELC_MASK_NO 0
+#define FP_SELC_MASK_X 1
+#define FP_SELC_MASK_Y 2
+#define FP_SELC_MASK_XY 3
+#define FP_SELC_MASK_Z 4
+#define FP_SELC_MASK_XZ 5
+#define FP_SELC_MASK_YZ 6
+#define FP_SELC_MASK_XYZ 7
+
+#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \
+ (((destidx) << R300_ALU_DSTC_SHIFT) | \
+ (FP_SELC_MASK_##regmask << 23) | \
+ (FP_SELC_MASK_##outmask << 26) | \
+ ((src0) << R300_ALU_SRC0C_SHIFT) | \
+ ((src1) << R300_ALU_SRC1C_SHIFT) | \
+ ((src2) << R300_ALU_SRC2C_SHIFT))
+
+#define FP_SELA_MASK_NO 0
+#define FP_SELA_MASK_W 1
+
+#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \
+ (((destidx) << R300_ALU_DSTA_SHIFT) | \
+ (FP_SELA_MASK_##regmask << 23) | \
+ (FP_SELA_MASK_##outmask << 24) | \
+ ((src0) << R300_ALU_SRC0A_SHIFT) | \
+ ((src1) << R300_ALU_SRC1A_SHIFT) | \
+ ((src2) << R300_ALU_SRC2A_SHIFT))
+
+/* Produce unshifted argument selectors */
+#define FP_ARGC(source) R300_ALU_ARGC_##source
+#define FP_ARGA(source) R300_ALU_ARGA_##source
+#define FP_ABS(arg) ((arg) | (1 << 6))
+#define FP_NEG(arg) ((arg) ^ (1 << 5))
+
+/* Produce instruction dword */
+#define FP_INSTRC(opcode,arg0,arg1,arg2) \
+ (R300_ALU_OUTC_##opcode | \
+ ((arg0) << R300_ALU_ARG0C_SHIFT) | \
+ ((arg1) << R300_ALU_ARG1C_SHIFT) | \
+ ((arg2) << R300_ALU_ARG2C_SHIFT))
+
+#define FP_INSTRA(opcode,arg0,arg1,arg2) \
+ (R300_ALU_OUTA_##opcode | \
+ ((arg0) << R300_ALU_ARG0A_SHIFT) | \
+ ((arg1) << R300_ALU_ARG1A_SHIFT) | \
+ ((arg2) << R300_ALU_ARG2A_SHIFT))
+
+#endif
+
struct r300_fragment_program;
extern void r300TranslateFragmentShader(r300ContextPtr r300,
struct r300_fragment_program *fp);
+
+/**
+ * Used internally by the r300 fragment program code to store compile-time
+ * only data.
+ */
+struct r300_fragment_program_compiler {
+ r300ContextPtr r300;
+ struct r300_fragment_program *fp;
+ struct r300_fragment_program_code *code;
+ struct gl_program *program;
+};
+
+extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler);
+
+
+extern void r300FragmentProgramDump(
+ struct r300_fragment_program *fp,
+ struct r300_fragment_program_code *code);
+
#endif
diff --git a/r300/r300_fragprog_emit.c b/r300/r300_fragprog_emit.c
new file mode 100644
index 0000000..9f0b7e3
--- /dev/null
+++ b/r300/r300_fragprog_emit.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Emit the r300_fragment_program_code that can be understood by the hardware.
+ * Input is a pre-transformed radeon_program.
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \todo FogOption
+ */
+
+#include "r300_fragprog.h"
+
+#include "radeon_program_pair.h"
+#include "r300_fragprog_swizzle.h"
+#include "r300_reg.h"
+
+
+#define PROG_CODE \
+ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
+ struct r300_fragment_program_code *code = c->code
+
+#define error(fmt, args...) do { \
+ fprintf(stderr, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ } while(0)
+
+
+static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex)
+{
+ PROG_CODE;
+
+ for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
+ if (code->constant[*hwindex].File == file &&
+ code->constant[*hwindex].Index == index)
+ break;
+ }
+
+ if (*hwindex >= code->const_nr) {
+ if (*hwindex >= PFS_NUM_CONST_REGS) {
+ error("Out of hw constants!\n");
+ return GL_FALSE;
+ }
+
+ code->const_nr++;
+ code->constant[*hwindex].File = file;
+ code->constant[*hwindex].Index = index;
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Mark a temporary register as used.
+ */
+static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
+{
+ if (index > code->max_temp_idx)
+ code->max_temp_idx = index;
+}
+
+
+static GLuint translate_rgb_opcode(GLuint opcode)
+{
+ switch(opcode) {
+ case OPCODE_CMP: return R300_ALU_OUTC_CMP;
+ case OPCODE_DP3: return R300_ALU_OUTC_DP3;
+ case OPCODE_DP4: return R300_ALU_OUTC_DP4;
+ case OPCODE_FRC: return R300_ALU_OUTC_FRC;
+ default:
+ error("translate_rgb_opcode(%i): Unknown opcode", opcode);
+ /* fall through */
+ case OPCODE_NOP:
+ /* fall through */
+ case OPCODE_MAD: return R300_ALU_OUTC_MAD;
+ case OPCODE_MAX: return R300_ALU_OUTC_MAX;
+ case OPCODE_MIN: return R300_ALU_OUTC_MIN;
+ case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
+ }
+}
+
+static GLuint translate_alpha_opcode(GLuint opcode)
+{
+ switch(opcode) {
+ case OPCODE_CMP: return R300_ALU_OUTA_CMP;
+ case OPCODE_DP3: return R300_ALU_OUTA_DP4;
+ case OPCODE_DP4: return R300_ALU_OUTA_DP4;
+ case OPCODE_EX2: return R300_ALU_OUTA_EX2;
+ case OPCODE_FRC: return R300_ALU_OUTA_FRC;
+ case OPCODE_LG2: return R300_ALU_OUTA_LG2;
+ default:
+ error("translate_rgb_opcode(%i): Unknown opcode", opcode);
+ /* fall through */
+ case OPCODE_NOP:
+ /* fall through */
+ case OPCODE_MAD: return R300_ALU_OUTA_MAD;
+ case OPCODE_MAX: return R300_ALU_OUTA_MAX;
+ case OPCODE_MIN: return R300_ALU_OUTA_MIN;
+ case OPCODE_RCP: return R300_ALU_OUTA_RCP;
+ case OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+ }
+}
+
+/**
+ * Emit one paired ALU instruction.
+ */
+static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
+{
+ PROG_CODE;
+
+ if (code->alu.length >= PFS_MAX_ALU_INST) {
+ error("Too many ALU instructions");
+ return GL_FALSE;
+ }
+
+ int ip = code->alu.length++;
+ int j;
+ code->node[code->cur_node].alu_end++;
+
+ code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode);
+ code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode);
+
+ for(j = 0; j < 3; ++j) {
+ GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
+ if (!inst->RGB.Src[j].Constant)
+ use_temporary(code, inst->RGB.Src[j].Index);
+ code->alu.inst[ip].inst1 |= src << (6*j);
+
+ src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
+ if (!inst->Alpha.Src[j].Constant)
+ use_temporary(code, inst->Alpha.Src[j].Index);
+ code->alu.inst[ip].inst3 |= src << (6*j);
+
+ GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+ arg |= inst->RGB.Arg[j].Abs << 6;
+ arg |= inst->RGB.Arg[j].Negate << 5;
+ code->alu.inst[ip].inst0 |= arg << (7*j);
+
+ arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
+ arg |= inst->Alpha.Arg[j].Abs << 6;
+ arg |= inst->Alpha.Arg[j].Negate << 5;
+ code->alu.inst[ip].inst2 |= arg << (7*j);
+ }
+
+ if (inst->RGB.Saturate)
+ code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP;
+ if (inst->Alpha.Saturate)
+ code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP;
+
+ if (inst->RGB.WriteMask) {
+ use_temporary(code, inst->RGB.DestIndex);
+ code->alu.inst[ip].inst1 |=
+ (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
+ (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
+ }
+ if (inst->RGB.OutputWriteMask) {
+ code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
+ code->node[code->cur_node].flags |= R300_RGBA_OUT;
+ }
+
+ if (inst->Alpha.WriteMask) {
+ use_temporary(code, inst->Alpha.DestIndex);
+ code->alu.inst[ip].inst3 |=
+ (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
+ R300_ALU_DSTA_REG;
+ }
+ if (inst->Alpha.OutputWriteMask) {
+ code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT;
+ code->node[code->cur_node].flags |= R300_RGBA_OUT;
+ }
+ if (inst->Alpha.DepthWriteMask) {
+ code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
+ code->node[code->cur_node].flags |= R300_W_OUT;
+ c->fp->WritesDepth = GL_TRUE;
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Finish the current node without advancing to the next one.
+ */
+static GLboolean finish_node(struct r300_fragment_program_compiler *c)
+{
+ struct r300_fragment_program_code *code = c->code;
+ struct r300_fragment_program_node *node = &code->node[code->cur_node];
+
+ if (node->alu_end < 0) {
+ /* Generate a single NOP for this node */
+ struct radeon_pair_instruction inst;
+ _mesa_bzero(&inst, sizeof(inst));
+ if (!emit_alu(c, &inst))
+ return GL_FALSE;
+ }
+
+ if (node->tex_end < 0) {
+ if (code->cur_node == 0) {
+ node->tex_end = 0;
+ } else {
+ error("Node %i has no TEX instructions", code->cur_node);
+ return GL_FALSE;
+ }
+ } else {
+ if (code->cur_node == 0)
+ code->first_node_has_tex = 1;
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Begin a block of texture instructions.
+ * Create the necessary indirection.
+ */
+static GLboolean begin_tex(void* data)
+{
+ PROG_CODE;
+
+ if (code->cur_node == 0) {
+ if (code->node[0].alu_end < 0 &&
+ code->node[0].tex_end < 0)
+ return GL_TRUE;
+ }
+
+ if (code->cur_node == 3) {
+ error("Too many texture indirections");
+ return GL_FALSE;
+ }
+
+ if (!finish_node(c))
+ return GL_FALSE;
+
+ struct r300_fragment_program_node *node = &code->node[++code->cur_node];
+ node->alu_offset = code->alu.length;
+ node->alu_end = -1;
+ node->tex_offset = code->tex.length;
+ node->tex_end = -1;
+ return GL_TRUE;
+}
+
+
+static GLboolean emit_tex(void* data, struct prog_instruction* inst)
+{
+ PROG_CODE;
+
+ if (code->tex.length >= PFS_MAX_TEX_INST) {
+ error("Too many TEX instructions");
+ return GL_FALSE;
+ }
+
+ GLuint unit = inst->TexSrcUnit;
+ GLuint dest = inst->DstReg.Index;
+ GLuint opcode;
+
+ switch(inst->Opcode) {
+ case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+ case OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+ case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+ case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+ default:
+ error("Unknown texture opcode %i", inst->Opcode);
+ return GL_FALSE;
+ }
+
+ if (inst->Opcode == OPCODE_KIL) {
+ unit = 0;
+ dest = 0;
+ } else {
+ use_temporary(code, dest);
+ }
+
+ use_temporary(code, inst->SrcReg[0].Index);
+
+ code->node[code->cur_node].tex_end++;
+ code->tex.inst[code->tex.length++] =
+ (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
+ (dest << R300_DST_ADDR_SHIFT) |
+ (unit << R300_TEX_ID_SHIFT) |
+ (opcode << R300_TEX_INST_SHIFT);
+ return GL_TRUE;
+}
+
+
+static const struct radeon_pair_handler pair_handler = {
+ .EmitConst = &emit_const,
+ .EmitPaired = &emit_alu,
+ .EmitTex = &emit_tex,
+ .BeginTexBlock = &begin_tex,
+ .MaxHwTemps = PFS_NUM_TEMP_REGS
+};
+
+/**
+ * Final compilation step: Turn the intermediate radeon_program into
+ * machine-readable instructions.
+ */
+GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler)
+{
+ struct r300_fragment_program_code *code = compiler->code;
+
+ _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
+ code->node[0].alu_end = -1;
+ code->node[0].tex_end = -1;
+
+ if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
+ return GL_FALSE;
+
+ if (!finish_node(compiler))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
diff --git a/r300/r300_fragprog_swizzle.c b/r300/r300_fragprog_swizzle.c
new file mode 100644
index 0000000..a86d2bd
--- /dev/null
+++ b/r300/r300_fragprog_swizzle.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ * Utilities to deal with the somewhat odd restriction on R300 fragment
+ * program swizzles.
+ */
+
+#include "r300_fragprog_swizzle.h"
+
+#include "r300_reg.h"
+#include "radeon_nqssadce.h"
+
+#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
+
+struct swizzle_data {
+ GLuint hash; /**< swizzle value this matches */
+ GLuint base; /**< base value for hw swizzle */
+ GLuint stride; /**< difference in base between arg0/1/2 */
+};
+
+static const struct swizzle_data native_swizzles[] = {
+ {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
+ {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
+ {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
+ {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
+ {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
+ {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
+ {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
+ {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
+ {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
+ {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}
+};
+
+static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
+
+
+/**
+ * Find a native RGB swizzle that matches the given swizzle.
+ * Returns 0 if none found.
+ */
+static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle)
+{
+ int i, comp;
+
+ for(i = 0; i < num_native_swizzles; ++i) {
+ const struct swizzle_data* sd = &native_swizzles[i];
+ for(comp = 0; comp < 3; ++comp) {
+ GLuint swz = GET_SWZ(swizzle, comp);
+ if (swz == SWIZZLE_NIL)
+ continue;
+ if (swz != GET_SWZ(sd->hash, comp))
+ break;
+ }
+ if (comp == 3)
+ return sd;
+ }
+
+ return 0;
+}
+
+
+/**
+ * Check whether the given instruction supports the swizzle and negate
+ * combinations in the given source register.
+ */
+GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
+{
+ if (reg.Abs)
+ reg.NegateBase = 0;
+
+ if (opcode == OPCODE_KIL ||
+ opcode == OPCODE_TEX ||
+ opcode == OPCODE_TXB ||
+ opcode == OPCODE_TXP) {
+ int j;
+
+ if (reg.Abs || reg.NegateBase != (15*reg.NegateAbs))
+ return GL_FALSE;
+
+ for(j = 0; j < 4; ++j) {
+ GLuint swz = GET_SWZ(reg.Swizzle, j);
+ if (swz == SWIZZLE_NIL)
+ continue;
+ if (swz != j)
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+ }
+
+ GLuint relevant = 0;
+ int j;
+
+ for(j = 0; j < 3; ++j)
+ if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL)
+ relevant |= 1 << j;
+
+ if ((reg.NegateBase & relevant) && (reg.NegateBase & relevant) != relevant)
+ return GL_FALSE;
+
+ if (!lookup_native_swizzle(reg.Swizzle))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Generate MOV dst, src using only native swizzles.
+ */
+void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
+{
+ if (src.Abs)
+ src.NegateBase = 0;
+
+ while(dst.WriteMask) {
+ const struct swizzle_data *best_swizzle = 0;
+ GLuint best_matchcount = 0;
+ GLuint best_matchmask = 0;
+ GLboolean rgbnegate;
+ int i, comp;
+
+ for(i = 0; i < num_native_swizzles; ++i) {
+ const struct swizzle_data *sd = &native_swizzles[i];
+ GLuint matchcount = 0;
+ GLuint matchmask = 0;
+ for(comp = 0; comp < 3; ++comp) {
+ if (!GET_BIT(dst.WriteMask, comp))
+ continue;
+ GLuint swz = GET_SWZ(src.Swizzle, comp);
+ if (swz == SWIZZLE_NIL)
+ continue;
+ if (swz == GET_SWZ(sd->hash, comp)) {
+ matchcount++;
+ matchmask |= 1 << comp;
+ }
+ }
+ if (matchcount > best_matchcount) {
+ best_swizzle = sd;
+ best_matchcount = matchcount;
+ best_matchmask = matchmask;
+ if (matchmask == (dst.WriteMask & WRITEMASK_XYZ))
+ break;
+ }
+ }
+
+ if ((src.NegateBase & best_matchmask) != 0) {
+ best_matchmask &= src.NegateBase;
+ rgbnegate = !src.NegateAbs;
+ } else {
+ rgbnegate = src.NegateAbs;
+ }
+
+ struct prog_instruction *inst;
+
+ _mesa_insert_instructions(s->Program, s->IP, 1);
+ inst = s->Program->Instructions + s->IP++;
+ inst->Opcode = OPCODE_MOV;
+ inst->DstReg = dst;
+ inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
+ inst->SrcReg[0] = src;
+ /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
+
+ dst.WriteMask &= ~inst->DstReg.WriteMask;
+ }
+}
+
+
+/**
+ * Translate an RGB (XYZ) swizzle into the hardware code for the given
+ * instruction source.
+ */
+GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle)
+{
+ const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
+
+ if (!sd) {
+ _mesa_printf("Not a native swizzle: %08x\n", swizzle);
+ return 0;
+ }
+
+ return sd->base + src*sd->stride;
+}
+
+
+/**
+ * Translate an Alpha (W) swizzle into the hardware code for the given
+ * instruction source.
+ */
+GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle)
+{
+ if (swizzle < 3)
+ return swizzle + 3*src;
+
+ switch(swizzle) {
+ case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
+ case SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
+ case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
+ default: return R300_ALU_ARGA_ONE;
+ }
+}
diff --git a/r300/r300_fragprog_swizzle.h b/r300/r300_fragprog_swizzle.h
new file mode 100644
index 0000000..3da99a9
--- /dev/null
+++ b/r300/r300_fragprog_swizzle.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_SWIZZLE_H_
+#define __R300_FRAGPROG_SWIZZLE_H_
+
+#include "glheader.h"
+#include "shader/prog_instruction.h"
+
+struct nqssadce_state;
+
+GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
+void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
+
+GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle);
+GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle);
+
+#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/r300/r300_ioctl.c b/r300/r300_ioctl.c
index ea94ce2..bd7f060 100644
--- a/r300/r300_ioctl.c
+++ b/r300/r300_ioctl.c
@@ -51,9 +51,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_ioctl.h"
#include "r300_cmdbuf.h"
#include "r300_state.h"
-#include "r300_program.h"
+#include "r300_vertprog.h"
#include "radeon_reg.h"
#include "r300_emit.h"
+#include "r300_fragprog.h"
#include "vblank.h"
@@ -106,19 +107,19 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
e32(cbpitch);
R300_STATECHANGE(r300, cmk);
- reg_start(R300_RB3D_COLORMASK, 0);
+ reg_start(RB3D_COLOR_CHANNEL_MASK, 0);
if (flags & CLEARBUFFER_COLOR) {
- e32((ctx->Color.ColorMask[BCOMP] ? R300_COLORMASK0_B : 0) |
- (ctx->Color.ColorMask[GCOMP] ? R300_COLORMASK0_G : 0) |
- (ctx->Color.ColorMask[RCOMP] ? R300_COLORMASK0_R : 0) |
- (ctx->Color.ColorMask[ACOMP] ? R300_COLORMASK0_A : 0));
+ e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
+ (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
+ (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
+ (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
} else {
e32(0x0);
}
R300_STATECHANGE(r300, zs);
- reg_start(R300_RB3D_ZSTENCIL_CNTL_0, 2);
+ reg_start(R300_ZB_CNTL, 2);
{
uint32_t t1, t2;
@@ -127,37 +128,28 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
t2 = 0x0;
if (flags & CLEARBUFFER_DEPTH) {
- t1 |= R300_RB3D_Z_WRITE_ONLY;
+ t1 |= R300_Z_ENABLE | R300_Z_WRITE_ENABLE;
t2 |=
- (R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT);
- } else {
- t1 |= R300_RB3D_Z_DISABLED_1; // disable
+ (R300_ZS_ALWAYS << R300_Z_FUNC_SHIFT);
}
if (flags & CLEARBUFFER_STENCIL) {
- t1 |= R300_RB3D_STENCIL_ENABLE;
+ t1 |= R300_STENCIL_ENABLE;
t2 |=
(R300_ZS_ALWAYS <<
- R300_RB3D_ZS1_FRONT_FUNC_SHIFT) |
+ R300_S_FRONT_FUNC_SHIFT) |
(R300_ZS_REPLACE <<
- R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) |
+ R300_S_FRONT_SFAIL_OP_SHIFT) |
(R300_ZS_REPLACE <<
- R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT) |
+ R300_S_FRONT_ZPASS_OP_SHIFT) |
(R300_ZS_REPLACE <<
- R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) |
- (R300_ZS_ALWAYS <<
- R300_RB3D_ZS1_BACK_FUNC_SHIFT) |
- (R300_ZS_REPLACE <<
- R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) |
- (R300_ZS_REPLACE <<
- R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT) |
- (R300_ZS_REPLACE <<
- R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT);
+ R300_S_FRONT_ZFAIL_OP_SHIFT);
}
e32(t1);
e32(t2);
- e32(r300->state.stencil.clear);
+ e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
+ (ctx->Stencil.Clear & R300_STENCILREF_MASK));
}
cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
@@ -172,11 +164,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
- reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
- e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
-
- reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
- e32(R300_RB3D_ZCACHE_UNKNOWN_03);
+ r300EmitCacheFlush(rmesa);
cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
}
@@ -190,10 +178,16 @@ static void r300EmitClearState(GLcontext * ctx)
int cmd_written = 0;
drm_radeon_cmd_header_t *cmd = NULL;
int has_tcl = 1;
+ int is_r500 = 0;
+ GLuint vap_cntl;
if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
has_tcl = 0;
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ is_r500 = 1;
+
+
/* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
* R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are
* quite complex; see the functions in r300_emit.c.
@@ -203,46 +197,57 @@ static void r300EmitClearState(GLcontext * ctx)
* these registers, as well as the actual values used for rendering.
*/
R300_STATECHANGE(r300, vir[0]);
- reg_start(R300_VAP_INPUT_ROUTE_0_0, 0);
+ reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
if (!has_tcl)
- e32(0x22030003);
+ e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+ ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
else
- e32(0x21030003);
+ e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+ ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
/* disable fog */
R300_STATECHANGE(r300, fogs);
- reg_start(R300_RE_FOG_STATE, 0);
+ reg_start(R300_FG_FOG_BLEND, 0);
e32(0x0);
R300_STATECHANGE(r300, vir[1]);
- reg_start(R300_VAP_INPUT_ROUTE_1_0, 0);
- e32(0xF688F688);
+ reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0);
+ e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
+ (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
+ (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
+ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT))
+ << R300_SWIZZLE0_SHIFT) |
+ (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
+ (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
+ (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
+ ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT))
+ << R300_SWIZZLE1_SHIFT)));
/* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
R300_STATECHANGE(r300, vic);
- reg_start(R300_VAP_INPUT_CNTL_0, 1);
- e32(R300_INPUT_CNTL_0_COLOR);
+ reg_start(R300_VAP_VTX_STATE_CNTL, 1);
+ e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
- if (!has_tcl) {
- R300_STATECHANGE(r300, vte);
- /* comes from fglrx startup of clear */
- reg_start(R300_SE_VTE_CNTL, 1);
- e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
- R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
- R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
- R300_VPORT_Z_OFFSET_ENA);
- e32(0x8);
-
- reg_start(0x21dc, 0);
- e32(0xaaaaaaaa);
- }
+ R300_STATECHANGE(r300, vte);
+ /* comes from fglrx startup of clear */
+ reg_start(R300_SE_VTE_CNTL, 1);
+ e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
+ R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
+ R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
+ R300_VPORT_Z_OFFSET_ENA);
+ e32(0x8);
+
+ reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+ e32(0xaaaaaaaa);
R300_STATECHANGE(r300, vof);
reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT);
- e32(0x0); /* no textures */
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
+ e32(0x0); /* no textures */
R300_STATECHANGE(r300, txe);
reg_start(R300_TX_ENABLE, 0);
@@ -258,7 +263,7 @@ static void r300EmitClearState(GLcontext * ctx)
efloat(0.0);
R300_STATECHANGE(r300, at);
- reg_start(R300_PP_ALPHA_TEST, 0);
+ reg_start(R300_FG_ALPHA_FUNC, 0);
e32(0x0);
R300_STATECHANGE(r300, bld);
@@ -266,78 +271,192 @@ static void r300EmitClearState(GLcontext * ctx)
e32(0x0);
e32(0x0);
- R300_STATECHANGE(r300, vap_clip_cntl);
- reg_start(R300_VAP_CLIP_CNTL, 0);
- e32(R300_221C_CLEAR);
+ if (has_tcl) {
+ R300_STATECHANGE(r300, vap_clip_cntl);
+ reg_start(R300_VAP_CLIP_CNTL, 0);
+ e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
+ }
R300_STATECHANGE(r300, ps);
- reg_start(R300_RE_POINTSIZE, 0);
+ reg_start(R300_GA_POINT_SIZE, 0);
e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
- R300_STATECHANGE(r300, ri);
- reg_start(R300_RS_INTERP_0, 8);
- for (i = 0; i < 8; ++i) {
- e32(R300_RS_INTERP_USED);
+ if (!is_r500) {
+ R300_STATECHANGE(r300, ri);
+ reg_start(R300_RS_IP_0, 7);
+ for (i = 0; i < 8; ++i) {
+ e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
+ }
+
+ R300_STATECHANGE(r300, rc);
+ /* The second constant is needed to get glxgears display anything .. */
+ reg_start(R300_RS_COUNT, 1);
+ e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+ e32(0x0);
+
+ R300_STATECHANGE(r300, rr);
+ reg_start(R300_RS_INST_0, 0);
+ e32(R300_RS_INST_COL_CN_WRITE);
+ } else {
+ R300_STATECHANGE(r300, ri);
+ reg_start(R500_RS_IP_0, 7);
+ for (i = 0; i < 8; ++i) {
+ e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+ }
+
+ R300_STATECHANGE(r300, rc);
+ /* The second constant is needed to get glxgears display anything .. */
+ reg_start(R300_RS_COUNT, 1);
+ e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+ e32(0x0);
+
+ R300_STATECHANGE(r300, rr);
+ reg_start(R500_RS_INST_0, 0);
+ e32(R500_RS_INST_COL_CN_WRITE);
+
}
- R300_STATECHANGE(r300, rc);
- /* The second constant is needed to get glxgears display anything .. */
- reg_start(R300_RS_CNTL_0, 1);
- e32((1 << R300_RS_CNTL_CI_CNT_SHIFT) | R300_RS_CNTL_0_UNKNOWN_18);
- e32(0x0);
+ if (!is_r500) {
+ R300_STATECHANGE(r300, fp);
+ reg_start(R300_US_CONFIG, 2);
+ e32(0x0);
+ e32(0x0);
+ e32(0x0);
+ reg_start(R300_US_CODE_ADDR_0, 3);
+ e32(0x0);
+ e32(0x0);
+ e32(0x0);
+ e32(R300_RGBA_OUT);
- R300_STATECHANGE(r300, rr);
- reg_start(R300_RS_ROUTE_0, 0);
- e32(R300_RS_ROUTE_0_COLOR);
+ R300_STATECHANGE(r300, fpi[0]);
+ R300_STATECHANGE(r300, fpi[1]);
+ R300_STATECHANGE(r300, fpi[2]);
+ R300_STATECHANGE(r300, fpi[3]);
- R300_STATECHANGE(r300, fp);
- reg_start(R300_PFS_CNTL_0, 2);
- e32(0x0);
- e32(0x0);
- e32(0x0);
- reg_start(R300_PFS_NODE_0, 3);
- e32(0x0);
- e32(0x0);
- e32(0x0);
- e32(R300_PFS_NODE_OUTPUT_COLOR);
+ reg_start(R300_US_ALU_RGB_INST_0, 0);
+ e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
- R300_STATECHANGE(r300, fpi[0]);
- R300_STATECHANGE(r300, fpi[1]);
- R300_STATECHANGE(r300, fpi[2]);
- R300_STATECHANGE(r300, fpi[3]);
+ reg_start(R300_US_ALU_RGB_ADDR_0, 0);
+ e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
- reg_start(R300_PFS_INSTR0_0, 0);
- e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
+ reg_start(R300_US_ALU_ALPHA_INST_0, 0);
+ e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
- reg_start(R300_PFS_INSTR1_0, 0);
- e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
+ reg_start(R300_US_ALU_ALPHA_ADDR_0, 0);
+ e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
+ } else {
+ R300_STATECHANGE(r300, fp);
+ reg_start(R500_US_CONFIG, 1);
+ e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+ e32(0x0);
+ reg_start(R500_US_CODE_ADDR, 2);
+ e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
+ e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
+ e32(R500_US_CODE_OFFSET_ADDR(0));
+
+ R300_STATECHANGE(r300, r500fp);
+ r500fp_start_fragment(0, 6);
+
+ e32(R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_LAST |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP);
+
+ e32(R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR1_CONST |
+ R500_RGB_ADDR2(0) |
+ R500_RGB_ADDR2_CONST);
+
+ e32(R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR1_CONST |
+ R500_ALPHA_ADDR2(0) |
+ R500_ALPHA_ADDR2_CONST);
+
+ e32(R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC0 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_B);
+
+ e32(R500_ALPHA_OP_CMP |
+ R500_ALPHA_SWIZ_A_A |
+ R500_ALPHA_SWIZ_B_A);
+
+ e32(R500_ALU_RGBA_OP_CMP |
+ R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 |
+ R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0);
+ }
- reg_start(R300_PFS_INSTR2_0, 0);
- e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
+ reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+ e32(0x00000000);
+ if (has_tcl) {
+ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (12 << R300_VF_MAX_VTX_NUM_SHIFT));
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
+ } else
+ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (5 << R300_VF_MAX_VTX_NUM_SHIFT));
+
+ if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
+ vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
+ vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
+ vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
+ vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+ else
+ vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
- reg_start(R300_PFS_INSTR3_0, 0);
- e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
+ R300_STATECHANGE(rmesa, vap_cntl);
+ reg_start(R300_VAP_CNTL, 0);
+ e32(vap_cntl);
if (has_tcl) {
R300_STATECHANGE(r300, pvs);
- reg_start(R300_VAP_PVS_CNTL_1, 2);
- e32((0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) |
- (0 << R300_PVS_CNTL_1_POS_END_SHIFT) |
- (1 << R300_PVS_CNTL_1_PROGRAM_END_SHIFT));
- e32(0x0);
- e32(1 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT);
+ reg_start(R300_VAP_PVS_CODE_CNTL_0, 2);
+
+ e32((0 << R300_PVS_FIRST_INST_SHIFT) |
+ (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (1 << R300_PVS_LAST_INST_SHIFT));
+ e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+ (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
+ e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
R300_STATECHANGE(r300, vpi);
vsf_start_fragment(0x0, 8);
- e32(VP_OUT(ADD, OUT, 0, XYZW));
- e32(VP_IN(IN, 0));
- e32(VP_ZERO());
+
+ e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
+ e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+ e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
e32(0x0);
- e32(VP_OUT(ADD, OUT, 1, XYZW));
- e32(VP_IN(IN, 1));
- e32(VP_ZERO());
+ e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
+ e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+ e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
e32(0x0);
}
}
@@ -414,19 +533,22 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
void r300Flush(GLcontext * ctx)
{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
if (RADEON_DEBUG & DEBUG_IOCTL)
fprintf(stderr, "%s\n", __FUNCTION__);
- if (r300->cmdbuf.count_used > r300->cmdbuf.count_reemit)
- r300FlushCmdBuf(r300, __FUNCTION__);
+ if (rmesa->dma.flush)
+ rmesa->dma.flush( rmesa );
+
+ if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit)
+ r300FlushCmdBuf(rmesa, __FUNCTION__);
}
#ifdef USER_BUFFERS
#include "r300_mem.h"
-static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
+void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
{
struct r300_dma_buffer *dmabuf;
size = MAX2(size, RADEON_BUFFER_SIZE * 16);
@@ -438,9 +560,12 @@ static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
rmesa->dma.flush(rmesa);
}
- if (rmesa->dma.current.buf)
+ if (rmesa->dma.current.buf) {
+#ifdef USER_BUFFERS
+ r300_mem_use(rmesa, rmesa->dma.current.buf->id);
+#endif
r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
-
+ }
if (rmesa->dma.nr_released_bufs > 4)
r300FlushCmdBuf(rmesa, __FUNCTION__);
diff --git a/r300/r300_ioctl.h b/r300/r300_ioctl.h
index 7a19a2c..e1143fb 100644
--- a/r300/r300_ioctl.h
+++ b/r300/r300_ioctl.h
@@ -56,4 +56,5 @@ extern void r300AllocDmaRegion(r300ContextPtr rmesa,
extern void r300InitIoctlFuncs(struct dd_function_table *functions);
+extern void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size);
#endif /* __R300_IOCTL_H__ */
diff --git a/r300/r300_program.h b/r300/r300_program.h
deleted file mode 100644
index eddd783..0000000
--- a/r300/r300_program.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
-Copyright (C) 2004 Nicolai Haehnle. All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Nicolai Haehnle <prefect_@gmx.net>
- */
-
-#ifndef __R300_PROGRAM_H__
-#define __R300_PROGRAM_H__
-
-#include "r300_reg.h"
-
-/**
- * Vertex program helper macros
- */
-
-/* Produce out dword */
-#define VP_OUTCLASS_TMP R300_VPI_OUT_REG_CLASS_TEMPORARY
-#define VP_OUTCLASS_OUT R300_VPI_OUT_REG_CLASS_RESULT
-
-#define VP_OUTMASK_X R300_VPI_OUT_WRITE_X
-#define VP_OUTMASK_Y R300_VPI_OUT_WRITE_Y
-#define VP_OUTMASK_Z R300_VPI_OUT_WRITE_Z
-#define VP_OUTMASK_W R300_VPI_OUT_WRITE_W
-#define VP_OUTMASK_XY (VP_OUTMASK_X|VP_OUTMASK_Y)
-#define VP_OUTMASK_XZ (VP_OUTMASK_X|VP_OUTMASK_Z)
-#define VP_OUTMASK_XW (VP_OUTMASK_X|VP_OUTMASK_W)
-#define VP_OUTMASK_XYZ (VP_OUTMASK_XY|VP_OUTMASK_Z)
-#define VP_OUTMASK_XYW (VP_OUTMASK_XY|VP_OUTMASK_W)
-#define VP_OUTMASK_XZW (VP_OUTMASK_XZ|VP_OUTMASK_W)
-#define VP_OUTMASK_XYZW (VP_OUTMASK_XYZ|VP_OUTMASK_W)
-#define VP_OUTMASK_YZ (VP_OUTMASK_Y|VP_OUTMASK_Z)
-#define VP_OUTMASK_YW (VP_OUTMASK_Y|VP_OUTMASK_W)
-#define VP_OUTMASK_YZW (VP_OUTMASK_YZ|VP_OUTMASK_W)
-#define VP_OUTMASK_ZW (VP_OUTMASK_Z|VP_OUTMASK_W)
-
-#define VP_OUT(instr,outclass,outidx,outmask) \
- (R300_VPI_OUT_OP_##instr | \
- ((outidx) << R300_VPI_OUT_REG_INDEX_SHIFT) | \
- VP_OUTCLASS_##outclass | \
- VP_OUTMASK_##outmask)
-
-/* Produce in dword */
-#define VP_INCLASS_TMP R300_VPI_IN_REG_CLASS_TEMPORARY
-#define VP_INCLASS_IN R300_VPI_IN_REG_CLASS_ATTRIBUTE
-#define VP_INCLASS_CONST R300_VPI_IN_REG_CLASS_PARAMETER
-
-#define VP_IN(class,idx) \
- (((idx) << R300_VPI_IN_REG_INDEX_SHIFT) | \
- VP_INCLASS_##class | \
- (R300_VPI_IN_SELECT_X << R300_VPI_IN_X_SHIFT) | \
- (R300_VPI_IN_SELECT_Y << R300_VPI_IN_Y_SHIFT) | \
- (R300_VPI_IN_SELECT_Z << R300_VPI_IN_Z_SHIFT) | \
- (R300_VPI_IN_SELECT_W << R300_VPI_IN_W_SHIFT))
-#define VP_ZERO() \
- ((R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_X_SHIFT) | \
- (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_Y_SHIFT) | \
- (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_Z_SHIFT) | \
- (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_W_SHIFT))
-#define VP_ONE() \
- ((R300_VPI_IN_SELECT_ONE << R300_VPI_IN_X_SHIFT) | \
- (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_Y_SHIFT) | \
- (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_Z_SHIFT) | \
- (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_W_SHIFT))
-
-#define VP_NEG(in,comp) ((in) ^ (R300_VPI_IN_NEG_##comp))
-#define VP_NEGALL(in,comp) VP_NEG(VP_NEG(VP_NEG(VP_NEG((in),X),Y),Z),W)
-
-/**
- * Fragment program helper macros
- */
-
-/* Produce unshifted source selectors */
-#define FP_TMP(idx) (idx)
-#define FP_CONST(idx) ((idx) | (1 << 5))
-
-/* Produce source/dest selector dword */
-#define FP_SELC_MASK_NO 0
-#define FP_SELC_MASK_X 1
-#define FP_SELC_MASK_Y 2
-#define FP_SELC_MASK_XY 3
-#define FP_SELC_MASK_Z 4
-#define FP_SELC_MASK_XZ 5
-#define FP_SELC_MASK_YZ 6
-#define FP_SELC_MASK_XYZ 7
-
-#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \
- (((destidx) << R300_FPI1_DSTC_SHIFT) | \
- (FP_SELC_MASK_##regmask << 23) | \
- (FP_SELC_MASK_##outmask << 26) | \
- ((src0) << R300_FPI1_SRC0C_SHIFT) | \
- ((src1) << R300_FPI1_SRC1C_SHIFT) | \
- ((src2) << R300_FPI1_SRC2C_SHIFT))
-
-#define FP_SELA_MASK_NO 0
-#define FP_SELA_MASK_W 1
-
-#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \
- (((destidx) << R300_FPI3_DSTA_SHIFT) | \
- (FP_SELA_MASK_##regmask << 23) | \
- (FP_SELA_MASK_##outmask << 24) | \
- ((src0) << R300_FPI3_SRC0A_SHIFT) | \
- ((src1) << R300_FPI3_SRC1A_SHIFT) | \
- ((src2) << R300_FPI3_SRC2A_SHIFT))
-
-/* Produce unshifted argument selectors */
-#define FP_ARGC(source) R300_FPI0_ARGC_##source
-#define FP_ARGA(source) R300_FPI2_ARGA_##source
-#define FP_ABS(arg) ((arg) | (1 << 6))
-#define FP_NEG(arg) ((arg) ^ (1 << 5))
-
-/* Produce instruction dword */
-#define FP_INSTRC(opcode,arg0,arg1,arg2) \
- (R300_FPI0_OUTC_##opcode | \
- ((arg0) << R300_FPI0_ARG0C_SHIFT) | \
- ((arg1) << R300_FPI0_ARG1C_SHIFT) | \
- ((arg2) << R300_FPI0_ARG2C_SHIFT))
-
-#define FP_INSTRA(opcode,arg0,arg1,arg2) \
- (R300_FPI2_OUTA_##opcode | \
- ((arg0) << R300_FPI2_ARG0A_SHIFT) | \
- ((arg1) << R300_FPI2_ARG1A_SHIFT) | \
- ((arg2) << R300_FPI2_ARG2A_SHIFT))
-
-extern void debug_vp(GLcontext * ctx, struct gl_vertex_program *vp);
-
-#endif /* __R300_PROGRAM_H__ */
diff --git a/r300/r300_reg.h b/r300/r300_reg.h
index e5501b6..778db96 100644
--- a/r300/r300_reg.h
+++ b/r300/r300_reg.h
@@ -67,9 +67,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/*
* Vertex Array Processing (VAP) Control
- * Stolen from r200 code from Christoph Brill (It's a guess!)
*/
#define R300_VAP_CNTL 0x2080
+# define R300_PVS_NUM_SLOTS_SHIFT 0
+# define R300_PVS_NUM_CNTLRS_SHIFT 4
+# define R300_PVS_NUM_FPUS_SHIFT 8
+# define R300_VF_MAX_VTX_NUM_SHIFT 18
+# define R300_GL_CLIP_SPACE_DEF (0 << 22)
+# define R300_DX_CLIP_SPACE_DEF (1 << 22)
+# define R500_TCL_STATE_OPTIMIZATION (1 << 23)
/* This register is written directly and also starts data section
* in many 3d CP_PACKET3's
@@ -106,16 +112,19 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* number of vertices */
# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16
-/* BEGIN: Wild guesses */
+#define R500_VAP_INDEX_OFFSET 0x208c
+
#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090
# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0)
-# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT (1<<1)
-# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) /* GUESS */
-# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) /* GUESS */
-# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) /* GUESS */
-# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) /* GUESS */
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
+# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16)
#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094
+ /* each of the following is 3 bits wide, specifies number
+ of components */
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
@@ -124,30 +133,64 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
-/* END: Wild guesses */
+# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0
+# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1
+# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2
+# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3
+# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4
#define R300_SE_VTE_CNTL 0x20b0
-# define R300_VPORT_X_SCALE_ENA 0x00000001
-# define R300_VPORT_X_OFFSET_ENA 0x00000002
-# define R300_VPORT_Y_SCALE_ENA 0x00000004
-# define R300_VPORT_Y_OFFSET_ENA 0x00000008
-# define R300_VPORT_Z_SCALE_ENA 0x00000010
-# define R300_VPORT_Z_OFFSET_ENA 0x00000020
-# define R300_VTX_XY_FMT 0x00000100
-# define R300_VTX_Z_FMT 0x00000200
-# define R300_VTX_W0_FMT 0x00000400
-# define R300_VTX_W0_NORMALIZE 0x00000800
-# define R300_VTX_ST_DENORMALIZED 0x00001000
+# define R300_VPORT_X_SCALE_ENA (1 << 0)
+# define R300_VPORT_X_OFFSET_ENA (1 << 1)
+# define R300_VPORT_Y_SCALE_ENA (1 << 2)
+# define R300_VPORT_Y_OFFSET_ENA (1 << 3)
+# define R300_VPORT_Z_SCALE_ENA (1 << 4)
+# define R300_VPORT_Z_OFFSET_ENA (1 << 5)
+# define R300_VTX_XY_FMT (1 << 8)
+# define R300_VTX_Z_FMT (1 << 9)
+# define R300_VTX_W0_FMT (1 << 10)
+# define R300_SERIAL_PROC_ENA (1 << 11)
/* BEGIN: Vertex data assembly - lots of uncertainties */
/* gap */
+/* Maximum Vertex Indx Clamp */
+#define R300_VAP_VF_MAX_VTX_INDX 0x2134
+/* Minimum Vertex Indx Clamp */
+#define R300_VAP_VF_MIN_VTX_INDX 0x2138
+
+/** Vertex assembler/processor control status */
#define R300_VAP_CNTL_STATUS 0x2140
+/* No swap at all (default) */
# define R300_VC_NO_SWAP (0 << 0)
+/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */
# define R300_VC_16BIT_SWAP (1 << 0)
+/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */
# define R300_VC_32BIT_SWAP (2 << 0)
+/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */
+# define R300_VC_HALF_DWORD_SWAP (3 << 0)
+/* The TCL engine will not be used (as it is logically or even physically removed) */
# define R300_VAP_TCL_BYPASS (1 << 8)
+/* Read only flag if TCL engine is busy. */
+# define R300_VAP_PVS_BUSY (1 << 11)
+/* TODO: gap for MAX_MPS */
+/* Read only flag if the vertex store is busy. */
+# define R300_VAP_VS_BUSY (1 << 24)
+/* Read only flag if the reciprocal engine is busy. */
+# define R300_VAP_RCP_BUSY (1 << 25)
+/* Read only flag if the viewport transform engine is busy. */
+# define R300_VAP_VTE_BUSY (1 << 26)
+/* Read only flag if the memory interface unit is busy. */
+# define R300_VAP_MUI_BUSY (1 << 27)
+/* Read only flag if the vertex cache is busy. */
+# define R300_VAP_VC_BUSY (1 << 28)
+/* Read only flag if the vertex fetcher is busy. */
+# define R300_VAP_VF_BUSY (1 << 29)
+/* Read only flag if the register pipeline is busy. */
+# define R300_VAP_REGPIPE_BUSY (1 << 30)
+/* Read only flag if the VAP engine is busy. */
+# define R300_VAP_VAP_BUSY (1 << 31)
/* gap */
@@ -175,27 +218,31 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* Always set COMPONENTS_4 in immediate mode.
*/
-#define R300_VAP_INPUT_ROUTE_0_0 0x2150
-# define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0)
-# define R300_INPUT_ROUTE_COMPONENTS_2 (1 << 0)
-# define R300_INPUT_ROUTE_COMPONENTS_3 (2 << 0)
-# define R300_INPUT_ROUTE_COMPONENTS_4 (3 << 0)
-# define R300_INPUT_ROUTE_COMPONENTS_RGBA (4 << 0) /* GUESS */
-# define R300_VAP_INPUT_ROUTE_IDX_SHIFT 8
-# define R300_VAP_INPUT_ROUTE_IDX_MASK (31 << 8) /* GUESS */
-# define R300_VAP_INPUT_ROUTE_END (1 << 13)
-# define R300_INPUT_ROUTE_IMMEDIATE_MODE (0 << 14) /* GUESS */
-# define R300_INPUT_ROUTE_FLOAT (1 << 14) /* GUESS */
-# define R300_INPUT_ROUTE_UNSIGNED_BYTE (2 << 14) /* GUESS */
-# define R300_INPUT_ROUTE_FLOAT_COLOR (3 << 14) /* GUESS */
-#define R300_VAP_INPUT_ROUTE_0_1 0x2154
-#define R300_VAP_INPUT_ROUTE_0_2 0x2158
-#define R300_VAP_INPUT_ROUTE_0_3 0x215C
-#define R300_VAP_INPUT_ROUTE_0_4 0x2160
-#define R300_VAP_INPUT_ROUTE_0_5 0x2164
-#define R300_VAP_INPUT_ROUTE_0_6 0x2168
-#define R300_VAP_INPUT_ROUTE_0_7 0x216C
-
+#define R300_VAP_PROG_STREAM_CNTL_0 0x2150
+# define R300_DATA_TYPE_0_SHIFT 0
+# define R300_DATA_TYPE_FLOAT_1 0
+# define R300_DATA_TYPE_FLOAT_2 1
+# define R300_DATA_TYPE_FLOAT_3 2
+# define R300_DATA_TYPE_FLOAT_4 3
+# define R300_DATA_TYPE_BYTE 4
+# define R300_DATA_TYPE_D3DCOLOR 5
+# define R300_DATA_TYPE_SHORT_2 6
+# define R300_DATA_TYPE_SHORT_4 7
+# define R300_DATA_TYPE_VECTOR_3_TTT 8
+# define R300_DATA_TYPE_VECTOR_3_EET 9
+# define R300_SKIP_DWORDS_SHIFT 4
+# define R300_DST_VEC_LOC_SHIFT 8
+# define R300_LAST_VEC (1 << 13)
+# define R300_SIGNED (1 << 14)
+# define R300_NORMALIZE (1 << 15)
+# define R300_DATA_TYPE_1_SHIFT 16
+#define R300_VAP_PROG_STREAM_CNTL_1 0x2154
+#define R300_VAP_PROG_STREAM_CNTL_2 0x2158
+#define R300_VAP_PROG_STREAM_CNTL_3 0x215C
+#define R300_VAP_PROG_STREAM_CNTL_4 0x2160
+#define R300_VAP_PROG_STREAM_CNTL_5 0x2164
+#define R300_VAP_PROG_STREAM_CNTL_6 0x2168
+#define R300_VAP_PROG_STREAM_CNTL_7 0x216C
/* gap */
/* Notes:
@@ -203,9 +250,26 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* if vertex program uses only position, fglrx will set normal, too
* - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal.
*/
-#define R300_VAP_INPUT_CNTL_0 0x2180
-# define R300_INPUT_CNTL_0_COLOR 0x00000001
-#define R300_VAP_INPUT_CNTL_1 0x2184
+#define R300_VAP_VTX_STATE_CNTL 0x2180
+# define R300_COLOR_0_ASSEMBLY_SHIFT 0
+# define R300_SEL_COLOR 0
+# define R300_SEL_USER_COLOR_0 1
+# define R300_SEL_USER_COLOR_1 2
+# define R300_COLOR_1_ASSEMBLY_SHIFT 2
+# define R300_COLOR_2_ASSEMBLY_SHIFT 4
+# define R300_COLOR_3_ASSEMBLY_SHIFT 6
+# define R300_COLOR_4_ASSEMBLY_SHIFT 8
+# define R300_COLOR_5_ASSEMBLY_SHIFT 10
+# define R300_COLOR_6_ASSEMBLY_SHIFT 12
+# define R300_COLOR_7_ASSEMBLY_SHIFT 14
+# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16)
+
+/*
+ * Each bit in this field applies to the corresponding vector in the VSM
+ * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit
+ * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream.
+ */
+#define R300_VAP_VSM_VTX_ASSM 0x2184
# define R300_INPUT_CNTL_POS 0x00000001
# define R300_INPUT_CNTL_NORMAL 0x00000002
# define R300_INPUT_CNTL_COLOR 0x00000004
@@ -218,6 +282,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */
# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */
+/* Programmable Stream Control Signed Normalize Control */
+#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc
+# define SGN_NORM_ZERO 0
+# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1
+# define SGN_NORM_NO_ZERO 2
+
/* gap */
/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0
@@ -227,26 +297,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* mode, the swizzling pattern is e.g. used to set zw components in texture
* coordinates with only tweo components.
*/
-#define R300_VAP_INPUT_ROUTE_1_0 0x21E0
+#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0
+# define R300_SWIZZLE0_SHIFT 0
+# define R300_SWIZZLE_SELECT_X_SHIFT 0
+# define R300_SWIZZLE_SELECT_Y_SHIFT 3
+# define R300_SWIZZLE_SELECT_Z_SHIFT 6
+# define R300_SWIZZLE_SELECT_W_SHIFT 9
+
+# define R300_SWIZZLE_SELECT_X 0
+# define R300_SWIZZLE_SELECT_Y 1
+# define R300_SWIZZLE_SELECT_Z 2
+# define R300_SWIZZLE_SELECT_W 3
+# define R300_SWIZZLE_SELECT_FP_ZERO 4
+# define R300_SWIZZLE_SELECT_FP_ONE 5
+/* alternate forms for r300_emit.c */
# define R300_INPUT_ROUTE_SELECT_X 0
# define R300_INPUT_ROUTE_SELECT_Y 1
# define R300_INPUT_ROUTE_SELECT_Z 2
# define R300_INPUT_ROUTE_SELECT_W 3
# define R300_INPUT_ROUTE_SELECT_ZERO 4
# define R300_INPUT_ROUTE_SELECT_ONE 5
-# define R300_INPUT_ROUTE_SELECT_MASK 7
-# define R300_INPUT_ROUTE_X_SHIFT 0
-# define R300_INPUT_ROUTE_Y_SHIFT 3
-# define R300_INPUT_ROUTE_Z_SHIFT 6
-# define R300_INPUT_ROUTE_W_SHIFT 9
-# define R300_INPUT_ROUTE_ENABLE (15 << 12)
-#define R300_VAP_INPUT_ROUTE_1_1 0x21E4
-#define R300_VAP_INPUT_ROUTE_1_2 0x21E8
-#define R300_VAP_INPUT_ROUTE_1_3 0x21EC
-#define R300_VAP_INPUT_ROUTE_1_4 0x21F0
-#define R300_VAP_INPUT_ROUTE_1_5 0x21F4
-#define R300_VAP_INPUT_ROUTE_1_6 0x21F8
-#define R300_VAP_INPUT_ROUTE_1_7 0x21FC
+
+# define R300_WRITE_ENA_SHIFT 12
+# define R300_WRITE_ENA_X 1
+# define R300_WRITE_ENA_Y 2
+# define R300_WRITE_ENA_Z 4
+# define R300_WRITE_ENA_W 8
+# define R300_SWIZZLE1_SHIFT 16
+#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4
+#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8
+#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec
+#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0
+#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4
+#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8
+#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc
/* END: Vertex data assembly */
@@ -278,23 +362,35 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* Multiple vertex programs and parameter sets can be loaded at once,
* which could explain the size discrepancy.
*/
-#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200
-# define R300_PVS_UPLOAD_PROGRAM 0x00000000
-# define R300_PVS_UPLOAD_PARAMETERS 0x00000200
-# define R300_PVS_UPLOAD_CLIP_PLANE0 0x00000400
-# define R300_PVS_UPLOAD_CLIP_PLANE1 0x00000401
-# define R300_PVS_UPLOAD_CLIP_PLANE2 0x00000402
-# define R300_PVS_UPLOAD_CLIP_PLANE3 0x00000403
-# define R300_PVS_UPLOAD_CLIP_PLANE4 0x00000404
-# define R300_PVS_UPLOAD_CLIP_PLANE5 0x00000405
-# define R300_PVS_UPLOAD_POINTSIZE 0x00000406
-
-# define R500_PVS_UPLOAD_CLIP_PLANE0 0x00000600
-# define R500_PVS_UPLOAD_CLIP_PLANE1 0x00000601
-# define R500_PVS_UPLOAD_CLIP_PLANE2 0x00000602
-# define R500_PVS_UPLOAD_CLIP_PLANE3 0x00000603
-# define R500_PVS_UPLOAD_CLIP_PLANE4 0x00000604
-# define R500_PVS_UPLOAD_CLIP_PLANE5 0x00000605
+#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200
+# define R300_PVS_CODE_START 0
+# define R300_MAX_PVS_CODE_LINES 256
+# define R500_MAX_PVS_CODE_LINES 1024
+# define R300_PVS_CONST_START 512
+# define R500_PVS_CONST_START 1024
+# define R300_MAX_PVS_CONST_VECS 256
+# define R500_MAX_PVS_CONST_VECS 1024
+# define R300_PVS_UCP_START 1024
+# define R500_PVS_UCP_START 1536
+# define R300_POINT_VPORT_SCALE_OFFSET 1030
+# define R500_POINT_VPORT_SCALE_OFFSET 1542
+# define R300_POINT_GEN_TEX_OFFSET 1031
+# define R500_POINT_GEN_TEX_OFFSET 1543
+
+/*
+ * These are obsolete defines form r300_context.h, but they might give some
+ * clues when investigating the addresses further...
+ */
+#if 0
+#define VSF_DEST_PROGRAM 0x0
+#define VSF_DEST_MATRIX0 0x200
+#define VSF_DEST_MATRIX1 0x204
+#define VSF_DEST_MATRIX2 0x208
+#define VSF_DEST_VECTOR0 0x20c
+#define VSF_DEST_VECTOR1 0x20d
+#define VSF_DEST_UNKNOWN1 0x400
+#define VSF_DEST_UNKNOWN2 0x406
+#endif
/* gap */
@@ -314,9 +410,33 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view
*/
#define R300_VAP_CLIP_CNTL 0x221C
-# define R300_221C_NORMAL 0x00000000
-# define R300_221C_CLEAR 0x0001C000
-#define R300_VAP_UCP_ENABLE_0 (1 << 0)
+# define R300_VAP_UCP_ENABLE_0 (1 << 0)
+# define R300_VAP_UCP_ENABLE_1 (1 << 1)
+# define R300_VAP_UCP_ENABLE_2 (1 << 2)
+# define R300_VAP_UCP_ENABLE_3 (1 << 3)
+# define R300_VAP_UCP_ENABLE_4 (1 << 4)
+# define R300_VAP_UCP_ENABLE_5 (1 << 5)
+# define R300_PS_UCP_MODE_DIST_COP (0 << 14)
+# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14)
+# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14)
+# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14)
+# define R300_CLIP_DISABLE (1 << 16)
+# define R300_UCP_CULL_ONLY_ENABLE (1 << 17)
+# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18)
+# define R500_COLOR2_IS_TEXTURE (1 << 20)
+# define R500_COLOR3_IS_TEXTURE (1 << 21)
+
+/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first
+ * plane is per-pixel and the second plane is per-vertex.
+ *
+ * This was determined by experimentation alone but I believe it is correct.
+ *
+ * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest.
+ */
+#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220
+#define R300_VAP_GB_VERT_DISC_ADJ 0x2224
+#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
+#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
/* gap */
@@ -325,10 +445,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* Therefore, I suspect writing zero to 0x2284 synchronizes the engine and
* avoids bugs caused by still running shaders reading bad data from memory.
*/
-#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */
+#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284
-/* Absolutely no clue what this register is about. */
-#define R300_VAP_UNKNOWN_2288 0x2288
+/* This register is used to define the number of core clocks to wait for a
+ * vertex to be received by the VAP input controller (while the primitive
+ * path is backed up) before forcing any accumulated vertices to be submitted
+ * to the vertex processing path.
+ */
+#define VAP_PVS_VTX_TIMEOUT_REG 0x2288
# define R300_2288_R300 0x00750000 /* -- nh */
# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
@@ -341,25 +465,27 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* The meaning of the two UNKNOWN fields is obviously not known. However,
* experiments so far have shown that both *must* point to an instruction
* inside the vertex program, otherwise the GPU locks up.
+ *
* fglrx usually sets CNTL_3_UNKNOWN to the end of the program and
- * CNTL_1_UNKNOWN points to instruction where last write to position takes
- * place.
+ * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to
+ * position takes place.
+ *
* Most likely this is used to ignore rest of the program in cases
* where group of verts arent visible. For some reason this "section"
* is sometimes accepted other instruction that have no relationship with
- *position calculations.
+ * position calculations.
*/
-#define R300_VAP_PVS_CNTL_1 0x22D0
-# define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0
-# define R300_PVS_CNTL_1_POS_END_SHIFT 10
-# define R300_PVS_CNTL_1_PROGRAM_END_SHIFT 20
+#define R300_VAP_PVS_CODE_CNTL_0 0x22D0
+# define R300_PVS_FIRST_INST_SHIFT 0
+# define R300_PVS_XYZW_VALID_INST_SHIFT 10
+# define R300_PVS_LAST_INST_SHIFT 20
/* Addresses are relative the the vertex program parameters area. */
-#define R300_VAP_PVS_CNTL_2 0x22D4
-# define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0
-# define R300_PVS_CNTL_2_PARAM_COUNT_SHIFT 16
-#define R300_VAP_PVS_CNTL_3 0x22D8
-# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT 10
-# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0
+#define R300_VAP_PVS_CONST_CNTL 0x22D4
+# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0
+# define R300_PVS_MAX_CONST_ADDR_SHIFT 16
+#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
+# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
+#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for
* immediate vertices
@@ -407,17 +533,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* (or something closely related to that).
* This bit is rather fatal at the time being due to lackings at pixel
* shader side
+ * Specifies top of Raster pipe specific enable controls.
*/
#define R300_GB_ENABLE 0x4008
-# define R300_GB_POINT_STUFF_ENABLE (1<<0)
-# define R300_GB_LINE_STUFF_ENABLE (1<<1)
-# define R300_GB_TRIANGLE_STUFF_ENABLE (1<<2)
-# define R300_GB_STENCIL_AUTO_ENABLE (1<<4)
-# define R300_GB_UNK31 (1<<31)
+# define R300_GB_POINT_STUFF_DISABLE (0 << 0)
+# define R300_GB_POINT_STUFF_ENABLE (1 << 0) /* Specifies if points will have stuffed texture coordinates. */
+# define R300_GB_LINE_STUFF_DISABLE (0 << 1)
+# define R300_GB_LINE_STUFF_ENABLE (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */
+# define R300_GB_TRIANGLE_STUFF_DISABLE (0 << 2)
+# define R300_GB_TRIANGLE_STUFF_ENABLE (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */
+# define R300_GB_STENCIL_AUTO_DISABLE (0 << 4)
+# define R300_GB_STENCIL_AUTO_ENABLE (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */
+# define R300_GB_STENCIL_AUTO_FORCE (2 << 4) /* Force 0 into dzy low bit. */
+
/* each of the following is 2 bits wide */
-#define R300_GB_TEX_REPLICATE 0
-#define R300_GB_TEX_ST 1
-#define R300_GB_TEX_STR 2
+#define R300_GB_TEX_REPLICATE 0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */
+#define R300_GB_TEX_ST 1 /* Stuff with source texture coordinates (S,T). */
+#define R300_GB_TEX_STR 2 /* Stuff with source texture coordinates (S,T,R). */
# define R300_GB_TEX0_SOURCE_SHIFT 16
# define R300_GB_TEX1_SOURCE_SHIFT 18
# define R300_GB_TEX2_SOURCE_SHIFT 20
@@ -428,7 +560,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_GB_TEX7_SOURCE_SHIFT 30
/* MSPOS - positions for multisample antialiasing (?) */
-#define R300_GB_MSPOS0 0x4010
+#define R300_GB_MSPOS0 0x4010
/* shifts - each of the fields is 4 bits */
# define R300_GB_MSPOS0__MS_X0_SHIFT 0
# define R300_GB_MSPOS0__MS_Y0_SHIFT 4
@@ -439,7 +571,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_GB_MSPOS0__MSBD0_Y 24
# define R300_GB_MSPOS0__MSBD0_X 28
-#define R300_GB_MSPOS1 0x4014
+#define R300_GB_MSPOS1 0x4014
# define R300_GB_MSPOS1__MS_X3_SHIFT 0
# define R300_GB_MSPOS1__MS_Y3_SHIFT 4
# define R300_GB_MSPOS1__MS_X4_SHIFT 8
@@ -448,31 +580,47 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_GB_MSPOS1__MS_Y5_SHIFT 20
# define R300_GB_MSPOS1__MSBD1 24
-
-#define R300_GB_TILE_CONFIG 0x4018
-# define R300_GB_TILE_ENABLE (1<<0)
-# define R300_GB_TILE_PIPE_COUNT_RV300 0
-# define R300_GB_TILE_PIPE_COUNT_R300 (3<<1)
-# define R300_GB_TILE_PIPE_COUNT_R420 (7<<1)
-# define R300_GB_TILE_PIPE_COUNT_RV410 (3<<1)
-# define R300_GB_TILE_SIZE_8 0
-# define R300_GB_TILE_SIZE_16 (1<<4)
-# define R300_GB_TILE_SIZE_32 (2<<4)
-# define R300_GB_SUPER_SIZE_1 (0<<6)
-# define R300_GB_SUPER_SIZE_2 (1<<6)
-# define R300_GB_SUPER_SIZE_4 (2<<6)
-# define R300_GB_SUPER_SIZE_8 (3<<6)
-# define R300_GB_SUPER_SIZE_16 (4<<6)
-# define R300_GB_SUPER_SIZE_32 (5<<6)
-# define R300_GB_SUPER_SIZE_64 (6<<6)
-# define R300_GB_SUPER_SIZE_128 (7<<6)
+/* Specifies the graphics pipeline configuration for rasterization. */
+#define R300_GB_TILE_CONFIG 0x4018
+# define R300_GB_TILE_DISABLE (0 << 0)
+# define R300_GB_TILE_ENABLE (1 << 0)
+# define R300_GB_TILE_PIPE_COUNT_RV300 (0 << 1) /* RV350 (1 pipe, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R300 (3 << 1) /* R300 (2 pipes, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R420_3P (6 << 1) /* R420-3P (3 pipes, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R420 (7 << 1) /* R420 (4 pipes, 1 ctx) */
+# define R300_GB_TILE_SIZE_8 (0 << 4)
+# define R300_GB_TILE_SIZE_16 (1 << 4)
+# define R300_GB_TILE_SIZE_32 (2 << 4)
+# define R300_GB_SUPER_SIZE_1 (0 << 6)
+# define R300_GB_SUPER_SIZE_2 (1 << 6)
+# define R300_GB_SUPER_SIZE_4 (2 << 6)
+# define R300_GB_SUPER_SIZE_8 (3 << 6)
+# define R300_GB_SUPER_SIZE_16 (4 << 6)
+# define R300_GB_SUPER_SIZE_32 (5 << 6)
+# define R300_GB_SUPER_SIZE_64 (6 << 6)
+# define R300_GB_SUPER_SIZE_128 (7 << 6)
# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */
# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */
-# define R300_GB_SUPER_TILE_A 0
-# define R300_GB_SUPER_TILE_B (1<<15)
-# define R300_GB_SUBPIXEL_1_12 0
-# define R300_GB_SUBPIXEL_1_16 (1<<16)
-
+# define R300_GB_SUPER_TILE_A (0 << 15)
+# define R300_GB_SUPER_TILE_B (1 << 15)
+# define R300_GB_SUBPIXEL_1_12 (0 << 16)
+# define R300_GB_SUBPIXEL_1_16 (1 << 16)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17)
+# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19)
+# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19)
+# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20)
+# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20)
+# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21)
+# define GB_TILE_CONFIG_SUBPRECISION (0 << 22)
+# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23)
+# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23)
+# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24)
+# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24)
+
+/* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */
#define R300_GB_FIFO_SIZE 0x4024
/* each of the following is 2 bits wide */
#define R300_GB_FIFO_SIZE_32 0
@@ -496,30 +644,102 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */
# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24
-#define R300_GB_SELECT 0x401C
-# define R300_GB_FOG_SELECT_C0A 0
-# define R300_GB_FOG_SELECT_C1A 1
-# define R300_GB_FOG_SELECT_C2A 2
-# define R300_GB_FOG_SELECT_C3A 3
-# define R300_GB_FOG_SELECT_1_1_W 4
-# define R300_GB_FOG_SELECT_Z 5
-# define R300_GB_DEPTH_SELECT_Z 0
-# define R300_GB_DEPTH_SELECT_1_1_W (1<<3)
-# define R300_GB_W_SELECT_1_W 0
-# define R300_GB_W_SELECT_1 (1<<4)
-
-#define R300_GB_AA_CONFIG 0x4020
-# define R300_AA_DISABLE 0x00
-# define R300_AA_ENABLE 0x01
-# define R300_AA_SUBSAMPLES_2 0
-# define R300_AA_SUBSAMPLES_3 (1<<1)
-# define R300_AA_SUBSAMPLES_4 (2<<1)
-# define R300_AA_SUBSAMPLES_6 (3<<1)
+#define GB_Z_PEQ_CONFIG 0x4028
+# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0)
+# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0)
+
+/* Specifies various polygon specific selects (fog, depth, perspective). */
+#define R300_GB_SELECT 0x401c
+# define R300_GB_FOG_SELECT_C0A (0 << 0)
+# define R300_GB_FOG_SELECT_C1A (1 << 0)
+# define R300_GB_FOG_SELECT_C2A (2 << 0)
+# define R300_GB_FOG_SELECT_C3A (3 << 0)
+# define R300_GB_FOG_SELECT_1_1_W (4 << 0)
+# define R300_GB_FOG_SELECT_Z (5 << 0)
+# define R300_GB_DEPTH_SELECT_Z (0 << 3
+# define R300_GB_DEPTH_SELECT_1_1_W (1 << 3)
+# define R300_GB_W_SELECT_1_W (0 << 4)
+# define R300_GB_W_SELECT_1 (1 << 4)
+# define R300_GB_FOG_STUFF_DISABLE (0 << 5)
+# define R300_GB_FOG_STUFF_ENABLE (1 << 5)
+# define R300_GB_FOG_STUFF_TEX_SHIFT 6
+# define R300_GB_FOG_STUFF_TEX_MASK 0x000003c0
+# define R300_GB_FOG_STUFF_COMP_SHIFT 10
+# define R300_GB_FOG_STUFF_COMP_MASK 0x00000c00
+
+/* Specifies the graphics pipeline configuration for antialiasing. */
+#define GB_AA_CONFIG 0x4020
+# define GB_AA_CONFIG_AA_DISABLE (0 << 0)
+# define GB_AA_CONFIG_AA_ENABLE (1 << 0)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1)
+
+/* Selects which of 4 pipes are active. */
+#define GB_PIPE_SELECT 0x402c
+# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0
+# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2
+# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4
+# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6
+# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8
+# define GB_PIPE_SELECT_MAX_PIPE 12
+# define GB_PIPE_SELECT_BAD_PIPES 14
+# define GB_PIPE_SELECT_CONFIG_PIPES 18
+
+
+/* Specifies the sizes of the various FIFO`s in the sc/rs. */
+#define GB_FIFO_SIZE1 0x4070
+/* High water mark for SC input fifo */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0
+# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f
+/* High water mark for SC input fifo (B) */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6
+# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0
+/* High water mark for RS colors' fifo */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12
+# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000
+/* High water mark for RS textures' fifo */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18
+# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000
+
+/* This table specifies the source location and format for up to 16 texture
+ * addresses (i[0]:i[15]) and four colors (c[0]:c[3])
+ */
+#define R500_RS_IP_0 0x4074
+#define R500_RS_IP_1 0x4078
+#define R500_RS_IP_2 0x407C
+#define R500_RS_IP_3 0x4080
+#define R500_RS_IP_4 0x4084
+#define R500_RS_IP_5 0x4088
+#define R500_RS_IP_6 0x408C
+#define R500_RS_IP_7 0x4090
+#define R500_RS_IP_8 0x4094
+#define R500_RS_IP_9 0x4098
+#define R500_RS_IP_10 0x409C
+#define R500_RS_IP_11 0x40A0
+#define R500_RS_IP_12 0x40A4
+#define R500_RS_IP_13 0x40A8
+#define R500_RS_IP_14 0x40AC
+#define R500_RS_IP_15 0x40B0
+#define R500_RS_IP_PTR_K0 62
+#define R500_RS_IP_PTR_K1 63
+#define R500_RS_IP_TEX_PTR_S_SHIFT 0
+#define R500_RS_IP_TEX_PTR_T_SHIFT 6
+#define R500_RS_IP_TEX_PTR_R_SHIFT 12
+#define R500_RS_IP_TEX_PTR_Q_SHIFT 18
+#define R500_RS_IP_COL_PTR_SHIFT 24
+#define R500_RS_IP_COL_FMT_SHIFT 27
+# define R500_RS_COL_PTR(x) (x << 24)
+# define R500_RS_COL_FMT(x) (x << 27)
+/* gap */
+#define R500_RS_IP_OFFSET_DIS (0 << 31)
+#define R500_RS_IP_OFFSET_EN (1 << 31)
/* gap */
/* Zero to flush caches. */
-#define R300_TX_CNTL 0x4100
+#define R300_TX_INVALTAGS 0x4100
#define R300_TX_FLUSH 0x0
/* The upper enable bits are guessed, based on fglrx reported limits. */
@@ -541,53 +761,335 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_ENABLE_14 (1 << 14)
# define R300_TX_ENABLE_15 (1 << 15)
-/* The pointsize is given in multiples of 6. The pointsize can be
- * enormous: Clear() renders a single point that fills the entire
- * framebuffer.
+#define R500_TX_FILTER_4 0x4110
+# define R500_TX_WEIGHT_1_SHIFT (0)
+# define R500_TX_WEIGHT_0_SHIFT (11)
+# define R500_TX_WEIGHT_PAIR (1<<22)
+# define R500_TX_PHASE_SHIFT (23)
+# define R500_TX_DIRECTION_HORIZONTAL (0<<27)
+# define R500_TX_DIRECTION_VERITCAL (1<<27)
+
+/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
+#define R300_GA_POINT_S0 0x4200
+
+/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
+#define R300_GA_POINT_T0 0x4204
+
+/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
+#define R300_GA_POINT_S1 0x4208
+
+/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
+#define R300_GA_POINT_T1 0x420c
+
+/* Specifies amount to shift integer position of vertex (screen space) before
+ * converting to float for triangle stipple.
*/
-#define R300_RE_POINTSIZE 0x421C
-# define R300_POINTSIZE_Y_SHIFT 0
-# define R300_POINTSIZE_Y_MASK (0xFFFF << 0) /* GUESS */
-# define R300_POINTSIZE_X_SHIFT 16
-# define R300_POINTSIZE_X_MASK (0xFFFF << 16) /* GUESS */
+#define R300_GA_TRIANGLE_STIPPLE 0x4214
+# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0
+# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f
+# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16
+# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000
+
+/* The pointsize is given in multiples of 6. The pointsize can be enormous:
+ * Clear() renders a single point that fills the entire framebuffer.
+ * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in
+ * 8b precision).
+ */
+#define R300_GA_POINT_SIZE 0x421C
+# define R300_POINTSIZE_Y_SHIFT 0
+# define R300_POINTSIZE_Y_MASK 0x0000ffff
+# define R300_POINTSIZE_X_SHIFT 16
+# define R300_POINTSIZE_X_MASK 0xffff0000
# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6)
-/* The line width is given in multiples of 6.
+/* Blue fill color */
+#define R500_GA_FILL_R 0x4220
+
+/* Blue fill color */
+#define R500_GA_FILL_G 0x4224
+
+/* Blue fill color */
+#define R500_GA_FILL_B 0x4228
+
+/* Alpha fill color */
+#define R500_GA_FILL_A 0x422c
+
+
+/* Specifies maximum and minimum point & sprite sizes for per vertex size
+ * specification. The lower part (15:0) is MIN and (31:16) is max.
+ */
+#define R300_GA_POINT_MINMAX 0x4230
+# define R300_GA_POINT_MINMAX_MIN_SHIFT 0
+# define R300_GA_POINT_MINMAX_MIN_MASK (0xFFFF << 0)
+# define R300_GA_POINT_MINMAX_MAX_SHIFT 16
+# define R300_GA_POINT_MINMAX_MAX_MASK (0xFFFF << 16)
+
+/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b
+ * subprecision); (16.0) fixed format.
+ *
+ * The line width is given in multiples of 6.
* In default mode lines are classified as vertical lines.
* HO: horizontal
* VE: vertical or horizontal
* HO & VE: no classification
*/
-#define R300_RE_LINE_CNT 0x4234
-# define R300_LINESIZE_SHIFT 0
-# define R300_LINESIZE_MASK (0xFFFF << 0) /* GUESS */
-# define R300_LINESIZE_MAX (R300_LINESIZE_MASK / 6)
+#define R300_GA_LINE_CNTL 0x4234
+# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0
+# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff
+# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16)
+# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16)
+# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */
+# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */
+# define R500_GA_LINE_CNTL_SORT_NO (0 << 18)
+# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18)
+/** TODO: looks wrong */
+# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6)
+/** TODO: looks wrong */
# define R300_LINE_CNT_HO (1 << 16)
+/** TODO: looks wrong */
# define R300_LINE_CNT_VE (1 << 17)
-/* Some sort of scale or clamp value for texcoordless textures. */
-#define R300_RE_UNK4238 0x4238
-
-/* Something shade related */
-#define R300_RE_SHADE 0x4274
-
-#define R300_RE_SHADE_MODEL 0x4278
-# define R300_RE_SHADE_MODEL_SMOOTH 0x3aaaa
-# define R300_RE_SHADE_MODEL_FLAT 0x39595
-
-/* Dangerous */
-#define R300_RE_POLYGON_MODE 0x4288
-# define R300_PM_ENABLED (1 << 0)
-# define R300_PM_FRONT_POINT (0 << 0)
-# define R300_PM_BACK_POINT (0 << 0)
-# define R300_PM_FRONT_LINE (1 << 4)
-# define R300_PM_FRONT_FILL (1 << 5)
-# define R300_PM_BACK_LINE (1 << 7)
-# define R300_PM_BACK_FILL (1 << 8)
-
-/* Fog parameters */
-#define R300_RE_FOG_SCALE 0x4294
-#define R300_RE_FOG_START 0x4298
+/* Line Stipple configuration information. */
+#define R300_GA_LINE_STIPPLE_CONFIG 0x4238
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2
+# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc
+
+/* Used to load US instructions and constants */
+#define R500_GA_US_VECTOR_INDEX 0x4250
+# define R500_GA_US_VECTOR_INDEX_SHIFT 0
+# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff
+# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16)
+# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16)
+# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17)
+# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17)
+
+/* Data register for loading US instructions and constants */
+#define R500_GA_US_VECTOR_DATA 0x4254
+
+/* Specifies color properties and mappings of textures. */
+#define R500_GA_COLOR_CONTROL_PS3 0x4258
+# define R500_TEX0_SHADING_PS3_SOLID (0 << 0)
+# define R500_TEX0_SHADING_PS3_FLAT (1 << 0)
+# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0)
+# define R500_TEX1_SHADING_PS3_SOLID (0 << 2)
+# define R500_TEX1_SHADING_PS3_FLAT (1 << 2)
+# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2)
+# define R500_TEX2_SHADING_PS3_SOLID (0 << 4)
+# define R500_TEX2_SHADING_PS3_FLAT (1 << 4)
+# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4)
+# define R500_TEX3_SHADING_PS3_SOLID (0 << 6)
+# define R500_TEX3_SHADING_PS3_FLAT (1 << 6)
+# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6)
+# define R500_TEX4_SHADING_PS3_SOLID (0 << 8)
+# define R500_TEX4_SHADING_PS3_FLAT (1 << 8)
+# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8)
+# define R500_TEX5_SHADING_PS3_SOLID (0 << 10)
+# define R500_TEX5_SHADING_PS3_FLAT (1 << 10)
+# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10)
+# define R500_TEX6_SHADING_PS3_SOLID (0 << 12)
+# define R500_TEX6_SHADING_PS3_FLAT (1 << 12)
+# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12)
+# define R500_TEX7_SHADING_PS3_SOLID (0 << 14)
+# define R500_TEX7_SHADING_PS3_FLAT (1 << 14)
+# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14)
+# define R500_TEX8_SHADING_PS3_SOLID (0 << 16)
+# define R500_TEX8_SHADING_PS3_FLAT (1 << 16)
+# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16)
+# define R500_TEX9_SHADING_PS3_SOLID (0 << 18)
+# define R500_TEX9_SHADING_PS3_FLAT (1 << 18)
+# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18)
+# define R500_TEX10_SHADING_PS3_SOLID (0 << 20)
+# define R500_TEX10_SHADING_PS3_FLAT (1 << 20)
+# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20)
+# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22)
+# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26)
+
+/* Returns idle status of various G3D block, captured when GA_IDLE written or
+ * when hard or soft reset asserted.
+ */
+#define R500_GA_IDLE 0x425c
+# define R500_GA_IDLE_PIPE3_Z_IDLE (0 << 0)
+# define R500_GA_IDLE_PIPE2_Z_IDLE (0 << 1)
+# define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2)
+# define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3)
+# define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4)
+# define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5)
+# define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6)
+# define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7)
+# define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8)
+# define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9)
+# define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10)
+# define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11)
+# define R500_GA_IDLE_PIPE1_Z_IDLE (0 << 12)
+# define R500_GA_IDLE_PIPE0_Z_IDLE (0 << 13)
+# define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14)
+# define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15)
+# define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16)
+# define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17)
+# define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18)
+# define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19)
+# define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20)
+# define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21)
+# define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22)
+# define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23)
+# define R500_GA_IDLE_SU_IDLE (0 << 24)
+# define R500_GA_IDLE_GA_IDLE (0 << 25)
+# define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26)
+
+/* Current value of stipple accumulator. */
+#define R300_GA_LINE_STIPPLE_VALUE 0x4260
+
+/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */
+#define R300_GA_LINE_S0 0x4264
+/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */
+#define R300_GA_LINE_S1 0x4268
+
+/* GA Input fifo high water marks */
+#define R500_GA_FIFO_CNTL 0x4270
+# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007
+# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0
+# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038
+# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3
+# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0
+# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6
+
+/* GA enhance/tweaks */
+#define R300_GA_ENHANCE 0x4274
+# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0)
+# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */
+# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1)
+# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */
+# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */
+# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */
+# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3)
+# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */
+
+#define R300_GA_COLOR_CONTROL 0x4278
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0)
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT (1 << 0)
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD (2 << 0)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID (0 << 2)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT (1 << 2)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD (2 << 2)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID (0 << 4)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT (1 << 4)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD (2 << 4)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID (0 << 6)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT (1 << 6)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD (2 << 6)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID (0 << 8)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT (1 << 8)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD (2 << 8)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID (0 << 10)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT (1 << 10)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD (2 << 10)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID (0 << 12)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT (1 << 12)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD (2 << 12)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID (0 << 14)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT (1 << 14)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD (2 << 14)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST (0 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16)
+
+/** TODO: might be candidate for removal */
+# define R300_RE_SHADE_MODEL_SMOOTH ( \
+ R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST )
+/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */
+# define R300_RE_SHADE_MODEL_FLAT ( \
+ R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST )
+
+/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */
+#define R300_GA_SOLID_RG 0x427c
+# define GA_SOLID_RG_COLOR_GREEN_SHIFT 0
+# define GA_SOLID_RG_COLOR_GREEN_MASK 0x0000ffff
+# define GA_SOLID_RG_COLOR_RED_SHIFT 16
+# define GA_SOLID_RG_COLOR_RED_MASK 0xffff0000
+/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. */
+#define R300_GA_SOLID_BA 0x4280
+# define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0
+# define GA_SOLID_BA_COLOR_ALPHA_MASK 0x0000ffff
+# define GA_SOLID_BA_COLOR_BLUE_SHIFT 16
+# define GA_SOLID_BA_COLOR_BLUE_MASK 0xffff0000
+
+/* Polygon Mode
+ * Dangerous
+ */
+#define R300_GA_POLY_MODE 0x4288
+# define R300_GA_POLY_MODE_DISABLE (0 << 0)
+# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */
+/* reserved */
+# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4)
+# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4)
+# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4)
+/* reserved */
+# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7)
+# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7)
+# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7)
+/* reserved */
+
+/* Specifies the rouding mode for geometry & color SPFP to FP conversions. */
+#define R300_GA_ROUND_MODE 0x428c
+# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0)
+# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0)
+# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2)
+# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2)
+# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4)
+# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4)
+# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5)
+# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5)
+# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6
+# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0
+
+/* Specifies x & y offsets for vertex data after conversion to FP.
+ * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b
+ * subprecision).
+ */
+#define R300_GA_OFFSET 0x4290
+# define R300_GA_OFFSET_X_OFFSET_SHIFT 0
+# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff
+# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16
+# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000
+
+/* Specifies the scale to apply to fog. */
+#define R300_GA_FOG_SCALE 0x4294
+/* Specifies the offset to apply to fog. */
+#define R300_GA_FOG_OFFSET 0x4298
+/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */
+#define R300_GA_SOFT_RESET 0x429c
/* Not sure why there are duplicate of factor and constant values.
* My best guess so far is that there are seperate zbiases for test and write.
@@ -595,11 +1097,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* Some of the tests indicate that fgl has a fallback implementation of zbias
* via pixel shaders.
*/
-#define R300_RE_ZBIAS_CNTL 0x42A0 /* GUESS */
-#define R300_RE_ZBIAS_T_FACTOR 0x42A4
-#define R300_RE_ZBIAS_T_CONSTANT 0x42A8
-#define R300_RE_ZBIAS_W_FACTOR 0x42AC
-#define R300_RE_ZBIAS_W_CONSTANT 0x42B0
+#define R300_SU_TEX_WRAP 0x42A0
+#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4
+#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8
+#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC
+#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0
/* This register needs to be set to (1<<1) for RV350 to correctly
* perform depth test (see --vb-triangles in r300_demo)
@@ -610,31 +1112,44 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* One to enable depth test and one for depth write.
* Yet this doesnt explain why depth writes work ...
*/
-#define R300_RE_OCCLUSION_CNTL 0x42B4
-# define R300_OCCLUSION_ON (1<<1)
+#define R300_SU_POLY_OFFSET_ENABLE 0x42B4
+# define R300_FRONT_ENABLE (1 << 0)
+# define R300_BACK_ENABLE (1 << 1)
+# define R300_PARA_ENABLE (1 << 2)
-#define R300_RE_CULL_CNTL 0x42B8
+#define R300_SU_CULL_MODE 0x42B8
# define R300_CULL_FRONT (1 << 0)
# define R300_CULL_BACK (1 << 1)
# define R300_FRONT_FACE_CCW (0 << 2)
# define R300_FRONT_FACE_CW (1 << 2)
+/* SU Depth Scale value */
+#define R300_SU_DEPTH_SCALE 0x42c0
+/* SU Depth Offset value */
+#define R300_SU_DEPTH_OFFSET 0x42c4
+
/* BEGIN: Rasterization / Interpolators - many guesses */
-/* 0_UNKNOWN_18 has always been set except for clear operations.
+/*
* TC_CNT is the number of incoming texture coordinate sets (i.e. it depends
* on the vertex program, *not* the fragment program)
*/
-#define R300_RS_CNTL_0 0x4300
-# define R300_RS_CNTL_TC_CNT_SHIFT 2
-# define R300_RS_CNTL_TC_CNT_MASK (7 << 2)
- /* number of color interpolators used */
-# define R300_RS_CNTL_CI_CNT_SHIFT 7
-# define R300_RS_CNTL_0_UNKNOWN_18 (1 << 18)
- /* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n
- register. */
-#define R300_RS_CNTL_1 0x4304
+#define R300_RS_COUNT 0x4300
+# define R300_IT_COUNT_SHIFT 0
+# define R300_IT_COUNT_MASK 0x0000007f
+# define R300_IC_COUNT_SHIFT 7
+# define R300_IC_COUNT_MASK 0x00000780
+# define R300_W_ADDR_SHIFT 12
+# define R300_W_ADDR_MASK 0x0003f000
+# define R300_HIRES_DIS (0 << 18)
+# define R300_HIRES_EN (1 << 18)
+
+#define R300_RS_INST_COUNT 0x4304
+# define R300_RS_INST_COUNT_SHIFT 0
+# define R300_RS_INST_COUNT_MASK 0x0000000f
+# define R300_RS_TX_OFFSET_SHIFT 5
+# define R300_RS_TX_OFFSET_MASK 0x000000e0
/* gap */
@@ -651,63 +1166,108 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* Note: The _UNKNOWN constants are always set in their respective
* register. I don't know if this is necessary.
*/
-#define R300_RS_INTERP_0 0x4310
-#define R300_RS_INTERP_1 0x4314
-# define R300_RS_INTERP_1_UNKNOWN 0x40
-#define R300_RS_INTERP_2 0x4318
-# define R300_RS_INTERP_2_UNKNOWN 0x80
-#define R300_RS_INTERP_3 0x431C
-# define R300_RS_INTERP_3_UNKNOWN 0xC0
-#define R300_RS_INTERP_4 0x4320
-#define R300_RS_INTERP_5 0x4324
-#define R300_RS_INTERP_6 0x4328
-#define R300_RS_INTERP_7 0x432C
-# define R300_RS_INTERP_SRC_SHIFT 2
-# define R300_RS_INTERP_SRC_MASK (7 << 2)
-# define R300_RS_INTERP_USED 0x00D10000
+#define R300_RS_IP_0 0x4310
+#define R300_RS_IP_1 0x4314
+#define R300_RS_IP_2 0x4318
+#define R300_RS_IP_3 0x431C
+# define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */
+# define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */
+# define R300_RS_TEX_PTR(x) (x << 0)
+# define R300_RS_COL_PTR(x) (x << 6)
+# define R300_RS_COL_FMT(x) (x << 9)
+# define R300_RS_COL_FMT_RGBA 0
+# define R300_RS_COL_FMT_RGB0 1
+# define R300_RS_COL_FMT_RGB1 2
+# define R300_RS_COL_FMT_000A 4
+# define R300_RS_COL_FMT_0000 5
+# define R300_RS_COL_FMT_0001 6
+# define R300_RS_COL_FMT_111A 8
+# define R300_RS_COL_FMT_1110 9
+# define R300_RS_COL_FMT_1111 10
+# define R300_RS_SEL_S(x) (x << 13)
+# define R300_RS_SEL_T(x) (x << 16)
+# define R300_RS_SEL_R(x) (x << 19)
+# define R300_RS_SEL_Q(x) (x << 22)
+# define R300_RS_SEL_C0 0
+# define R300_RS_SEL_C1 1
+# define R300_RS_SEL_C2 2
+# define R300_RS_SEL_C3 3
+# define R300_RS_SEL_K0 4
+# define R300_RS_SEL_K1 5
+
+
+/* */
+#define R500_RS_INST_0 0x4320
+#define R500_RS_INST_1 0x4324
+#define R500_RS_INST_2 0x4328
+#define R500_RS_INST_3 0x432c
+#define R500_RS_INST_4 0x4330
+#define R500_RS_INST_5 0x4334
+#define R500_RS_INST_6 0x4338
+#define R500_RS_INST_7 0x433c
+#define R500_RS_INST_8 0x4340
+#define R500_RS_INST_9 0x4344
+#define R500_RS_INST_10 0x4348
+#define R500_RS_INST_11 0x434c
+#define R500_RS_INST_12 0x4350
+#define R500_RS_INST_13 0x4354
+#define R500_RS_INST_14 0x4358
+#define R500_RS_INST_15 0x435c
+#define R500_RS_INST_TEX_ID_SHIFT 0
+#define R500_RS_INST_TEX_CN_WRITE (1 << 4)
+#define R500_RS_INST_TEX_ADDR_SHIFT 5
+#define R500_RS_INST_COL_ID_SHIFT 12
+#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16)
+#define R500_RS_INST_COL_CN_WRITE (1 << 16)
+#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16)
+#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16)
+#define R500_RS_INST_COL_ADDR_SHIFT 18
+#define R500_RS_INST_TEX_ADJ (1 << 25)
+#define R500_RS_INST_W_CN (1 << 26)
/* These DWORDs control how vertex data is routed into fragment program
* registers, after interpolators.
*/
-#define R300_RS_ROUTE_0 0x4330
-#define R300_RS_ROUTE_1 0x4334
-#define R300_RS_ROUTE_2 0x4338
-#define R300_RS_ROUTE_3 0x433C /* GUESS */
-#define R300_RS_ROUTE_4 0x4340 /* GUESS */
-#define R300_RS_ROUTE_5 0x4344 /* GUESS */
-#define R300_RS_ROUTE_6 0x4348 /* GUESS */
-#define R300_RS_ROUTE_7 0x434C /* GUESS */
-# define R300_RS_ROUTE_SOURCE_INTERP_0 0
-# define R300_RS_ROUTE_SOURCE_INTERP_1 1
-# define R300_RS_ROUTE_SOURCE_INTERP_2 2
-# define R300_RS_ROUTE_SOURCE_INTERP_3 3
-# define R300_RS_ROUTE_SOURCE_INTERP_4 4
-# define R300_RS_ROUTE_SOURCE_INTERP_5 5 /* GUESS */
-# define R300_RS_ROUTE_SOURCE_INTERP_6 6 /* GUESS */
-# define R300_RS_ROUTE_SOURCE_INTERP_7 7 /* GUESS */
-# define R300_RS_ROUTE_ENABLE (1 << 3) /* GUESS */
-# define R300_RS_ROUTE_DEST_SHIFT 6
-# define R300_RS_ROUTE_DEST_MASK (31 << 6) /* GUESS */
-
-/* Special handling for color: When the fragment program uses color,
- * the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the
- * color register index.
- *
- * Apperently you may set the R300_RS_ROUTE_0_COLOR bit, but not provide any
- * R300_RS_ROUTE_0_COLOR_DEST value; this setup is used for clearing the state.
- * See r300_ioctl.c:r300EmitClearState. I'm not sure if this setup is strictly
- * correct or not. - Oliver.
- */
-# define R300_RS_ROUTE_0_COLOR (1 << 14)
-# define R300_RS_ROUTE_0_COLOR_DEST_SHIFT 17
-# define R300_RS_ROUTE_0_COLOR_DEST_MASK (31 << 17) /* GUESS */
-/* As above, but for secondary color */
-# define R300_RS_ROUTE_1_COLOR1 (1 << 14)
-# define R300_RS_ROUTE_1_COLOR1_DEST_SHIFT 17
-# define R300_RS_ROUTE_1_COLOR1_DEST_MASK (31 << 17)
-# define R300_RS_ROUTE_1_UNKNOWN11 (1 << 11)
+#define R300_RS_INST_0 0x4330
+#define R300_RS_INST_1 0x4334
+#define R300_RS_INST_2 0x4338
+#define R300_RS_INST_3 0x433C /* GUESS */
+#define R300_RS_INST_4 0x4340 /* GUESS */
+#define R300_RS_INST_5 0x4344 /* GUESS */
+#define R300_RS_INST_6 0x4348 /* GUESS */
+#define R300_RS_INST_7 0x434C /* GUESS */
+# define R300_RS_INST_TEX_ID(x) ((x) << 0)
+# define R300_RS_INST_TEX_CN_WRITE (1 << 3)
+# define R300_RS_INST_TEX_ADDR_SHIFT 6
+# define R300_RS_INST_COL_ID(x) ((x) << 11)
+# define R300_RS_INST_COL_CN_WRITE (1 << 14)
+# define R300_RS_INST_COL_ADDR_SHIFT 17
+# define R300_RS_INST_TEX_ADJ (1 << 22)
+# define R300_RS_COL_BIAS_UNUSED_SHIFT 23
+
/* END: Rasterization / Interpolators - many guesses */
+/* Hierarchical Z Enable */
+#define R300_SC_HYPERZ 0x43a4
+# define R300_SC_HYPERZ_DISABLE (0 << 0)
+# define R300_SC_HYPERZ_ENABLE (1 << 0)
+# define R300_SC_HYPERZ_MIN (0 << 1)
+# define R300_SC_HYPERZ_MAX (1 << 1)
+# define R300_SC_HYPERZ_ADJ_256 (0 << 2)
+# define R300_SC_HYPERZ_ADJ_128 (1 << 2)
+# define R300_SC_HYPERZ_ADJ_64 (2 << 2)
+# define R300_SC_HYPERZ_ADJ_32 (3 << 2)
+# define R300_SC_HYPERZ_ADJ_16 (4 << 2)
+# define R300_SC_HYPERZ_ADJ_8 (5 << 2)
+# define R300_SC_HYPERZ_ADJ_4 (6 << 2)
+# define R300_SC_HYPERZ_ADJ_2 (7 << 2)
+# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5)
+# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5)
+# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6)
+# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6)
+
+#define R300_SC_EDGERULE 0x43a8
+
/* BEGIN: Scissors and cliprects */
/* There are four clipping rectangles. Their corner coordinates are inclusive.
@@ -724,21 +1284,21 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* For some reason, the top-left corner of the framebuffer is at (1440, 1440)
* for the purpose of clipping and scissors.
*/
-#define R300_RE_CLIPRECT_TL_0 0x43B0
-#define R300_RE_CLIPRECT_BR_0 0x43B4
-#define R300_RE_CLIPRECT_TL_1 0x43B8
-#define R300_RE_CLIPRECT_BR_1 0x43BC
-#define R300_RE_CLIPRECT_TL_2 0x43C0
-#define R300_RE_CLIPRECT_BR_2 0x43C4
-#define R300_RE_CLIPRECT_TL_3 0x43C8
-#define R300_RE_CLIPRECT_BR_3 0x43CC
+#define R300_SC_CLIPRECT_TL_0 0x43B0
+#define R300_SC_CLIPRECT_BR_0 0x43B4
+#define R300_SC_CLIPRECT_TL_1 0x43B8
+#define R300_SC_CLIPRECT_BR_1 0x43BC
+#define R300_SC_CLIPRECT_TL_2 0x43C0
+#define R300_SC_CLIPRECT_BR_2 0x43C4
+#define R300_SC_CLIPRECT_TL_3 0x43C8
+#define R300_SC_CLIPRECT_BR_3 0x43CC
# define R300_CLIPRECT_OFFSET 1440
# define R300_CLIPRECT_MASK 0x1FFF
# define R300_CLIPRECT_X_SHIFT 0
# define R300_CLIPRECT_X_MASK (0x1FFF << 0)
# define R300_CLIPRECT_Y_SHIFT 13
# define R300_CLIPRECT_Y_MASK (0x1FFF << 13)
-#define R300_RE_CLIPRECT_CNTL 0x43D0
+#define R300_SC_CLIP_RULE 0x43D0
# define R300_CLIP_OUT (1 << 0)
# define R300_CLIP_0 (1 << 1)
# define R300_CLIP_1 (1 << 2)
@@ -758,13 +1318,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* gap */
-#define R300_RE_SCISSORS_TL 0x43E0
-#define R300_RE_SCISSORS_BR 0x43E4
+#define R300_SC_SCISSORS_TL 0x43E0
+#define R300_SC_SCISSORS_BR 0x43E4
# define R300_SCISSORS_OFFSET 1440
# define R300_SCISSORS_X_SHIFT 0
# define R300_SCISSORS_X_MASK (0x1FFF << 0)
# define R300_SCISSORS_Y_SHIFT 13
# define R300_SCISSORS_Y_MASK (0x1FFF << 13)
+
+/* Screen door sample mask */
+#define R300_SC_SCREENDOOR 0x43e8
+
/* END: Scissors and cliprects */
/* BEGIN: Texture specification */
@@ -774,43 +1338,55 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* unit. This means that e.g. the offset for texture image unit N is found in
* register TX_OFFSET_0 + (4*N)
*/
-#define R300_TX_FILTER_0 0x4400
+#define R300_TX_FILTER0_0 0x4400
+#define R300_TX_FILTER0_1 0x4404
+#define R300_TX_FILTER0_2 0x4408
+#define R300_TX_FILTER0_3 0x440c
+#define R300_TX_FILTER0_4 0x4410
+#define R300_TX_FILTER0_5 0x4414
+#define R300_TX_FILTER0_6 0x4418
+#define R300_TX_FILTER0_7 0x441c
+#define R300_TX_FILTER0_8 0x4420
+#define R300_TX_FILTER0_9 0x4424
+#define R300_TX_FILTER0_10 0x4428
+#define R300_TX_FILTER0_11 0x442c
+#define R300_TX_FILTER0_12 0x4430
+#define R300_TX_FILTER0_13 0x4434
+#define R300_TX_FILTER0_14 0x4438
+#define R300_TX_FILTER0_15 0x443c
# define R300_TX_REPEAT 0
# define R300_TX_MIRRORED 1
-# define R300_TX_CLAMP 4
# define R300_TX_CLAMP_TO_EDGE 2
+# define R300_TX_MIRROR_ONCE_TO_EDGE 3
+# define R300_TX_CLAMP 4
+# define R300_TX_MIRROR_ONCE 5
# define R300_TX_CLAMP_TO_BORDER 6
+# define R300_TX_MIRROR_ONCE_TO_BORDER 7
# define R300_TX_WRAP_S_SHIFT 0
# define R300_TX_WRAP_S_MASK (7 << 0)
# define R300_TX_WRAP_T_SHIFT 3
# define R300_TX_WRAP_T_MASK (7 << 3)
-# define R300_TX_WRAP_Q_SHIFT 6
-# define R300_TX_WRAP_Q_MASK (7 << 6)
+# define R300_TX_WRAP_R_SHIFT 6
+# define R300_TX_WRAP_R_MASK (7 << 6)
+# define R300_TX_MAG_FILTER_4 (0 << 9)
# define R300_TX_MAG_FILTER_NEAREST (1 << 9)
# define R300_TX_MAG_FILTER_LINEAR (2 << 9)
+# define R300_TX_MAG_FILTER_ANISO (3 << 9)
# define R300_TX_MAG_FILTER_MASK (3 << 9)
# define R300_TX_MIN_FILTER_NEAREST (1 << 11)
# define R300_TX_MIN_FILTER_LINEAR (2 << 11)
-# define R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST (5 << 11)
-# define R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR (9 << 11)
-# define R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 11)
-# define R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR (10 << 11)
-
-/* NOTE: NEAREST doesnt seem to exist.
- * Im not seting MAG_FILTER_MASK and (3 << 11) on for all
- * anisotropy modes because that would void selected mag filter
- */
-# define R300_TX_MIN_FILTER_ANISO_NEAREST (0 << 13)
-# define R300_TX_MIN_FILTER_ANISO_LINEAR (0 << 13)
-# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (1 << 13)
-# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (2 << 13)
-# define R300_TX_MIN_FILTER_MASK ( (15 << 11) | (3 << 13) )
-# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21)
-# define R300_TX_MAX_ANISO_2_TO_1 (2 << 21)
-# define R300_TX_MAX_ANISO_4_TO_1 (4 << 21)
-# define R300_TX_MAX_ANISO_8_TO_1 (6 << 21)
-# define R300_TX_MAX_ANISO_16_TO_1 (8 << 21)
-# define R300_TX_MAX_ANISO_MASK (14 << 21)
+# define R300_TX_MIN_FILTER_ANISO (3 << 11)
+# define R300_TX_MIN_FILTER_MASK (3 << 11)
+# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13)
+# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13)
+# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13)
+# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13)
+# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21)
+# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21)
+# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21)
+# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21)
+# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21)
+# define R300_TX_MAX_ANISO_MASK (7 << 21)
#define R300_TX_FILTER1_0 0x4440
# define R300_CHROMA_KEY_MODE_DISABLE 0
@@ -818,7 +1394,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_CHROMA_KEY_BLEND 2
# define R300_MC_ROUND_NORMAL (0<<2)
# define R300_MC_ROUND_MPEG4 (1<<2)
-# define R300_LOD_BIAS_MASK 0x1fff
+# define R300_LOD_BIAS_SHIFT 3
+# define R300_LOD_BIAS_MASK 0x1ff8
# define R300_EDGE_ANISO_EDGE_DIAG (0<<13)
# define R300_EDGE_ANISO_EDGE_ONLY (1<<13)
# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14)
@@ -829,12 +1406,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_TRI_PERF_3_8 (3<<15)
# define R300_ANISO_THRESHOLD_MASK (7<<17)
+# define R500_MACRO_SWITCH (1<<22)
+# define R500_BORDER_FIX (1<<31)
+
#define R300_TX_SIZE_0 0x4480
# define R300_TX_WIDTHMASK_SHIFT 0
# define R300_TX_WIDTHMASK_MASK (2047 << 0)
# define R300_TX_HEIGHTMASK_SHIFT 11
# define R300_TX_HEIGHTMASK_MASK (2047 << 11)
-# define R300_TX_UNK23 (1 << 23)
+# define R300_TX_DEPTHMASK_SHIFT 22
+# define R300_TX_DEPTHMASK_MASK (0xf << 22)
# define R300_TX_MAX_MIP_LEVEL_SHIFT 26
# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26)
# define R300_TX_SIZE_PROJECTED (1<<30)
@@ -845,7 +1426,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
They are given meanings as R, G, B and Alpha by the swizzle
specification */
# define R300_TX_FORMAT_X8 0x0
+# define R500_TX_FORMAT_X1 0x0 // bit set in format 2
# define R300_TX_FORMAT_X16 0x1
+# define R500_TX_FORMAT_X1_REV 0x0 // bit set in format 2
# define R300_TX_FORMAT_Y4X4 0x2
# define R300_TX_FORMAT_Y8X8 0x3
# define R300_TX_FORMAT_Y16X16 0x4
@@ -866,9 +1449,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */
# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */
# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */
+
+ /* These two values are wrong, but they're the only values that
+ * produce any even vaguely correct results. Can r300 only do 16-bit
+ * depth textures?
+ */
+# define R300_TX_FORMAT_X24_Y8 0x1e
+# define R300_TX_FORMAT_X32 0x1e
+
/* 0x16 - some 16 bit green format.. ?? */
-# define R300_TX_FORMAT_UNK25 (1 << 25) /* no swizzle */
-# define R300_TX_FORMAT_CUBIC_MAP (1 << 26)
+# define R300_TX_FORMAT_3D (1 << 25)
+# define R300_TX_FORMAT_CUBIC_MAP (2 << 25)
/* gap */
/* Floating point formats */
@@ -921,7 +1512,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_FORMAT_YUV_MODE 0x00800000
-#define R300_TX_PITCH_0 0x4500 /* obvious missing in gap */
+#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */
+# define R300_TX_PITCHMASK_SHIFT 0
+# define R300_TX_PITCHMASK_MASK (2047 << 0)
+# define R500_TXFORMAT_MSB (1 << 14)
+# define R500_TXWIDTH_BIT11 (1 << 15)
+# define R500_TXHEIGHT_BIT11 (1 << 16)
+# define R500_POW2FIX2FLT (1 << 17)
+# define R500_SEL_FILTER4_TC0 (0 << 18)
+# define R500_SEL_FILTER4_TC1 (1 << 18)
+# define R500_SEL_FILTER4_TC2 (2 << 18)
+# define R500_SEL_FILTER4_TC3 (3 << 18)
+
#define R300_TX_OFFSET_0 0x4540
/* BEGIN: Guess from R200 */
# define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
@@ -929,15 +1531,50 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0)
# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
# define R300_TXO_MACRO_TILE (1 << 2)
+# define R300_TXO_MICRO_TILE_LINEAR (0 << 3)
# define R300_TXO_MICRO_TILE (1 << 3)
+# define R300_TXO_MICRO_TILE_SQUARE (2 << 3)
# define R300_TXO_OFFSET_MASK 0xffffffe0
# define R300_TXO_OFFSET_SHIFT 5
/* END: Guess from R200 */
/* 32 bit chroma key */
#define R300_TX_CHROMA_KEY_0 0x4580
+#define R300_TX_CHROMA_KEY_1 0x4584
+#define R300_TX_CHROMA_KEY_2 0x4588
+#define R300_TX_CHROMA_KEY_3 0x458c
+#define R300_TX_CHROMA_KEY_4 0x4590
+#define R300_TX_CHROMA_KEY_5 0x4594
+#define R300_TX_CHROMA_KEY_6 0x4598
+#define R300_TX_CHROMA_KEY_7 0x459c
+#define R300_TX_CHROMA_KEY_8 0x45a0
+#define R300_TX_CHROMA_KEY_9 0x45a4
+#define R300_TX_CHROMA_KEY_10 0x45a8
+#define R300_TX_CHROMA_KEY_11 0x45ac
+#define R300_TX_CHROMA_KEY_12 0x45b0
+#define R300_TX_CHROMA_KEY_13 0x45b4
+#define R300_TX_CHROMA_KEY_14 0x45b8
+#define R300_TX_CHROMA_KEY_15 0x45bc
/* ff00ff00 == { 0, 1.0, 0, 1.0 } */
-#define R300_TX_BORDER_COLOR_0 0x45C0
+
+/* Border Color */
+#define R300_TX_BORDER_COLOR_0 0x45c0
+#define R300_TX_BORDER_COLOR_1 0x45c4
+#define R300_TX_BORDER_COLOR_2 0x45c8
+#define R300_TX_BORDER_COLOR_3 0x45cc
+#define R300_TX_BORDER_COLOR_4 0x45d0
+#define R300_TX_BORDER_COLOR_5 0x45d4
+#define R300_TX_BORDER_COLOR_6 0x45d8
+#define R300_TX_BORDER_COLOR_7 0x45dc
+#define R300_TX_BORDER_COLOR_8 0x45e0
+#define R300_TX_BORDER_COLOR_9 0x45e4
+#define R300_TX_BORDER_COLOR_10 0x45e8
+#define R300_TX_BORDER_COLOR_11 0x45ec
+#define R300_TX_BORDER_COLOR_12 0x45f0
+#define R300_TX_BORDER_COLOR_13 0x45f4
+#define R300_TX_BORDER_COLOR_14 0x45f8
+#define R300_TX_BORDER_COLOR_15 0x45fc
+
/* END: Texture specification */
@@ -960,23 +1597,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* offsets into the respective instruction streams, while *_END points to the
* last instruction relative to this offset.
*/
-#define R300_PFS_CNTL_0 0x4600
+#define R300_US_CONFIG 0x4600
# define R300_PFS_CNTL_LAST_NODES_SHIFT 0
# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0)
# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3)
-#define R300_PFS_CNTL_1 0x4604
+#define R300_US_PIXSIZE 0x4604
/* There is an unshifted value here which has so far always been equal to the
* index of the highest used temporary register.
*/
-#define R300_PFS_CNTL_2 0x4608
+#define R300_US_CODE_OFFSET 0x4608
# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0
# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0)
# define R300_PFS_CNTL_ALU_END_SHIFT 6
# define R300_PFS_CNTL_ALU_END_MASK (63 << 6)
-# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 12
-# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 12) /* GUESS */
+# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13
+# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13)
# define R300_PFS_CNTL_TEX_END_SHIFT 18
-# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* GUESS */
+# define R300_PFS_CNTL_TEX_END_MASK (31 << 18)
/* gap */
@@ -986,48 +1623,66 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* first node is stored in NODE_2, the second node is stored in NODE_3.
*
* Offsets are relative to the master offset from PFS_CNTL_2.
- * LAST_NODE is set for the last node, and only for the last node.
*/
-#define R300_PFS_NODE_0 0x4610
-#define R300_PFS_NODE_1 0x4614
-#define R300_PFS_NODE_2 0x4618
-#define R300_PFS_NODE_3 0x461C
-# define R300_PFS_NODE_ALU_OFFSET_SHIFT 0
-# define R300_PFS_NODE_ALU_OFFSET_MASK (63 << 0)
-# define R300_PFS_NODE_ALU_END_SHIFT 6
-# define R300_PFS_NODE_ALU_END_MASK (63 << 6)
-# define R300_PFS_NODE_TEX_OFFSET_SHIFT 12
-# define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12)
-# define R300_PFS_NODE_TEX_END_SHIFT 17
-# define R300_PFS_NODE_TEX_END_MASK (31 << 17)
-/*# define R300_PFS_NODE_LAST_NODE (1 << 22) */
-# define R300_PFS_NODE_OUTPUT_COLOR (1 << 22)
-# define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23)
+#define R300_US_CODE_ADDR_0 0x4610
+#define R300_US_CODE_ADDR_1 0x4614
+#define R300_US_CODE_ADDR_2 0x4618
+#define R300_US_CODE_ADDR_3 0x461C
+# define R300_ALU_START_SHIFT 0
+# define R300_ALU_START_MASK (63 << 0)
+# define R300_ALU_SIZE_SHIFT 6
+# define R300_ALU_SIZE_MASK (63 << 6)
+# define R300_TEX_START_SHIFT 12
+# define R300_TEX_START_MASK (31 << 12)
+# define R300_TEX_SIZE_SHIFT 17
+# define R300_TEX_SIZE_MASK (31 << 17)
+# define R300_RGBA_OUT (1 << 22)
+# define R300_W_OUT (1 << 23)
/* TEX
* As far as I can tell, texture instructions cannot write into output
* registers directly. A subsequent ALU instruction is always necessary,
* even if it's just MAD o0, r0, 1, 0
*/
-#define R300_PFS_TEXI_0 0x4620
-# define R300_FPITX_SRC_SHIFT 0
-# define R300_FPITX_SRC_MASK (31 << 0)
- /* GUESS */
-# define R300_FPITX_SRC_CONST (1 << 5)
-# define R300_FPITX_DST_SHIFT 6
-# define R300_FPITX_DST_MASK (31 << 6)
-# define R300_FPITX_IMAGE_SHIFT 11
- /* GUESS based on layout and native limits */
-# define R300_FPITX_IMAGE_MASK (15 << 11)
-/* Unsure if these are opcodes, or some kind of bitfield, but this is how
- * they were set when I checked
- */
-# define R300_FPITX_OPCODE_SHIFT 15
-# define R300_FPITX_OP_TEX 1
-# define R300_FPITX_OP_KIL 2
-# define R300_FPITX_OP_TXP 3
-# define R300_FPITX_OP_TXB 4
-# define R300_FPITX_OPCODE_MASK (7 << 15)
+#define R300_US_TEX_INST_0 0x4620
+# define R300_SRC_ADDR_SHIFT 0
+# define R300_SRC_ADDR_MASK (31 << 0)
+# define R300_DST_ADDR_SHIFT 6
+# define R300_DST_ADDR_MASK (31 << 6)
+# define R300_TEX_ID_SHIFT 11
+# define R300_TEX_ID_MASK (15 << 11)
+# define R300_TEX_INST_SHIFT 15
+# define R300_TEX_OP_NOP 0
+# define R300_TEX_OP_LD 1
+# define R300_TEX_OP_KIL 2
+# define R300_TEX_OP_TXP 3
+# define R300_TEX_OP_TXB 4
+# define R300_TEX_INST_MASK (7 << 15)
+
+/* Output format from the unfied shader */
+#define R300_US_OUT_FMT 0x46A4
+# define R300_US_OUT_FMT_C4_8 (0 << 0)
+# define R300_US_OUT_FMT_C4_10 (1 << 0)
+# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0)
+# define R300_US_OUT_FMT_C_16 (3 << 0)
+# define R300_US_OUT_FMT_C2_16 (4 << 0)
+# define R300_US_OUT_FMT_C4_16 (5 << 0)
+# define R300_US_OUT_FMT_C_16_MPEG (6 << 0)
+# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0)
+# define R300_US_OUT_FMT_C2_4 (8 << 0)
+# define R300_US_OUT_FMT_C_3_3_2 (9 << 0)
+# define R300_US_OUT_FMT_C_6_5_6 (10 << 0)
+# define R300_US_OUT_FMT_C_11_11_10 (11 << 0)
+# define R300_US_OUT_FMT_C_10_11_11 (12 << 0)
+# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0)
+/* reserved */
+# define R300_US_OUT_FMT_UNUSED (15 << 0)
+# define R300_US_OUT_FMT_C_16_FP (16 << 0)
+# define R300_US_OUT_FMT_C2_16_FP (17 << 0)
+# define R300_US_OUT_FMT_C4_16_FP (18 << 0)
+# define R300_US_OUT_FMT_C_32_FP (19 << 0)
+# define R300_US_OUT_FMT_C2_32_FP (20 << 0)
+# define R300_US_OUT_FMT_C4_32_FP (20 << 0)
/* ALU
* The ALU instructions register blocks are enumerated according to the order
@@ -1093,172 +1748,256 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* - Set FPI0/FPI2_SPECIAL_LRP
* Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD
*/
-#define R300_PFS_INSTR1_0 0x46C0
-# define R300_FPI1_SRC0C_SHIFT 0
-# define R300_FPI1_SRC0C_MASK (31 << 0)
-# define R300_FPI1_SRC0C_CONST (1 << 5)
-# define R300_FPI1_SRC1C_SHIFT 6
-# define R300_FPI1_SRC1C_MASK (31 << 6)
-# define R300_FPI1_SRC1C_CONST (1 << 11)
-# define R300_FPI1_SRC2C_SHIFT 12
-# define R300_FPI1_SRC2C_MASK (31 << 12)
-# define R300_FPI1_SRC2C_CONST (1 << 17)
-# define R300_FPI1_SRC_MASK 0x0003ffff
-# define R300_FPI1_DSTC_SHIFT 18
-# define R300_FPI1_DSTC_MASK (31 << 18)
-# define R300_FPI1_DSTC_REG_MASK_SHIFT 23
-# define R300_FPI1_DSTC_REG_X (1 << 23)
-# define R300_FPI1_DSTC_REG_Y (1 << 24)
-# define R300_FPI1_DSTC_REG_Z (1 << 25)
-# define R300_FPI1_DSTC_OUTPUT_MASK_SHIFT 26
-# define R300_FPI1_DSTC_OUTPUT_X (1 << 26)
-# define R300_FPI1_DSTC_OUTPUT_Y (1 << 27)
-# define R300_FPI1_DSTC_OUTPUT_Z (1 << 28)
-
-#define R300_PFS_INSTR3_0 0x47C0
-# define R300_FPI3_SRC0A_SHIFT 0
-# define R300_FPI3_SRC0A_MASK (31 << 0)
-# define R300_FPI3_SRC0A_CONST (1 << 5)
-# define R300_FPI3_SRC1A_SHIFT 6
-# define R300_FPI3_SRC1A_MASK (31 << 6)
-# define R300_FPI3_SRC1A_CONST (1 << 11)
-# define R300_FPI3_SRC2A_SHIFT 12
-# define R300_FPI3_SRC2A_MASK (31 << 12)
-# define R300_FPI3_SRC2A_CONST (1 << 17)
-# define R300_FPI3_SRC_MASK 0x0003ffff
-# define R300_FPI3_DSTA_SHIFT 18
-# define R300_FPI3_DSTA_MASK (31 << 18)
-# define R300_FPI3_DSTA_REG (1 << 23)
-# define R300_FPI3_DSTA_OUTPUT (1 << 24)
-# define R300_FPI3_DSTA_DEPTH (1 << 27)
-
-#define R300_PFS_INSTR0_0 0x48C0
-# define R300_FPI0_ARGC_SRC0C_XYZ 0
-# define R300_FPI0_ARGC_SRC0C_XXX 1
-# define R300_FPI0_ARGC_SRC0C_YYY 2
-# define R300_FPI0_ARGC_SRC0C_ZZZ 3
-# define R300_FPI0_ARGC_SRC1C_XYZ 4
-# define R300_FPI0_ARGC_SRC1C_XXX 5
-# define R300_FPI0_ARGC_SRC1C_YYY 6
-# define R300_FPI0_ARGC_SRC1C_ZZZ 7
-# define R300_FPI0_ARGC_SRC2C_XYZ 8
-# define R300_FPI0_ARGC_SRC2C_XXX 9
-# define R300_FPI0_ARGC_SRC2C_YYY 10
-# define R300_FPI0_ARGC_SRC2C_ZZZ 11
-# define R300_FPI0_ARGC_SRC0A 12
-# define R300_FPI0_ARGC_SRC1A 13
-# define R300_FPI0_ARGC_SRC2A 14
-# define R300_FPI0_ARGC_SRC1C_LRP 15
-# define R300_FPI0_ARGC_ZERO 20
-# define R300_FPI0_ARGC_ONE 21
- /* GUESS */
-# define R300_FPI0_ARGC_HALF 22
-# define R300_FPI0_ARGC_SRC0C_YZX 23
-# define R300_FPI0_ARGC_SRC1C_YZX 24
-# define R300_FPI0_ARGC_SRC2C_YZX 25
-# define R300_FPI0_ARGC_SRC0C_ZXY 26
-# define R300_FPI0_ARGC_SRC1C_ZXY 27
-# define R300_FPI0_ARGC_SRC2C_ZXY 28
-# define R300_FPI0_ARGC_SRC0CA_WZY 29
-# define R300_FPI0_ARGC_SRC1CA_WZY 30
-# define R300_FPI0_ARGC_SRC2CA_WZY 31
-
-# define R300_FPI0_ARG0C_SHIFT 0
-# define R300_FPI0_ARG0C_MASK (31 << 0)
-# define R300_FPI0_ARG0C_NEG (1 << 5)
-# define R300_FPI0_ARG0C_ABS (1 << 6)
-# define R300_FPI0_ARG1C_SHIFT 7
-# define R300_FPI0_ARG1C_MASK (31 << 7)
-# define R300_FPI0_ARG1C_NEG (1 << 12)
-# define R300_FPI0_ARG1C_ABS (1 << 13)
-# define R300_FPI0_ARG2C_SHIFT 14
-# define R300_FPI0_ARG2C_MASK (31 << 14)
-# define R300_FPI0_ARG2C_NEG (1 << 19)
-# define R300_FPI0_ARG2C_ABS (1 << 20)
-# define R300_FPI0_SPECIAL_LRP (1 << 21)
-# define R300_FPI0_OUTC_MAD (0 << 23)
-# define R300_FPI0_OUTC_DP3 (1 << 23)
-# define R300_FPI0_OUTC_DP4 (2 << 23)
-# define R300_FPI0_OUTC_MIN (4 << 23)
-# define R300_FPI0_OUTC_MAX (5 << 23)
-# define R300_FPI0_OUTC_CMPH (7 << 23)
-# define R300_FPI0_OUTC_CMP (8 << 23)
-# define R300_FPI0_OUTC_FRC (9 << 23)
-# define R300_FPI0_OUTC_REPL_ALPHA (10 << 23)
-# define R300_FPI0_OUTC_SAT (1 << 30)
-# define R300_FPI0_INSERT_NOP (1 << 31)
-
-#define R300_PFS_INSTR2_0 0x49C0
-# define R300_FPI2_ARGA_SRC0C_X 0
-# define R300_FPI2_ARGA_SRC0C_Y 1
-# define R300_FPI2_ARGA_SRC0C_Z 2
-# define R300_FPI2_ARGA_SRC1C_X 3
-# define R300_FPI2_ARGA_SRC1C_Y 4
-# define R300_FPI2_ARGA_SRC1C_Z 5
-# define R300_FPI2_ARGA_SRC2C_X 6
-# define R300_FPI2_ARGA_SRC2C_Y 7
-# define R300_FPI2_ARGA_SRC2C_Z 8
-# define R300_FPI2_ARGA_SRC0A 9
-# define R300_FPI2_ARGA_SRC1A 10
-# define R300_FPI2_ARGA_SRC2A 11
-# define R300_FPI2_ARGA_SRC1A_LRP 15
-# define R300_FPI2_ARGA_ZERO 16
-# define R300_FPI2_ARGA_ONE 17
- /* GUESS */
-# define R300_FPI2_ARGA_HALF 18
-# define R300_FPI2_ARG0A_SHIFT 0
-# define R300_FPI2_ARG0A_MASK (31 << 0)
-# define R300_FPI2_ARG0A_NEG (1 << 5)
- /* GUESS */
-# define R300_FPI2_ARG0A_ABS (1 << 6)
-# define R300_FPI2_ARG1A_SHIFT 7
-# define R300_FPI2_ARG1A_MASK (31 << 7)
-# define R300_FPI2_ARG1A_NEG (1 << 12)
- /* GUESS */
-# define R300_FPI2_ARG1A_ABS (1 << 13)
-# define R300_FPI2_ARG2A_SHIFT 14
-# define R300_FPI2_ARG2A_MASK (31 << 14)
-# define R300_FPI2_ARG2A_NEG (1 << 19)
- /* GUESS */
-# define R300_FPI2_ARG2A_ABS (1 << 20)
-# define R300_FPI2_SPECIAL_LRP (1 << 21)
-# define R300_FPI2_OUTA_MAD (0 << 23)
-# define R300_FPI2_OUTA_DP4 (1 << 23)
-# define R300_FPI2_OUTA_MIN (2 << 23)
-# define R300_FPI2_OUTA_MAX (3 << 23)
-# define R300_FPI2_OUTA_CMP (6 << 23)
-# define R300_FPI2_OUTA_FRC (7 << 23)
-# define R300_FPI2_OUTA_EX2 (8 << 23)
-# define R300_FPI2_OUTA_LG2 (9 << 23)
-# define R300_FPI2_OUTA_RCP (10 << 23)
-# define R300_FPI2_OUTA_RSQ (11 << 23)
-# define R300_FPI2_OUTA_SAT (1 << 30)
-# define R300_FPI2_UNKNOWN_31 (1 << 31)
+#define R300_US_ALU_RGB_ADDR_0 0x46C0
+# define R300_ALU_SRC0C_SHIFT 0
+# define R300_ALU_SRC0C_MASK (31 << 0)
+# define R300_ALU_SRC0C_CONST (1 << 5)
+# define R300_ALU_SRC1C_SHIFT 6
+# define R300_ALU_SRC1C_MASK (31 << 6)
+# define R300_ALU_SRC1C_CONST (1 << 11)
+# define R300_ALU_SRC2C_SHIFT 12
+# define R300_ALU_SRC2C_MASK (31 << 12)
+# define R300_ALU_SRC2C_CONST (1 << 17)
+# define R300_ALU_SRC_MASK 0x0003ffff
+# define R300_ALU_DSTC_SHIFT 18
+# define R300_ALU_DSTC_MASK (31 << 18)
+# define R300_ALU_DSTC_REG_MASK_SHIFT 23
+# define R300_ALU_DSTC_REG_X (1 << 23)
+# define R300_ALU_DSTC_REG_Y (1 << 24)
+# define R300_ALU_DSTC_REG_Z (1 << 25)
+# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26
+# define R300_ALU_DSTC_OUTPUT_X (1 << 26)
+# define R300_ALU_DSTC_OUTPUT_Y (1 << 27)
+# define R300_ALU_DSTC_OUTPUT_Z (1 << 28)
+
+#define R300_US_ALU_ALPHA_ADDR_0 0x47C0
+# define R300_ALU_SRC0A_SHIFT 0
+# define R300_ALU_SRC0A_MASK (31 << 0)
+# define R300_ALU_SRC0A_CONST (1 << 5)
+# define R300_ALU_SRC1A_SHIFT 6
+# define R300_ALU_SRC1A_MASK (31 << 6)
+# define R300_ALU_SRC1A_CONST (1 << 11)
+# define R300_ALU_SRC2A_SHIFT 12
+# define R300_ALU_SRC2A_MASK (31 << 12)
+# define R300_ALU_SRC2A_CONST (1 << 17)
+# define R300_ALU_SRC_MASK 0x0003ffff
+# define R300_ALU_DSTA_SHIFT 18
+# define R300_ALU_DSTA_MASK (31 << 18)
+# define R300_ALU_DSTA_REG (1 << 23)
+# define R300_ALU_DSTA_OUTPUT (1 << 24)
+# define R300_ALU_DSTA_DEPTH (1 << 27)
+
+#define R300_US_ALU_RGB_INST_0 0x48C0
+# define R300_ALU_ARGC_SRC0C_XYZ 0
+# define R300_ALU_ARGC_SRC0C_XXX 1
+# define R300_ALU_ARGC_SRC0C_YYY 2
+# define R300_ALU_ARGC_SRC0C_ZZZ 3
+# define R300_ALU_ARGC_SRC1C_XYZ 4
+# define R300_ALU_ARGC_SRC1C_XXX 5
+# define R300_ALU_ARGC_SRC1C_YYY 6
+# define R300_ALU_ARGC_SRC1C_ZZZ 7
+# define R300_ALU_ARGC_SRC2C_XYZ 8
+# define R300_ALU_ARGC_SRC2C_XXX 9
+# define R300_ALU_ARGC_SRC2C_YYY 10
+# define R300_ALU_ARGC_SRC2C_ZZZ 11
+# define R300_ALU_ARGC_SRC0A 12
+# define R300_ALU_ARGC_SRC1A 13
+# define R300_ALU_ARGC_SRC2A 14
+# define R300_ALU_ARGC_SRCP_XYZ 15
+# define R300_ALU_ARGC_SRCP_XXX 16
+# define R300_ALU_ARGC_SRCP_YYY 17
+# define R300_ALU_ARGC_SRCP_ZZZ 18
+# define R300_ALU_ARGC_SRCP_WWW 19
+# define R300_ALU_ARGC_ZERO 20
+# define R300_ALU_ARGC_ONE 21
+# define R300_ALU_ARGC_HALF 22
+# define R300_ALU_ARGC_SRC0C_YZX 23
+# define R300_ALU_ARGC_SRC1C_YZX 24
+# define R300_ALU_ARGC_SRC2C_YZX 25
+# define R300_ALU_ARGC_SRC0C_ZXY 26
+# define R300_ALU_ARGC_SRC1C_ZXY 27
+# define R300_ALU_ARGC_SRC2C_ZXY 28
+# define R300_ALU_ARGC_SRC0CA_WZY 29
+# define R300_ALU_ARGC_SRC1CA_WZY 30
+# define R300_ALU_ARGC_SRC2CA_WZY 31
+
+# define R300_ALU_ARG0C_SHIFT 0
+# define R300_ALU_ARG0C_MASK (31 << 0)
+# define R300_ALU_ARG0C_NOP (0 << 5)
+# define R300_ALU_ARG0C_NEG (1 << 5)
+# define R300_ALU_ARG0C_ABS (2 << 5)
+# define R300_ALU_ARG0C_NAB (3 << 5)
+# define R300_ALU_ARG1C_SHIFT 7
+# define R300_ALU_ARG1C_MASK (31 << 7)
+# define R300_ALU_ARG1C_NOP (0 << 12)
+# define R300_ALU_ARG1C_NEG (1 << 12)
+# define R300_ALU_ARG1C_ABS (2 << 12)
+# define R300_ALU_ARG1C_NAB (3 << 12)
+# define R300_ALU_ARG2C_SHIFT 14
+# define R300_ALU_ARG2C_MASK (31 << 14)
+# define R300_ALU_ARG2C_NOP (0 << 19)
+# define R300_ALU_ARG2C_NEG (1 << 19)
+# define R300_ALU_ARG2C_ABS (2 << 19)
+# define R300_ALU_ARG2C_NAB (3 << 19)
+# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21)
+# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21)
+# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21)
+# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21)
+
+# define R300_ALU_OUTC_MAD (0 << 23)
+# define R300_ALU_OUTC_DP3 (1 << 23)
+# define R300_ALU_OUTC_DP4 (2 << 23)
+# define R300_ALU_OUTC_D2A (3 << 23)
+# define R300_ALU_OUTC_MIN (4 << 23)
+# define R300_ALU_OUTC_MAX (5 << 23)
+# define R300_ALU_OUTC_CMPH (7 << 23)
+# define R300_ALU_OUTC_CMP (8 << 23)
+# define R300_ALU_OUTC_FRC (9 << 23)
+# define R300_ALU_OUTC_REPL_ALPHA (10 << 23)
+
+# define R300_ALU_OUTC_MOD_NOP (0 << 27)
+# define R300_ALU_OUTC_MOD_MUL2 (1 << 27)
+# define R300_ALU_OUTC_MOD_MUL4 (2 << 27)
+# define R300_ALU_OUTC_MOD_MUL8 (3 << 27)
+# define R300_ALU_OUTC_MOD_DIV2 (4 << 27)
+# define R300_ALU_OUTC_MOD_DIV4 (5 << 27)
+# define R300_ALU_OUTC_MOD_DIV8 (6 << 27)
+
+# define R300_ALU_OUTC_CLAMP (1 << 30)
+# define R300_ALU_INSERT_NOP (1 << 31)
+
+#define R300_US_ALU_ALPHA_INST_0 0x49C0
+# define R300_ALU_ARGA_SRC0C_X 0
+# define R300_ALU_ARGA_SRC0C_Y 1
+# define R300_ALU_ARGA_SRC0C_Z 2
+# define R300_ALU_ARGA_SRC1C_X 3
+# define R300_ALU_ARGA_SRC1C_Y 4
+# define R300_ALU_ARGA_SRC1C_Z 5
+# define R300_ALU_ARGA_SRC2C_X 6
+# define R300_ALU_ARGA_SRC2C_Y 7
+# define R300_ALU_ARGA_SRC2C_Z 8
+# define R300_ALU_ARGA_SRC0A 9
+# define R300_ALU_ARGA_SRC1A 10
+# define R300_ALU_ARGA_SRC2A 11
+# define R300_ALU_ARGA_SRCP_X 12
+# define R300_ALU_ARGA_SRCP_Y 13
+# define R300_ALU_ARGA_SRCP_Z 14
+# define R300_ALU_ARGA_SRCP_W 15
+
+# define R300_ALU_ARGA_ZERO 16
+# define R300_ALU_ARGA_ONE 17
+# define R300_ALU_ARGA_HALF 18
+# define R300_ALU_ARG0A_SHIFT 0
+# define R300_ALU_ARG0A_MASK (31 << 0)
+# define R300_ALU_ARG0A_NOP (0 << 5)
+# define R300_ALU_ARG0A_NEG (1 << 5)
+# define R300_ALU_ARG0A_ABS (2 << 5)
+# define R300_ALU_ARG0A_NAB (3 << 5)
+# define R300_ALU_ARG1A_SHIFT 7
+# define R300_ALU_ARG1A_MASK (31 << 7)
+# define R300_ALU_ARG1A_NOP (0 << 12)
+# define R300_ALU_ARG1A_NEG (1 << 12)
+# define R300_ALU_ARG1A_ABS (2 << 12)
+# define R300_ALU_ARG1A_NAB (3 << 12)
+# define R300_ALU_ARG2A_SHIFT 14
+# define R300_ALU_ARG2A_MASK (31 << 14)
+# define R300_ALU_ARG2A_NOP (0 << 19)
+# define R300_ALU_ARG2A_NEG (1 << 19)
+# define R300_ALU_ARG2A_ABS (2 << 19)
+# define R300_ALU_ARG2A_NAB (3 << 19)
+# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21)
+# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21)
+# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21)
+# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21)
+
+# define R300_ALU_OUTA_MAD (0 << 23)
+# define R300_ALU_OUTA_DP4 (1 << 23)
+# define R300_ALU_OUTA_MIN (2 << 23)
+# define R300_ALU_OUTA_MAX (3 << 23)
+# define R300_ALU_OUTA_CND (5 << 23)
+# define R300_ALU_OUTA_CMP (6 << 23)
+# define R300_ALU_OUTA_FRC (7 << 23)
+# define R300_ALU_OUTA_EX2 (8 << 23)
+# define R300_ALU_OUTA_LG2 (9 << 23)
+# define R300_ALU_OUTA_RCP (10 << 23)
+# define R300_ALU_OUTA_RSQ (11 << 23)
+
+# define R300_ALU_OUTA_MOD_NOP (0 << 27)
+# define R300_ALU_OUTA_MOD_MUL2 (1 << 27)
+# define R300_ALU_OUTA_MOD_MUL4 (2 << 27)
+# define R300_ALU_OUTA_MOD_MUL8 (3 << 27)
+# define R300_ALU_OUTA_MOD_DIV2 (4 << 27)
+# define R300_ALU_OUTA_MOD_DIV4 (5 << 27)
+# define R300_ALU_OUTA_MOD_DIV8 (6 << 27)
+
+# define R300_ALU_OUTA_CLAMP (1 << 30)
/* END: Fragment program instruction set */
-/* Fog state and color */
-#define R300_RE_FOG_STATE 0x4BC0
-# define R300_FOG_ENABLE (1 << 0)
-# define R300_FOG_MODE_LINEAR (0 << 1)
-# define R300_FOG_MODE_EXP (1 << 1)
-# define R300_FOG_MODE_EXP2 (2 << 1)
-# define R300_FOG_MODE_MASK (3 << 1)
-#define R300_FOG_COLOR_R 0x4BC8
-#define R300_FOG_COLOR_G 0x4BCC
-#define R300_FOG_COLOR_B 0x4BD0
-
-#define R300_PP_ALPHA_TEST 0x4BD4
-# define R300_REF_ALPHA_MASK 0x000000ff
-# define R300_ALPHA_TEST_FAIL (0 << 8)
-# define R300_ALPHA_TEST_LESS (1 << 8)
-# define R300_ALPHA_TEST_LEQUAL (3 << 8)
-# define R300_ALPHA_TEST_EQUAL (2 << 8)
-# define R300_ALPHA_TEST_GEQUAL (6 << 8)
-# define R300_ALPHA_TEST_GREATER (4 << 8)
-# define R300_ALPHA_TEST_NEQUAL (5 << 8)
-# define R300_ALPHA_TEST_PASS (7 << 8)
-# define R300_ALPHA_TEST_OP_MASK (7 << 8)
-# define R300_ALPHA_TEST_ENABLE (1 << 11)
+/* Fog: Fog Blending Enable */
+#define R300_FG_FOG_BLEND 0x4bc0
+# define R300_FG_FOG_BLEND_DISABLE (0 << 0)
+# define R300_FG_FOG_BLEND_ENABLE (1 << 0)
+# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1)
+# define R300_FG_FOG_BLEND_FN_EXP (1 << 1)
+# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1)
+# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1)
+# define R300_FG_FOG_BLEND_FN_MASK (3 << 1)
+
+/* Fog: Red Component of Fog Color */
+#define R300_FG_FOG_COLOR_R 0x4bc8
+/* Fog: Green Component of Fog Color */
+#define R300_FG_FOG_COLOR_G 0x4bcc
+/* Fog: Blue Component of Fog Color */
+#define R300_FG_FOG_COLOR_B 0x4bd0
+# define R300_FG_FOG_COLOR_MASK 0x000003ff
+
+/* Fog: Constant Factor for Fog Blending */
+#define R300_FG_FOG_FACTOR 0x4bc4
+# define FG_FOG_FACTOR_MASK 0x000003ff
+
+/* Fog: Alpha function */
+#define R300_FG_ALPHA_FUNC 0x4bd4
+# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff
+# define R300_FG_ALPHA_FUNC_NEVER (0 << 8)
+# define R300_FG_ALPHA_FUNC_LESS (1 << 8)
+# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8)
+# define R300_FG_ALPHA_FUNC_LE (3 << 8)
+# define R300_FG_ALPHA_FUNC_GREATER (4 << 8)
+# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8)
+# define R300_FG_ALPHA_FUNC_GE (6 << 8)
+# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8)
+# define R300_ALPHA_TEST_OP_MASK (7 << 8)
+# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11)
+# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11)
+
+# define R500_FG_ALPHA_FUNC_10BIT (0 << 12)
+# define R500_FG_ALPHA_FUNC_8BIT (1 << 12)
+
+# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16)
+# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16)
+# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17)
+# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17)
+
+# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20)
+# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20)
+
+# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24)
+# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */
+# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25)
+# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25)
+
+# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28)
+# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28)
+
+
+/* Fog: Where does the depth come from? */
+#define R300_FG_DEPTH_SRC 0x4bd8
+# define R300_FG_DEPTH_SRC_SCAN (0 << 0)
+# define R300_FG_DEPTH_SRC_SHADER (1 << 0)
+
+/* Fog: Alpha Compare Value */
+#define R500_FG_ALPHA_VALUE 0x4be0
+# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff
/* gap */
@@ -1267,12 +2006,33 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_PFS_PARAM_0_Y 0x4C04
#define R300_PFS_PARAM_0_Z 0x4C08
#define R300_PFS_PARAM_0_W 0x4C0C
-/* GUESS: PARAM_31 is last, based on native limits reported by fglrx */
+/* last consts */
#define R300_PFS_PARAM_31_X 0x4DF0
#define R300_PFS_PARAM_31_Y 0x4DF4
#define R300_PFS_PARAM_31_Z 0x4DF8
#define R300_PFS_PARAM_31_W 0x4DFC
+/* Unpipelined. */
+#define R300_RB3D_CCTL 0x4e00
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5)
+# define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7)
+# define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7)
+# define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9)
+# define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9)
+# define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10)
+# define R300_RB3D_CCTL_CMASK_ENABLE (1 << 10)
+/* reserved */
+# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE (0 << 12)
+# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE (1 << 12)
+# define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE (0 << 13)
+# define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE (1 << 13)
+# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE (0 << 14)
+# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE (1 << 14)
+
+
/* Notes:
* - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in
* the application
@@ -1284,9 +2044,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_RB3D_CBLEND 0x4E04
#define R300_RB3D_ABLEND 0x4E08
/* the following only appear in CBLEND */
-# define R300_BLEND_ENABLE (1 << 0)
-# define R300_BLEND_UNKNOWN (3 << 1)
-# define R300_BLEND_NO_SEPARATE (1 << 3)
+# define R300_ALPHA_BLEND_ENABLE (1 << 0)
+# define R300_SEPARATE_ALPHA_ENABLE (1 << 1)
+# define R300_READ_ENABLE (1 << 2)
+# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3)
+
/* the following are shared between CBLEND and ABLEND */
# define R300_FCN_MASK (3 << 12)
# define R300_COMB_FCN_ADD_CLAMP (0 << 12)
@@ -1315,67 +2083,213 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_BLEND_MASK (63)
# define R300_SRC_BLEND_SHIFT (16)
# define R300_DST_BLEND_SHIFT (24)
+
+/* Constant color used by the blender. Pipelined through the blender.
+ * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE,
+ * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead.
+ */
#define R300_RB3D_BLEND_COLOR 0x4E10
-#define R300_RB3D_COLORMASK 0x4E0C
-# define R300_COLORMASK0_B (1<<0)
-# define R300_COLORMASK0_G (1<<1)
-# define R300_COLORMASK0_R (1<<2)
-# define R300_COLORMASK0_A (1<<3)
-/* gap */
-#define R300_RB3D_COLOROFFSET0 0x4E28
-# define R300_COLOROFFSET_MASK 0xFFFFFFF0 /* GUESS */
-#define R300_RB3D_COLOROFFSET1 0x4E2C /* GUESS */
-#define R300_RB3D_COLOROFFSET2 0x4E30 /* GUESS */
-#define R300_RB3D_COLOROFFSET3 0x4E34 /* GUESS */
+/* 3D Color Channel Mask. If all the channels used in the current color format
+ * are disabled, then the cb will discard all the incoming quads. Pipelined
+ * through the blender.
+ */
+#define RB3D_COLOR_CHANNEL_MASK 0x4E0C
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 (1 << 0)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK0 (1 << 2)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1 (1 << 4)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK1 (1 << 6)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2 (1 << 8)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK2 (1 << 10)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3 (1 << 12)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK3 (1 << 14)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15)
+
+/* Clear color that is used when the color mask is set to 00. Unpipelined.
+ * Program this register with a 32-bit value in ARGB8888 or ARGB2101010
+ * formats, ignoring the fields.
+ */
+#define RB3D_COLOR_CLEAR_VALUE 0x4e14
/* gap */
-/* Bit 16: Larger tiles
+/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */
+#define RB3D_CLRCMP_CLR 0x4e20
+
+/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */
+#define RB3D_CLRCMP_MSK 0x4e24
+
+/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */
+#define R300_RB3D_COLOROFFSET0 0x4E28
+# define R300_COLOROFFSET_MASK 0xFFFFFFE0
+/* Color Buffer Address Offset of multibuffer 1. Unpipelined. */
+#define R300_RB3D_COLOROFFSET1 0x4E2C
+/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */
+#define R300_RB3D_COLOROFFSET2 0x4E30
+/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */
+#define R300_RB3D_COLOROFFSET3 0x4E34
+
+/* Color buffer format and tiling control for all the multibuffers and the
+ * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any
+ * of the registers are changed.
+ *
+ * Bit 16: Larger tiles
* Bit 17: 4x2 tiles
* Bit 18: Extremely weird tile like, but some pixels duplicated?
*/
#define R300_RB3D_COLORPITCH0 0x4E38
-# define R300_COLORPITCH_MASK 0x00001FF8 /* GUESS */
-# define R300_COLOR_TILE_ENABLE (1 << 16) /* GUESS */
-# define R300_COLOR_MICROTILE_ENABLE (1 << 17) /* GUESS */
-# define R300_COLOR_ENDIAN_NO_SWAP (0 << 18) /* GUESS */
-# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */
-# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */
-# define R300_COLOR_FORMAT_RGB565 (2 << 22)
-# define R300_COLOR_FORMAT_ARGB8888 (3 << 22)
-#define R300_RB3D_COLORPITCH1 0x4E3C /* GUESS */
-#define R300_RB3D_COLORPITCH2 0x4E40 /* GUESS */
-#define R300_RB3D_COLORPITCH3 0x4E44 /* GUESS */
+# define R300_COLORPITCH_MASK 0x00003FFE
+# define R300_COLOR_TILE_DISABLE (0 << 16)
+# define R300_COLOR_TILE_ENABLE (1 << 16)
+# define R300_COLOR_MICROTILE_DISABLE (0 << 17)
+# define R300_COLOR_MICROTILE_ENABLE (1 << 17)
+# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */
+# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19)
+# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19)
+# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19)
+# define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19)
+# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21)
+# define R500_COLOR_FORMAT_UV1010 (1 << 21)
+# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */
+# define R300_COLOR_FORMAT_ARGB1555 (3 << 21)
+# define R300_COLOR_FORMAT_RGB565 (4 << 21)
+# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21)
+# define R300_COLOR_FORMAT_ARGB8888 (6 << 21)
+# define R300_COLOR_FORMAT_ARGB32323232 (7 << 21)
+/* reserved */
+# define R300_COLOR_FORMAT_I8 (9 << 21)
+# define R300_COLOR_FORMAT_ARGB16161616 (10 << 21)
+# define R300_COLOR_FORMAT_VYUY (11 << 21)
+# define R300_COLOR_FORMAT_YVYU (12 << 21)
+# define R300_COLOR_FORMAT_UV88 (13 << 21)
+# define R500_COLOR_FORMAT_I10 (14 << 21)
+# define R300_COLOR_FORMAT_ARGB4444 (15 << 21)
+#define R300_RB3D_COLORPITCH1 0x4E3C
+#define R300_RB3D_COLORPITCH2 0x4E40
+#define R300_RB3D_COLORPITCH3 0x4E44
/* gap */
-/* Guess by Vladimir.
+/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then
+ * a flush or free will not occur upon a write to this register, but a sync
+ * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE
+ * are zero but DC_FINISH is one, then a sync will be sent immediately -- the
+ * cb will not wait for all the previous operations to complete before sending
+ * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to
+ * zero.
+ *
* Set to 0A before 3D operations, set to 02 afterwards.
*/
-#define R300_RB3D_DSTCACHE_CTLSTAT 0x4E4C
-# define R300_RB3D_DSTCACHE_UNKNOWN_02 0x00000002
-# define R300_RB3D_DSTCACHE_UNKNOWN_0A 0x0000000A
+#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4)
+
+#define R300_RB3D_DITHER_CTL 0x4E50
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0)
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND (1 << 0)
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT (2 << 0)
+/* reserved */
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE (0 << 2)
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND (1 << 2)
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2)
+/* reserved */
+
+/* Resolve buffer destination address. The cache must be empty before changing
+ * this register if the cb is in resolve mode. Unpipelined
+ */
+#define R300_RB3D_AARESOLVE_OFFSET 0x4e80
+# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5
+# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */
+
+/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before
+ * changing this register if the cb is in resolve mode. Unpipelined
+ */
+#define R300_RB3D_AARESOLVE_PITCH 0x4e84
+# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1
+# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */
+
+/* Resolve Buffer Control. Unpipelined */
+#define R300_RB3D_AARESOLVE_CTL 0x4e88
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2)
+
+
+/* Discard src pixels less than or equal to threshold. */
+#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0
+/* Discard src pixels greater than or equal to threshold. */
+#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000
+
+/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. */
+#define R300_RB3D_ROPCNTL 0x4e18
+# define R300_RB3D_ROPCNTL_ROP_ENABLE 0x00000004
+# define R300_RB3D_ROPCNTL_ROP_MASK (15 << 8)
+# define R300_RB3D_ROPCNTL_ROP_SHIFT 8
+
+/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */
+#define R300_RB3D_CLRCMP_FLIPE 0x4e1c
+
+/* Sets the fifo sizes */
+#define R500_RB3D_FIFO_SIZE 0x4ef4
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0)
+
+/* Constant color used by the blender. Pipelined through the blender. */
+#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8
+# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff
+# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0
+# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000
+# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16
+
+/* Constant color used by the blender. Pipelined through the blender. */
+#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc
+# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff
+# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0
+# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000
+# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16
/* gap */
/* There seems to be no "write only" setting, so use Z-test = ALWAYS
* for this.
* Bit (1<<8) is the "test" bit. so plain write is 6 - vd
*/
-#define R300_RB3D_ZSTENCIL_CNTL_0 0x4F00
-# define R300_RB3D_Z_DISABLED_1 0x00000010
-# define R300_RB3D_Z_DISABLED_2 0x00000014
-# define R300_RB3D_Z_TEST 0x00000012
-# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016
-# define R300_RB3D_Z_WRITE_ONLY 0x00000006
-
-# define R300_RB3D_Z_TEST 0x00000012
-# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016
-# define R300_RB3D_Z_WRITE_ONLY 0x00000006
-# define R300_RB3D_STENCIL_ENABLE 0x00000001
-
-#define R300_RB3D_ZSTENCIL_CNTL_1 0x4F04
+#define R300_ZB_CNTL 0x4F00
+# define R300_STENCIL_ENABLE (1 << 0)
+# define R300_Z_ENABLE (1 << 1)
+# define R300_Z_WRITE_ENABLE (1 << 2)
+# define R300_Z_SIGNED_COMPARE (1 << 3)
+# define R300_STENCIL_FRONT_BACK (1 << 4)
+
+#define R300_ZB_ZSTENCILCNTL 0x4f04
/* functions */
# define R300_ZS_NEVER 0
# define R300_ZS_LESS 1
@@ -1395,162 +2309,344 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_ZS_INVERT 5
# define R300_ZS_INCR_WRAP 6
# define R300_ZS_DECR_WRAP 7
+# define R300_Z_FUNC_SHIFT 0
/* front and back refer to operations done for front
and back faces, i.e. separate stencil function support */
-# define R300_RB3D_ZS1_DEPTH_FUNC_SHIFT 0
-# define R300_RB3D_ZS1_FRONT_FUNC_SHIFT 3
-# define R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT 6
-# define R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT 9
-# define R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT 12
-# define R300_RB3D_ZS1_BACK_FUNC_SHIFT 15
-# define R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT 18
-# define R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT 21
-# define R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT 24
-
-#define R300_RB3D_ZSTENCIL_CNTL_2 0x4F08
-# define R300_RB3D_ZS2_STENCIL_REF_SHIFT 0
-# define R300_RB3D_ZS2_STENCIL_MASK 0xFF
-# define R300_RB3D_ZS2_STENCIL_MASK_SHIFT 8
-# define R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT 16
+# define R300_S_FRONT_FUNC_SHIFT 3
+# define R300_S_FRONT_SFAIL_OP_SHIFT 6
+# define R300_S_FRONT_ZPASS_OP_SHIFT 9
+# define R300_S_FRONT_ZFAIL_OP_SHIFT 12
+# define R300_S_BACK_FUNC_SHIFT 15
+# define R300_S_BACK_SFAIL_OP_SHIFT 18
+# define R300_S_BACK_ZPASS_OP_SHIFT 21
+# define R300_S_BACK_ZFAIL_OP_SHIFT 24
+
+#define R300_ZB_STENCILREFMASK 0x4f08
+# define R300_STENCILREF_SHIFT 0
+# define R300_STENCILREF_MASK 0x000000ff
+# define R300_STENCILMASK_SHIFT 8
+# define R300_STENCILMASK_MASK 0x0000ff00
+# define R300_STENCILWRITEMASK_SHIFT 16
+# define R300_STENCILWRITEMASK_MASK 0x00ff0000
/* gap */
-#define R300_RB3D_ZSTENCIL_FORMAT 0x4F10
-# define R300_DEPTH_FORMAT_16BIT_INT_Z (0 << 0)
-# define R300_DEPTH_FORMAT_24BIT_INT_Z (2 << 0)
- /* 16 bit format or some aditional bit ? */
-# define R300_DEPTH_FORMAT_UNK32 (32 << 0)
+#define R300_ZB_FORMAT 0x4f10
+# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0)
+# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0)
+# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0)
+/* reserved up to (15 << 0) */
+# define R300_INVERT_13E3_LEADING_ONES (0 << 4)
+# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4)
-#define R300_RB3D_EARLY_Z 0x4F14
-# define R300_EARLY_Z_DISABLE (0 << 0)
-# define R300_EARLY_Z_ENABLE (1 << 0)
+#define R300_ZB_ZTOP 0x4F14
+# define R300_ZTOP_DISABLE (0 << 0)
+# define R300_ZTOP_ENABLE (1 << 0)
/* gap */
-#define R300_RB3D_ZCACHE_CTLSTAT 0x4F18 /* GUESS */
-# define R300_RB3D_ZCACHE_UNKNOWN_01 0x1
-# define R300_RB3D_ZCACHE_UNKNOWN_03 0x3
+#define R300_ZB_ZCACHE_CTLSTAT 0x4f18
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31)
+
+#define R300_ZB_BW_CNTL 0x4f1c
+# define R300_HIZ_DISABLE (0 << 0)
+# define R300_HIZ_ENABLE (1 << 0)
+# define R300_HIZ_MIN (0 << 1)
+# define R300_HIZ_MAX (1 << 1)
+# define R300_FAST_FILL_DISABLE (0 << 2)
+# define R300_FAST_FILL_ENABLE (1 << 2)
+# define R300_RD_COMP_DISABLE (0 << 3)
+# define R300_RD_COMP_ENABLE (1 << 3)
+# define R300_WR_COMP_DISABLE (0 << 4)
+# define R300_WR_COMP_ENABLE (1 << 4)
+# define R300_ZB_CB_CLEAR_RMW (0 << 5)
+# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5)
+# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6)
+# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6)
+
+# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7)
+# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7)
+# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8)
+# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8)
+
+# define R500_BMASK_ENABLE (0 << 10)
+# define R500_BMASK_DISABLE (1 << 10)
+# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11)
+# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11)
+# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12)
+# define R500_HIZ_FP_EXP_BITS_1 (1 << 12)
+# define R500_HIZ_FP_EXP_BITS_2 (2 << 12)
+# define R500_HIZ_FP_EXP_BITS_3 (3 << 12)
+# define R500_HIZ_FP_EXP_BITS_4 (4 << 12)
+# define R500_HIZ_FP_EXP_BITS_5 (5 << 12)
+# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15)
+# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15)
+# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16)
+# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16)
+# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17)
+# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17)
+# define R500_PEQ_PACKING_DISABLE (0 << 18)
+# define R500_PEQ_PACKING_ENABLE (1 << 18)
+# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18)
+# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18)
+
/* gap */
-#define R300_RB3D_DEPTHOFFSET 0x4F20
-#define R300_RB3D_DEPTHPITCH 0x4F24
-# define R300_DEPTHPITCH_MASK 0x00001FF8 /* GUESS */
-# define R300_DEPTH_TILE_ENABLE (1 << 16) /* GUESS */
-# define R300_DEPTH_MICROTILE_ENABLE (1 << 17) /* GUESS */
-# define R300_DEPTH_ENDIAN_NO_SWAP (0 << 18) /* GUESS */
-# define R300_DEPTH_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */
-# define R300_DEPTH_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */
-
-/* BEGIN: Vertex program instruction set */
-
-/* Every instruction is four dwords long:
- * DWORD 0: output and opcode
- * DWORD 1: first argument
- * DWORD 2: second argument
- * DWORD 3: third argument
- *
- * Notes:
- * - ABS r, a is implemented as MAX r, a, -a
- * - MOV is implemented as ADD to zero
- * - XPD is implemented as MUL + MAD
- * - FLR is implemented as FRC + ADD
- * - apparently, fglrx tries to schedule instructions so that there is at
- * least one instruction between the write to a temporary and the first
- * read from said temporary; however, violations of this scheduling are
- * allowed
- * - register indices seem to be unrelated with OpenGL aliasing to
- * conventional state
- * - only one attribute and one parameter can be loaded at a time; however,
- * the same attribute/parameter can be used for more than one argument
- * - the second software argument for POW is the third hardware argument
- * (no idea why)
- * - MAD with only temporaries as input seems to use VPI_OUT_SELECT_MAD_2
- *
- * There is some magic surrounding LIT:
- * The single argument is replicated across all three inputs, but swizzled:
- * First argument: xyzy
- * Second argument: xyzx
- * Third argument: xyzw
- * Whenever the result is used later in the fragment program, fglrx forces
- * x and w to be 1.0 in the input selection; I don't know whether this is
- * strictly necessary
+/* Z Buffer Address Offset.
+ * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles.
*/
-#define R300_VPI_OUT_OP_DOT (1 << 0)
-#define R300_VPI_OUT_OP_MUL (2 << 0)
-#define R300_VPI_OUT_OP_ADD (3 << 0)
-#define R300_VPI_OUT_OP_MAD (4 << 0)
-#define R300_VPI_OUT_OP_DST (5 << 0)
-#define R300_VPI_OUT_OP_FRC (6 << 0)
-#define R300_VPI_OUT_OP_MAX (7 << 0)
-#define R300_VPI_OUT_OP_MIN (8 << 0)
-#define R300_VPI_OUT_OP_SGE (9 << 0)
-#define R300_VPI_OUT_OP_SLT (10 << 0)
- /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, vector(scalar, vector) */
-#define R300_VPI_OUT_OP_UNK12 (12 << 0)
-#define R300_VPI_OUT_OP_ARL (13 << 0)
-#define R300_VPI_OUT_OP_EXP (65 << 0)
-#define R300_VPI_OUT_OP_LOG (66 << 0)
- /* Used in fog computations, scalar(scalar) */
-#define R300_VPI_OUT_OP_UNK67 (67 << 0)
-#define R300_VPI_OUT_OP_LIT (68 << 0)
-#define R300_VPI_OUT_OP_POW (69 << 0)
-#define R300_VPI_OUT_OP_RCP (70 << 0)
-#define R300_VPI_OUT_OP_RSQ (72 << 0)
- /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, scalar(scalar) */
-#define R300_VPI_OUT_OP_UNK73 (73 << 0)
-#define R300_VPI_OUT_OP_EX2 (75 << 0)
-#define R300_VPI_OUT_OP_LG2 (76 << 0)
-#define R300_VPI_OUT_OP_MAD_2 (128 << 0)
- /* all temps, vector(scalar, vector, vector) */
-#define R300_VPI_OUT_OP_UNK129 (129 << 0)
-
-#define R300_VPI_OUT_REG_CLASS_TEMPORARY (0 << 8)
-#define R300_VPI_OUT_REG_CLASS_ADDR (1 << 8)
-#define R300_VPI_OUT_REG_CLASS_RESULT (2 << 8)
-#define R300_VPI_OUT_REG_CLASS_MASK (31 << 8)
-
-#define R300_VPI_OUT_REG_INDEX_SHIFT 13
- /* GUESS based on fglrx native limits */
-#define R300_VPI_OUT_REG_INDEX_MASK (31 << 13)
-
-#define R300_VPI_OUT_WRITE_X (1 << 20)
-#define R300_VPI_OUT_WRITE_Y (1 << 21)
-#define R300_VPI_OUT_WRITE_Z (1 << 22)
-#define R300_VPI_OUT_WRITE_W (1 << 23)
-
-#define R300_VPI_IN_REG_CLASS_TEMPORARY (0 << 0)
-#define R300_VPI_IN_REG_CLASS_ATTRIBUTE (1 << 0)
-#define R300_VPI_IN_REG_CLASS_PARAMETER (2 << 0)
-#define R300_VPI_IN_REG_CLASS_NONE (9 << 0)
-#define R300_VPI_IN_REG_CLASS_MASK (31 << 0)
-
-#define R300_VPI_IN_REG_INDEX_SHIFT 5
- /* GUESS based on fglrx native limits */
-#define R300_VPI_IN_REG_INDEX_MASK (255 << 5)
-
-/* The R300 can select components from the input register arbitrarily.
- * Use the following constants, shifted by the component shift you
- * want to select
+#define R300_ZB_DEPTHOFFSET 0x4f20
+
+/* Z Buffer Pitch and Endian Control */
+#define R300_ZB_DEPTHPITCH 0x4f24
+# define R300_DEPTHPITCH_MASK 0x00003FFC
+# define R300_DEPTHMACROTILE_DISABLE (0 << 16)
+# define R300_DEPTHMACROTILE_ENABLE (1 << 16)
+# define R300_DEPTHMICROTILE_LINEAR (0 << 17)
+# define R300_DEPTHMICROTILE_TILED (1 << 17)
+# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17)
+# define R300_DEPTHENDIAN_NO_SWAP (0 << 18)
+# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18)
+# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18)
+# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18)
+
+/* Z Buffer Clear Value */
+#define R300_ZB_DEPTHCLEARVALUE 0x4f28
+
+/* Hierarchical Z Memory Offset */
+#define R300_ZB_HIZ_OFFSET 0x4f44
+
+/* Hierarchical Z Write Index */
+#define R300_ZB_HIZ_WRINDEX 0x4f48
+
+/* Hierarchical Z Data */
+#define R300_ZB_HIZ_DWORD 0x4f4c
+
+/* Hierarchical Z Read Index */
+#define R300_ZB_HIZ_RDINDEX 0x4f50
+
+/* Hierarchical Z Pitch */
+#define R300_ZB_HIZ_PITCH 0x4f54
+
+/* Z Buffer Z Pass Counter Data */
+#define R300_ZB_ZPASS_DATA 0x4f58
+
+/* Z Buffer Z Pass Counter Address */
+#define R300_ZB_ZPASS_ADDR 0x4f5c
+
+/* Depth buffer X and Y coordinate offset */
+#define R300_ZB_DEPTHXY_OFFSET 0x4f60
+# define R300_DEPTHX_OFFSET_SHIFT 1
+# define R300_DEPTHX_OFFSET_MASK 0x000007FE
+# define R300_DEPTHY_OFFSET_SHIFT 17
+# define R300_DEPTHY_OFFSET_MASK 0x07FE0000
+
+/* Sets the fifo sizes */
+#define R500_ZB_FIFO_SIZE 0x4fd0
+# define R500_OP_FIFO_SIZE_FULL (0 << 0)
+# define R500_OP_FIFO_SIZE_HALF (1 << 0)
+# define R500_OP_FIFO_SIZE_QUATER (2 << 0)
+# define R500_OP_FIFO_SIZE_EIGTHS (4 << 0)
+
+/* Stencil Reference Value and Mask for backfacing quads */
+/* R300_ZB_STENCILREFMASK handles front face */
+#define R500_ZB_STENCILREFMASK_BF 0x4fd4
+# define R500_STENCILREF_SHIFT 0
+# define R500_STENCILREF_MASK 0x000000ff
+# define R500_STENCILMASK_SHIFT 8
+# define R500_STENCILMASK_MASK 0x0000ff00
+# define R500_STENCILWRITEMASK_SHIFT 16
+# define R500_STENCILWRITEMASK_MASK 0x00ff0000
+
+/**
+ * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION
+ *
+ * The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector
+ * Engine instruction or a Math Engine instruction.
*/
-#define R300_VPI_IN_SELECT_X 0
-#define R300_VPI_IN_SELECT_Y 1
-#define R300_VPI_IN_SELECT_Z 2
-#define R300_VPI_IN_SELECT_W 3
-#define R300_VPI_IN_SELECT_ZERO 4
-#define R300_VPI_IN_SELECT_ONE 5
-#define R300_VPI_IN_SELECT_MASK 7
-
-#define R300_VPI_IN_X_SHIFT 13
-#define R300_VPI_IN_Y_SHIFT 16
-#define R300_VPI_IN_Z_SHIFT 19
-#define R300_VPI_IN_W_SHIFT 22
-
-#define R300_VPI_IN_NEG_X (1 << 25)
-#define R300_VPI_IN_NEG_Y (1 << 26)
-#define R300_VPI_IN_NEG_Z (1 << 27)
-#define R300_VPI_IN_NEG_W (1 << 28)
-/* END: Vertex program instruction set */
+
+/*\{*/
+
+enum {
+ /* R3XX */
+ VECTOR_NO_OP = 0,
+ VE_DOT_PRODUCT = 1,
+ VE_MULTIPLY = 2,
+ VE_ADD = 3,
+ VE_MULTIPLY_ADD = 4,
+ VE_DISTANCE_VECTOR = 5,
+ VE_FRACTION = 6,
+ VE_MAXIMUM = 7,
+ VE_MINIMUM = 8,
+ VE_SET_GREATER_THAN_EQUAL = 9,
+ VE_SET_LESS_THAN = 10,
+ VE_MULTIPLYX2_ADD = 11,
+ VE_MULTIPLY_CLAMP = 12,
+ VE_FLT2FIX_DX = 13,
+ VE_FLT2FIX_DX_RND = 14,
+ /* R5XX */
+ VE_PRED_SET_EQ_PUSH = 15,
+ VE_PRED_SET_GT_PUSH = 16,
+ VE_PRED_SET_GTE_PUSH = 17,
+ VE_PRED_SET_NEQ_PUSH = 18,
+ VE_COND_WRITE_EQ = 19,
+ VE_COND_WRITE_GT = 20,
+ VE_COND_WRITE_GTE = 21,
+ VE_COND_WRITE_NEQ = 22,
+ VE_COND_MUX_EQ = 23,
+ VE_COND_MUX_GT = 24,
+ VE_COND_MUX_GTE = 25,
+ VE_SET_GREATER_THAN = 26,
+ VE_SET_EQUAL = 27,
+ VE_SET_NOT_EQUAL = 28,
+};
+
+enum {
+ /* R3XX */
+ MATH_NO_OP = 0,
+ ME_EXP_BASE2_DX = 1,
+ ME_LOG_BASE2_DX = 2,
+ ME_EXP_BASEE_FF = 3,
+ ME_LIGHT_COEFF_DX = 4,
+ ME_POWER_FUNC_FF = 5,
+ ME_RECIP_DX = 6,
+ ME_RECIP_FF = 7,
+ ME_RECIP_SQRT_DX = 8,
+ ME_RECIP_SQRT_FF = 9,
+ ME_MULTIPLY = 10,
+ ME_EXP_BASE2_FULL_DX = 11,
+ ME_LOG_BASE2_FULL_DX = 12,
+ ME_POWER_FUNC_FF_CLAMP_B = 13,
+ ME_POWER_FUNC_FF_CLAMP_B1 = 14,
+ ME_POWER_FUNC_FF_CLAMP_01 = 15,
+ ME_SIN = 16,
+ ME_COS = 17,
+ /* R5XX */
+ ME_LOG_BASE2_IEEE = 18,
+ ME_RECIP_IEEE = 19,
+ ME_RECIP_SQRT_IEEE = 20,
+ ME_PRED_SET_EQ = 21,
+ ME_PRED_SET_GT = 22,
+ ME_PRED_SET_GTE = 23,
+ ME_PRED_SET_NEQ = 24,
+ ME_PRED_SET_CLR = 25,
+ ME_PRED_SET_INV = 26,
+ ME_PRED_SET_POP = 27,
+ ME_PRED_SET_RESTORE = 28,
+};
+
+enum {
+ /* R3XX */
+ PVS_MACRO_OP_2CLK_MADD = 0,
+ PVS_MACRO_OP_2CLK_M2X_ADD = 1,
+};
+
+enum {
+ PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */
+ PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */
+ PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */
+ PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */
+};
+
+enum {
+ PVS_DST_REG_TEMPORARY = 0, /* Intermediate Storage */
+ PVS_DST_REG_A0 = 1, /* Address Register Storage */
+ PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */
+ PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */
+ PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */
+ PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */
+};
+
+enum {
+ PVS_SRC_SELECT_X = 0, /* Select X Component */
+ PVS_SRC_SELECT_Y = 1, /* Select Y Component */
+ PVS_SRC_SELECT_Z = 2, /* Select Z Component */
+ PVS_SRC_SELECT_W = 3, /* Select W Component */
+ PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */
+ PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */
+};
+
+/* PVS Opcode & Destination Operand Description */
+
+enum {
+ PVS_DST_OPCODE_MASK = 0x3f,
+ PVS_DST_OPCODE_SHIFT = 0,
+ PVS_DST_MATH_INST_MASK = 0x1,
+ PVS_DST_MATH_INST_SHIFT = 6,
+ PVS_DST_MACRO_INST_MASK = 0x1,
+ PVS_DST_MACRO_INST_SHIFT = 7,
+ PVS_DST_REG_TYPE_MASK = 0xf,
+ PVS_DST_REG_TYPE_SHIFT = 8,
+ PVS_DST_ADDR_MODE_1_MASK = 0x1,
+ PVS_DST_ADDR_MODE_1_SHIFT = 12,
+ PVS_DST_OFFSET_MASK = 0x7f,
+ PVS_DST_OFFSET_SHIFT = 13,
+ PVS_DST_WE_X_MASK = 0x1,
+ PVS_DST_WE_X_SHIFT = 20,
+ PVS_DST_WE_Y_MASK = 0x1,
+ PVS_DST_WE_Y_SHIFT = 21,
+ PVS_DST_WE_Z_MASK = 0x1,
+ PVS_DST_WE_Z_SHIFT = 22,
+ PVS_DST_WE_W_MASK = 0x1,
+ PVS_DST_WE_W_SHIFT = 23,
+ PVS_DST_VE_SAT_MASK = 0x1,
+ PVS_DST_VE_SAT_SHIFT = 24,
+ PVS_DST_ME_SAT_MASK = 0x1,
+ PVS_DST_ME_SAT_SHIFT = 25,
+ PVS_DST_PRED_ENABLE_MASK = 0x1,
+ PVS_DST_PRED_ENABLE_SHIFT = 26,
+ PVS_DST_PRED_SENSE_MASK = 0x1,
+ PVS_DST_PRED_SENSE_SHIFT = 27,
+ PVS_DST_DUAL_MATH_OP_MASK = 0x3,
+ PVS_DST_DUAL_MATH_OP_SHIFT = 27,
+ PVS_DST_ADDR_SEL_MASK = 0x3,
+ PVS_DST_ADDR_SEL_SHIFT = 29,
+ PVS_DST_ADDR_MODE_0_MASK = 0x1,
+ PVS_DST_ADDR_MODE_0_SHIFT = 31,
+};
+
+/* PVS Source Operand Description */
+
+enum {
+ PVS_SRC_REG_TYPE_MASK = 0x3,
+ PVS_SRC_REG_TYPE_SHIFT = 0,
+ SPARE_0_MASK = 0x1,
+ SPARE_0_SHIFT = 2,
+ PVS_SRC_ABS_XYZW_MASK = 0x1,
+ PVS_SRC_ABS_XYZW_SHIFT = 3,
+ PVS_SRC_ADDR_MODE_0_MASK = 0x1,
+ PVS_SRC_ADDR_MODE_0_SHIFT = 4,
+ PVS_SRC_OFFSET_MASK = 0xff,
+ PVS_SRC_OFFSET_SHIFT = 5,
+ PVS_SRC_SWIZZLE_X_MASK = 0x7,
+ PVS_SRC_SWIZZLE_X_SHIFT = 13,
+ PVS_SRC_SWIZZLE_Y_MASK = 0x7,
+ PVS_SRC_SWIZZLE_Y_SHIFT = 16,
+ PVS_SRC_SWIZZLE_Z_MASK = 0x7,
+ PVS_SRC_SWIZZLE_Z_SHIFT = 19,
+ PVS_SRC_SWIZZLE_W_MASK = 0x7,
+ PVS_SRC_SWIZZLE_W_SHIFT = 22,
+ PVS_SRC_MODIFIER_X_MASK = 0x1,
+ PVS_SRC_MODIFIER_X_SHIFT = 25,
+ PVS_SRC_MODIFIER_Y_MASK = 0x1,
+ PVS_SRC_MODIFIER_Y_SHIFT = 26,
+ PVS_SRC_MODIFIER_Z_MASK = 0x1,
+ PVS_SRC_MODIFIER_Z_SHIFT = 27,
+ PVS_SRC_MODIFIER_W_MASK = 0x1,
+ PVS_SRC_MODIFIER_W_SHIFT = 28,
+ PVS_SRC_ADDR_SEL_MASK = 0x3,
+ PVS_SRC_ADDR_SEL_SHIFT = 29,
+ PVS_SRC_ADDR_MODE_1_MASK = 0x0,
+ PVS_SRC_ADDR_MODE_1_SHIFT = 32,
+};
+
+/*\}*/
/* BEGIN: Packet 3 commands */
@@ -1583,13 +2679,511 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_PRIM_NUM_VERTICES_SHIFT 16
#define R300_PRIM_NUM_VERTICES_MASK 0xffff
+
+
+/*
+ * The R500 unified shader (US) registers come in banks of 512 each, one
+ * for each instruction slot in the shader. You can't touch them directly.
+ * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive
+ * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the
+ * instruction is fully specified.
+ */
+#define R500_US_ALU_ALPHA_INST_0 0xa800
+# define R500_ALPHA_OP_MAD 0
+# define R500_ALPHA_OP_DP 1
+# define R500_ALPHA_OP_MIN 2
+# define R500_ALPHA_OP_MAX 3
+/* #define R500_ALPHA_OP_RESERVED 4 */
+# define R500_ALPHA_OP_CND 5
+# define R500_ALPHA_OP_CMP 6
+# define R500_ALPHA_OP_FRC 7
+# define R500_ALPHA_OP_EX2 8
+# define R500_ALPHA_OP_LN2 9
+# define R500_ALPHA_OP_RCP 10
+# define R500_ALPHA_OP_RSQ 11
+# define R500_ALPHA_OP_SIN 12
+# define R500_ALPHA_OP_COS 13
+# define R500_ALPHA_OP_MDH 14
+# define R500_ALPHA_OP_MDV 15
+# define R500_ALPHA_ADDRD(x) (x << 4)
+# define R500_ALPHA_ADDRD_REL (1 << 11)
+# define R500_ALPHA_SEL_A_SHIFT 12
+# define R500_ALPHA_SEL_A_SRC0 (0 << 12)
+# define R500_ALPHA_SEL_A_SRC1 (1 << 12)
+# define R500_ALPHA_SEL_A_SRC2 (2 << 12)
+# define R500_ALPHA_SEL_A_SRCP (3 << 12)
+# define R500_ALPHA_SWIZ_A_R (0 << 14)
+# define R500_ALPHA_SWIZ_A_G (1 << 14)
+# define R500_ALPHA_SWIZ_A_B (2 << 14)
+# define R500_ALPHA_SWIZ_A_A (3 << 14)
+# define R500_ALPHA_SWIZ_A_0 (4 << 14)
+# define R500_ALPHA_SWIZ_A_HALF (5 << 14)
+# define R500_ALPHA_SWIZ_A_1 (6 << 14)
+/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */
+# define R500_ALPHA_MOD_A_NOP (0 << 17)
+# define R500_ALPHA_MOD_A_NEG (1 << 17)
+# define R500_ALPHA_MOD_A_ABS (2 << 17)
+# define R500_ALPHA_MOD_A_NAB (3 << 17)
+# define R500_ALPHA_SEL_B_SHIFT 19
+# define R500_ALPHA_SEL_B_SRC0 (0 << 19)
+# define R500_ALPHA_SEL_B_SRC1 (1 << 19)
+# define R500_ALPHA_SEL_B_SRC2 (2 << 19)
+# define R500_ALPHA_SEL_B_SRCP (3 << 19)
+# define R500_ALPHA_SWIZ_B_R (0 << 21)
+# define R500_ALPHA_SWIZ_B_G (1 << 21)
+# define R500_ALPHA_SWIZ_B_B (2 << 21)
+# define R500_ALPHA_SWIZ_B_A (3 << 21)
+# define R500_ALPHA_SWIZ_B_0 (4 << 21)
+# define R500_ALPHA_SWIZ_B_HALF (5 << 21)
+# define R500_ALPHA_SWIZ_B_1 (6 << 21)
+/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */
+# define R500_ALPHA_MOD_B_NOP (0 << 24)
+# define R500_ALPHA_MOD_B_NEG (1 << 24)
+# define R500_ALPHA_MOD_B_ABS (2 << 24)
+# define R500_ALPHA_MOD_B_NAB (3 << 24)
+# define R500_ALPHA_OMOD_IDENTITY (0 << 26)
+# define R500_ALPHA_OMOD_MUL_2 (1 << 26)
+# define R500_ALPHA_OMOD_MUL_4 (2 << 26)
+# define R500_ALPHA_OMOD_MUL_8 (3 << 26)
+# define R500_ALPHA_OMOD_DIV_2 (4 << 26)
+# define R500_ALPHA_OMOD_DIV_4 (5 << 26)
+# define R500_ALPHA_OMOD_DIV_8 (6 << 26)
+# define R500_ALPHA_OMOD_DISABLE (7 << 26)
+# define R500_ALPHA_TARGET(x) (x << 29)
+# define R500_ALPHA_W_OMASK (1 << 31)
+#define R500_US_ALU_ALPHA_ADDR_0 0x9800
+# define R500_ALPHA_ADDR0(x) (x << 0)
+# define R500_ALPHA_ADDR0_CONST (1 << 8)
+# define R500_ALPHA_ADDR0_REL (1 << 9)
+# define R500_ALPHA_ADDR1(x) (x << 10)
+# define R500_ALPHA_ADDR1_CONST (1 << 18)
+# define R500_ALPHA_ADDR1_REL (1 << 19)
+# define R500_ALPHA_ADDR2(x) (x << 20)
+# define R500_ALPHA_ADDR2_CONST (1 << 28)
+# define R500_ALPHA_ADDR2_REL (1 << 29)
+# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30)
+# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30)
+# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30)
+# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30)
+#define R500_US_ALU_RGBA_INST_0 0xb000
+# define R500_ALU_RGBA_OP_MAD (0 << 0)
+# define R500_ALU_RGBA_OP_DP3 (1 << 0)
+# define R500_ALU_RGBA_OP_DP4 (2 << 0)
+# define R500_ALU_RGBA_OP_D2A (3 << 0)
+# define R500_ALU_RGBA_OP_MIN (4 << 0)
+# define R500_ALU_RGBA_OP_MAX (5 << 0)
+/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */
+# define R500_ALU_RGBA_OP_CND (7 << 0)
+# define R500_ALU_RGBA_OP_CMP (8 << 0)
+# define R500_ALU_RGBA_OP_FRC (9 << 0)
+# define R500_ALU_RGBA_OP_SOP (10 << 0)
+# define R500_ALU_RGBA_OP_MDH (11 << 0)
+# define R500_ALU_RGBA_OP_MDV (12 << 0)
+# define R500_ALU_RGBA_ADDRD(x) (x << 4)
+# define R500_ALU_RGBA_ADDRD_REL (1 << 11)
+# define R500_ALU_RGBA_SEL_C_SHIFT 12
+# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12)
+# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12)
+# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12)
+# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12)
+# define R500_ALU_RGBA_R_SWIZ_R (0 << 14)
+# define R500_ALU_RGBA_R_SWIZ_G (1 << 14)
+# define R500_ALU_RGBA_R_SWIZ_B (2 << 14)
+# define R500_ALU_RGBA_R_SWIZ_A (3 << 14)
+# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14)
+# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14)
+# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14)
+/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */
+# define R500_ALU_RGBA_G_SWIZ_R (0 << 17)
+# define R500_ALU_RGBA_G_SWIZ_G (1 << 17)
+# define R500_ALU_RGBA_G_SWIZ_B (2 << 17)
+# define R500_ALU_RGBA_G_SWIZ_A (3 << 17)
+# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17)
+# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17)
+# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17)
+/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */
+# define R500_ALU_RGBA_B_SWIZ_R (0 << 20)
+# define R500_ALU_RGBA_B_SWIZ_G (1 << 20)
+# define R500_ALU_RGBA_B_SWIZ_B (2 << 20)
+# define R500_ALU_RGBA_B_SWIZ_A (3 << 20)
+# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20)
+# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20)
+# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20)
+/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */
+# define R500_ALU_RGBA_MOD_C_NOP (0 << 23)
+# define R500_ALU_RGBA_MOD_C_NEG (1 << 23)
+# define R500_ALU_RGBA_MOD_C_ABS (2 << 23)
+# define R500_ALU_RGBA_MOD_C_NAB (3 << 23)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25)
+# define R500_ALU_RGBA_A_SWIZ_R (0 << 27)
+# define R500_ALU_RGBA_A_SWIZ_G (1 << 27)
+# define R500_ALU_RGBA_A_SWIZ_B (2 << 27)
+# define R500_ALU_RGBA_A_SWIZ_A (3 << 27)
+# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27)
+# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27)
+# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27)
+/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */
+# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30)
+#define R500_US_ALU_RGB_INST_0 0xa000
+# define R500_ALU_RGB_SEL_A_SHIFT 0
+# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0)
+# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0)
+# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0)
+# define R500_ALU_RGB_SEL_A_SRCP (3 << 0)
+# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2)
+/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */
+# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5)
+/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */
+# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8)
+/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */
+# define R500_ALU_RGB_MOD_A_NOP (0 << 11)
+# define R500_ALU_RGB_MOD_A_NEG (1 << 11)
+# define R500_ALU_RGB_MOD_A_ABS (2 << 11)
+# define R500_ALU_RGB_MOD_A_NAB (3 << 11)
+# define R500_ALU_RGB_SEL_B_SHIFT 13
+# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13)
+# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13)
+# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13)
+# define R500_ALU_RGB_SEL_B_SRCP (3 << 13)
+# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15)
+/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */
+# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_A (3 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18)
+/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */
+# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21)
+/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */
+# define R500_ALU_RGB_MOD_B_NOP (0 << 24)
+# define R500_ALU_RGB_MOD_B_NEG (1 << 24)
+# define R500_ALU_RGB_MOD_B_ABS (2 << 24)
+# define R500_ALU_RGB_MOD_B_NAB (3 << 24)
+# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26)
+# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26)
+# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26)
+# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26)
+# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26)
+# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26)
+# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26)
+# define R500_ALU_RGB_OMOD_DISABLE (7 << 26)
+# define R500_ALU_RGB_TARGET(x) (x << 29)
+# define R500_ALU_RGB_WMASK (1 << 31)
+#define R500_US_ALU_RGB_ADDR_0 0x9000
+# define R500_RGB_ADDR0(x) (x << 0)
+# define R500_RGB_ADDR0_CONST (1 << 8)
+# define R500_RGB_ADDR0_REL (1 << 9)
+# define R500_RGB_ADDR1(x) (x << 10)
+# define R500_RGB_ADDR1_CONST (1 << 18)
+# define R500_RGB_ADDR1_REL (1 << 19)
+# define R500_RGB_ADDR2(x) (x << 20)
+# define R500_RGB_ADDR2_CONST (1 << 28)
+# define R500_RGB_ADDR2_REL (1 << 29)
+# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30)
+# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30)
+# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30)
+# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30)
+#define R500_US_CMN_INST_0 0xb800
+# define R500_INST_TYPE_MASK (3 << 0)
+# define R500_INST_TYPE_ALU (0 << 0)
+# define R500_INST_TYPE_OUT (1 << 0)
+# define R500_INST_TYPE_FC (2 << 0)
+# define R500_INST_TYPE_TEX (3 << 0)
+# define R500_INST_TEX_SEM_WAIT (1 << 2)
+# define R500_INST_RGB_PRED_SEL_NONE (0 << 3)
+# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3)
+# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3)
+# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3)
+# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3)
+# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3)
+# define R500_INST_RGB_PRED_INV (1 << 6)
+# define R500_INST_WRITE_INACTIVE (1 << 7)
+# define R500_INST_LAST (1 << 8)
+# define R500_INST_NOP (1 << 9)
+# define R500_INST_ALU_WAIT (1 << 10)
+# define R500_INST_RGB_WMASK_R (1 << 11)
+# define R500_INST_RGB_WMASK_G (1 << 12)
+# define R500_INST_RGB_WMASK_B (1 << 13)
+# define R500_INST_ALPHA_WMASK (1 << 14)
+# define R500_INST_RGB_OMASK_R (1 << 15)
+# define R500_INST_RGB_OMASK_G (1 << 16)
+# define R500_INST_RGB_OMASK_B (1 << 17)
+# define R500_INST_ALPHA_OMASK (1 << 18)
+# define R500_INST_RGB_CLAMP (1 << 19)
+# define R500_INST_ALPHA_CLAMP (1 << 20)
+# define R500_INST_ALU_RESULT_SEL (1 << 21)
+# define R500_INST_ALPHA_PRED_INV (1 << 22)
+# define R500_INST_ALU_RESULT_OP_EQ (0 << 23)
+# define R500_INST_ALU_RESULT_OP_LT (1 << 23)
+# define R500_INST_ALU_RESULT_OP_GE (2 << 23)
+# define R500_INST_ALU_RESULT_OP_NE (3 << 23)
+# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25)
+# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25)
+# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25)
+# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25)
+# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25)
+# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25)
+/* XXX next four are kind of guessed */
+# define R500_INST_STAT_WE_R (1 << 28)
+# define R500_INST_STAT_WE_G (1 << 29)
+# define R500_INST_STAT_WE_B (1 << 30)
+# define R500_INST_STAT_WE_A (1 << 31)
+
+/* note that these are 8 bit lengths, despite the offsets, at least for R500 */
+#define R500_US_CODE_ADDR 0x4630
+# define R500_US_CODE_START_ADDR(x) (x << 0)
+# define R500_US_CODE_END_ADDR(x) (x << 16)
+#define R500_US_CODE_OFFSET 0x4638
+# define R500_US_CODE_OFFSET_ADDR(x) (x << 0)
+#define R500_US_CODE_RANGE 0x4634
+# define R500_US_CODE_RANGE_ADDR(x) (x << 0)
+# define R500_US_CODE_RANGE_SIZE(x) (x << 16)
+#define R500_US_CONFIG 0x4600
+# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
+#define R500_US_FC_ADDR_0 0xa000
+# define R500_FC_BOOL_ADDR(x) (x << 0)
+# define R500_FC_INT_ADDR(x) (x << 8)
+# define R500_FC_JUMP_ADDR(x) (x << 16)
+# define R500_FC_JUMP_GLOBAL (1 << 31)
+#define R500_US_FC_BOOL_CONST 0x4620
+# define R500_FC_KBOOL(x) (x)
+#define R500_US_FC_CTRL 0x4624
+# define R500_FC_TEST_EN (1 << 30)
+# define R500_FC_FULL_FC_EN (1 << 31)
+#define R500_US_FC_INST_0 0x9800
+# define R500_FC_OP_JUMP (0 << 0)
+# define R500_FC_OP_LOOP (1 << 0)
+# define R500_FC_OP_ENDLOOP (2 << 0)
+# define R500_FC_OP_REP (3 << 0)
+# define R500_FC_OP_ENDREP (4 << 0)
+# define R500_FC_OP_BREAKLOOP (5 << 0)
+# define R500_FC_OP_BREAKREP (6 << 0)
+# define R500_FC_OP_CONTINUE (7 << 0)
+# define R500_FC_B_ELSE (1 << 4)
+# define R500_FC_JUMP_ANY (1 << 5)
+# define R500_FC_A_OP_NONE (0 << 6)
+# define R500_FC_A_OP_POP (1 << 6)
+# define R500_FC_A_OP_PUSH (2 << 6)
+# define R500_FC_JUMP_FUNC(x) (x << 8)
+# define R500_FC_B_POP_CNT(x) (x << 16)
+# define R500_FC_B_OP0_NONE (0 << 24)
+# define R500_FC_B_OP0_DECR (1 << 24)
+# define R500_FC_B_OP0_INCR (2 << 24)
+# define R500_FC_B_OP1_DECR (0 << 26)
+# define R500_FC_B_OP1_NONE (1 << 26)
+# define R500_FC_B_OP1_INCR (2 << 26)
+# define R500_FC_IGNORE_UNCOVERED (1 << 28)
+#define R500_US_FC_INT_CONST_0 0x4c00
+# define R500_FC_INT_CONST_KR(x) (x << 0)
+# define R500_FC_INT_CONST_KG(x) (x << 8)
+# define R500_FC_INT_CONST_KB(x) (x << 16)
+/* _0 through _15 */
+#define R500_US_FORMAT0_0 0x4640
+# define R500_FORMAT_TXWIDTH(x) (x << 0)
+# define R500_FORMAT_TXHEIGHT(x) (x << 11)
+# define R500_FORMAT_TXDEPTH(x) (x << 22)
+/* _0 through _3 */
+#define R500_US_OUT_FMT_0 0x46a4
+# define R500_OUT_FMT_C4_8 (0 << 0)
+# define R500_OUT_FMT_C4_10 (1 << 0)
+# define R500_OUT_FMT_C4_10_GAMMA (2 << 0)
+# define R500_OUT_FMT_C_16 (3 << 0)
+# define R500_OUT_FMT_C2_16 (4 << 0)
+# define R500_OUT_FMT_C4_16 (5 << 0)
+# define R500_OUT_FMT_C_16_MPEG (6 << 0)
+# define R500_OUT_FMT_C2_16_MPEG (7 << 0)
+# define R500_OUT_FMT_C2_4 (8 << 0)
+# define R500_OUT_FMT_C_3_3_2 (9 << 0)
+# define R500_OUT_FMT_C_6_5_6 (10 << 0)
+# define R500_OUT_FMT_C_11_11_10 (11 << 0)
+# define R500_OUT_FMT_C_10_11_11 (12 << 0)
+# define R500_OUT_FMT_C_2_10_10_10 (13 << 0)
+/* #define R500_OUT_FMT_RESERVED (14 << 0) */
+# define R500_OUT_FMT_UNUSED (15 << 0)
+# define R500_OUT_FMT_C_16_FP (16 << 0)
+# define R500_OUT_FMT_C2_16_FP (17 << 0)
+# define R500_OUT_FMT_C4_16_FP (18 << 0)
+# define R500_OUT_FMT_C_32_FP (19 << 0)
+# define R500_OUT_FMT_C2_32_FP (20 << 0)
+# define R500_OUT_FMT_C4_32_FP (21 << 0)
+# define R500_C0_SEL_A (0 << 8)
+# define R500_C0_SEL_R (1 << 8)
+# define R500_C0_SEL_G (2 << 8)
+# define R500_C0_SEL_B (3 << 8)
+# define R500_C1_SEL_A (0 << 10)
+# define R500_C1_SEL_R (1 << 10)
+# define R500_C1_SEL_G (2 << 10)
+# define R500_C1_SEL_B (3 << 10)
+# define R500_C2_SEL_A (0 << 12)
+# define R500_C2_SEL_R (1 << 12)
+# define R500_C2_SEL_G (2 << 12)
+# define R500_C2_SEL_B (3 << 12)
+# define R500_C3_SEL_A (0 << 14)
+# define R500_C3_SEL_R (1 << 14)
+# define R500_C3_SEL_G (2 << 14)
+# define R500_C3_SEL_B (3 << 14)
+# define R500_OUT_SIGN(x) (x << 16)
+# define R500_ROUND_ADJ (1 << 20)
+#define R500_US_PIXSIZE 0x4604
+# define R500_PIX_SIZE(x) (x)
+#define R500_US_TEX_ADDR_0 0x9800
+# define R500_TEX_SRC_ADDR(x) (x << 0)
+# define R500_TEX_SRC_ADDR_REL (1 << 7)
+# define R500_TEX_SRC_S_SWIZ_R (0 << 8)
+# define R500_TEX_SRC_S_SWIZ_G (1 << 8)
+# define R500_TEX_SRC_S_SWIZ_B (2 << 8)
+# define R500_TEX_SRC_S_SWIZ_A (3 << 8)
+# define R500_TEX_SRC_T_SWIZ_R (0 << 10)
+# define R500_TEX_SRC_T_SWIZ_G (1 << 10)
+# define R500_TEX_SRC_T_SWIZ_B (2 << 10)
+# define R500_TEX_SRC_T_SWIZ_A (3 << 10)
+# define R500_TEX_SRC_R_SWIZ_R (0 << 12)
+# define R500_TEX_SRC_R_SWIZ_G (1 << 12)
+# define R500_TEX_SRC_R_SWIZ_B (2 << 12)
+# define R500_TEX_SRC_R_SWIZ_A (3 << 12)
+# define R500_TEX_SRC_Q_SWIZ_R (0 << 14)
+# define R500_TEX_SRC_Q_SWIZ_G (1 << 14)
+# define R500_TEX_SRC_Q_SWIZ_B (2 << 14)
+# define R500_TEX_SRC_Q_SWIZ_A (3 << 14)
+# define R500_TEX_DST_ADDR(x) (x << 16)
+# define R500_TEX_DST_ADDR_REL (1 << 23)
+# define R500_TEX_DST_R_SWIZ_R (0 << 24)
+# define R500_TEX_DST_R_SWIZ_G (1 << 24)
+# define R500_TEX_DST_R_SWIZ_B (2 << 24)
+# define R500_TEX_DST_R_SWIZ_A (3 << 24)
+# define R500_TEX_DST_G_SWIZ_R (0 << 26)
+# define R500_TEX_DST_G_SWIZ_G (1 << 26)
+# define R500_TEX_DST_G_SWIZ_B (2 << 26)
+# define R500_TEX_DST_G_SWIZ_A (3 << 26)
+# define R500_TEX_DST_B_SWIZ_R (0 << 28)
+# define R500_TEX_DST_B_SWIZ_G (1 << 28)
+# define R500_TEX_DST_B_SWIZ_B (2 << 28)
+# define R500_TEX_DST_B_SWIZ_A (3 << 28)
+# define R500_TEX_DST_A_SWIZ_R (0 << 30)
+# define R500_TEX_DST_A_SWIZ_G (1 << 30)
+# define R500_TEX_DST_A_SWIZ_B (2 << 30)
+# define R500_TEX_DST_A_SWIZ_A (3 << 30)
+#define R500_US_TEX_ADDR_DXDY_0 0xa000
+# define R500_DX_ADDR(x) (x << 0)
+# define R500_DX_ADDR_REL (1 << 7)
+# define R500_DX_S_SWIZ_R (0 << 8)
+# define R500_DX_S_SWIZ_G (1 << 8)
+# define R500_DX_S_SWIZ_B (2 << 8)
+# define R500_DX_S_SWIZ_A (3 << 8)
+# define R500_DX_T_SWIZ_R (0 << 10)
+# define R500_DX_T_SWIZ_G (1 << 10)
+# define R500_DX_T_SWIZ_B (2 << 10)
+# define R500_DX_T_SWIZ_A (3 << 10)
+# define R500_DX_R_SWIZ_R (0 << 12)
+# define R500_DX_R_SWIZ_G (1 << 12)
+# define R500_DX_R_SWIZ_B (2 << 12)
+# define R500_DX_R_SWIZ_A (3 << 12)
+# define R500_DX_Q_SWIZ_R (0 << 14)
+# define R500_DX_Q_SWIZ_G (1 << 14)
+# define R500_DX_Q_SWIZ_B (2 << 14)
+# define R500_DX_Q_SWIZ_A (3 << 14)
+# define R500_DY_ADDR(x) (x << 16)
+# define R500_DY_ADDR_REL (1 << 17)
+# define R500_DY_S_SWIZ_R (0 << 24)
+# define R500_DY_S_SWIZ_G (1 << 24)
+# define R500_DY_S_SWIZ_B (2 << 24)
+# define R500_DY_S_SWIZ_A (3 << 24)
+# define R500_DY_T_SWIZ_R (0 << 26)
+# define R500_DY_T_SWIZ_G (1 << 26)
+# define R500_DY_T_SWIZ_B (2 << 26)
+# define R500_DY_T_SWIZ_A (3 << 26)
+# define R500_DY_R_SWIZ_R (0 << 28)
+# define R500_DY_R_SWIZ_G (1 << 28)
+# define R500_DY_R_SWIZ_B (2 << 28)
+# define R500_DY_R_SWIZ_A (3 << 28)
+# define R500_DY_Q_SWIZ_R (0 << 30)
+# define R500_DY_Q_SWIZ_G (1 << 30)
+# define R500_DY_Q_SWIZ_B (2 << 30)
+# define R500_DY_Q_SWIZ_A (3 << 30)
+#define R500_US_TEX_INST_0 0x9000
+# define R500_TEX_ID(x) (x << 16)
+# define R500_TEX_INST_NOP (0 << 22)
+# define R500_TEX_INST_LD (1 << 22)
+# define R500_TEX_INST_TEXKILL (2 << 22)
+# define R500_TEX_INST_PROJ (3 << 22)
+# define R500_TEX_INST_LODBIAS (4 << 22)
+# define R500_TEX_INST_LOD (5 << 22)
+# define R500_TEX_INST_DXDY (6 << 22)
+# define R500_TEX_SEM_ACQUIRE (1 << 25)
+# define R500_TEX_IGNORE_UNCOVERED (1 << 26)
+# define R500_TEX_UNSCALED (1 << 27)
+#define R300_US_W_FMT 0x46b4
+# define R300_W_FMT_W0 (0 << 0)
+# define R300_W_FMT_W24 (1 << 0)
+# define R300_W_FMT_W24FP (2 << 0)
+# define R300_W_SRC_US (0 << 2)
+# define R300_W_SRC_RAS (1 << 2)
+
+
/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
* Two parameter dwords:
- * 0. The first parameter appears to be always 0
- * 1. The second parameter is a standard primitive emission dword.
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
*/
#define R300_PACKET3_3D_DRAW_VBUF 0x00002800
+/* Draw a primitive from immediate vertices in this packet
+ * Up to 16382 dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ * 2 to end: Up to 16380 dwords of vertex data.
+ */
+#define R300_PACKET3_3D_DRAW_IMMD 0x00002900
+
+/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and
+ * immediate vertices in this packet
+ * Up to 16382 dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ * 2 to end: Up to 16380 dwords of vertex data.
+ */
+#define R300_PACKET3_3D_DRAW_INDX 0x00002A00
+
+
/* Specify the full set of vertex arrays as (address, stride).
* The first parameter is the number of vertex arrays specified.
* The rest of the command is a variable length list of blocks, where
@@ -1610,8 +3204,29 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_EB_UNK1_SHIFT 24
# define R300_EB_UNK1 (0x80<<24)
# define R300_EB_UNK2 0x0810
+
+/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400
+/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_IMMD_2 0x00003500
+/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */
#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600
+/* Clears a portion of hierachical Z RAM
+ * 3 dword parameters
+ * 0. START
+ * 1. COUNT: 13:0 (max is 0x3FFF)
+ * 2. CLEAR_VALUE: Value to write into HIZ RAM.
+ */
+#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700
+
+/* Draws a set of primitives using vertex buffers pointed by the state data.
+ * At least 2 Parameters:
+ * 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword.
+ * 2 to end: Data or indices (see other 3D_DRAW_* packets for details)
+ */
+#define R300_PACKET3_3D_DRAW_128 0x00003900
+
/* END: Packet 3 commands */
@@ -1633,3 +3248,5 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#endif /* _R300_REG_H */
/* *INDENT-ON* */
+
+/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */
diff --git a/r300/r300_render.c b/r300/r300_render.c
index cc13e9a..0a199e6 100644
--- a/r300/r300_render.c
+++ b/r300/r300_render.c
@@ -45,6 +45,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* obviously this does work... Further investigation is needed.
*
* \author Nicolai Haehnle <prefect_@gmx.net>
+ *
+ * \todo Add immediate implementation back? Perhaps this is useful if there are
+ * no bugs...
*/
#include "glheader.h"
@@ -71,12 +74,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_reg.h"
#include "r300_tex.h"
#include "r300_emit.h"
+#include "r300_fragprog.h"
extern int future_hw_tcl_on;
/**
* \brief Convert a OpenGL primitive type into a R300 primitive type.
*/
-static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim)
+int r300PrimitiveType(r300ContextPtr rmesa, int prim)
{
switch (prim & PRIM_MODE_MASK) {
case GL_POINTS:
@@ -116,7 +120,7 @@ static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim)
}
}
-static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
+int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
{
int verts_off = 0;
@@ -168,16 +172,13 @@ static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
return num_verts - verts_off;
}
-static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts,
- int elt_size)
+static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct r300_dma_region *rvb = &rmesa->state.elt_dma;
void *out;
- assert(elt_size == 2 || elt_size == 4);
-
- if (r300IsGartMemory(rmesa, elts, n_elts * elt_size)) {
+ if (r300IsGartMemory(rmesa, elts, n_elts * 4)) {
rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
rvb->start = ((char *)elts) - rvb->address;
rvb->aos_offset =
@@ -189,66 +190,27 @@ static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts,
_mesa_exit(-1);
}
- r300AllocDmaRegion(rmesa, rvb, n_elts * elt_size, elt_size);
+ r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4);
rvb->aos_offset = GET_START(rvb);
out = rvb->address + rvb->start;
- memcpy(out, elts, n_elts * elt_size);
+ memcpy(out, elts, n_elts * 4);
}
static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
- int vertex_count, int type, int elt_size)
+ int vertex_count, int type)
{
int cmd_reserved = 0;
int cmd_written = 0;
drm_radeon_cmd_header_t *cmd = NULL;
- unsigned long t_addr;
- unsigned long magic_1, magic_2;
-
- assert(elt_size == 2 || elt_size == 4);
-
- if (addr & (elt_size - 1)) {
- WARN_ONCE("Badly aligned buffer\n");
- return;
- }
-
- magic_1 = (addr % 32) / 4;
- t_addr = addr & ~0x1d;
- magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1;
- start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
- if (elt_size == 4) {
- e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
- (vertex_count << 16) | type |
- R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
- } else {
- e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
- (vertex_count << 16) | type);
- }
+ start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0);
+ e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
- start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
-#ifdef OPTIMIZE_ELTS
- if (elt_size == 4) {
- e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
- e32(addr);
- } else {
- e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2);
- e32(t_addr);
- }
-#else
+ start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2);
e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
e32(addr);
-#endif
-
- if (elt_size == 4) {
- e32(vertex_count);
- } else {
-#ifdef OPTIMIZE_ELTS
- e32(magic_2);
-#else
- e32((vertex_count + 1) / 2);
-#endif
- }
+ e32(vertex_count);
}
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
@@ -263,26 +225,23 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
offset);
- start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+ start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1);
e32(nr);
+
for (i = 0; i + 1 < nr; i += 2) {
- e32((rmesa->state.aos[i].aos_size << 0)
- | (rmesa->state.aos[i].aos_stride << 8)
- | (rmesa->state.aos[i + 1].aos_size << 16)
- | (rmesa->state.aos[i + 1].aos_stride << 24)
- );
- e32(rmesa->state.aos[i].aos_offset +
- offset * 4 * rmesa->state.aos[i].aos_stride);
- e32(rmesa->state.aos[i + 1].aos_offset +
- offset * 4 * rmesa->state.aos[i + 1].aos_stride);
+ e32((rmesa->state.aos[i].aos_size << 0) |
+ (rmesa->state.aos[i].aos_stride << 8) |
+ (rmesa->state.aos[i + 1].aos_size << 16) |
+ (rmesa->state.aos[i + 1].aos_stride << 24));
+
+ e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride);
+ e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride);
}
if (nr & 1) {
- e32((rmesa->state.aos[nr - 1].aos_size << 0)
- | (rmesa->state.aos[nr - 1].aos_stride << 8)
- );
- e32(rmesa->state.aos[nr - 1].aos_offset +
- offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
+ e32((rmesa->state.aos[nr - 1].aos_size << 0) |
+ (rmesa->state.aos[nr - 1].aos_stride << 8));
+ e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
}
}
@@ -292,130 +251,78 @@ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
int cmd_written = 0;
drm_radeon_cmd_header_t *cmd = NULL;
- start_packet3(RADEON_CP_PACKET3_3D_DRAW_VBUF_2, 0);
- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16)
- | type);
+ start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
+ e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
}
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
int start, int end, int prim)
{
int type, num_verts;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
- type = r300PrimitiveType(rmesa, ctx, prim);
+ type = r300PrimitiveType(rmesa, prim);
num_verts = r300NumVerts(rmesa, end - start, prim);
if (type < 0 || num_verts <= 0)
return;
- if (rmesa->state.VB.Elts) {
- r300EmitAOS(rmesa, rmesa->state.aos_count, start);
+ if (vb->Elts) {
if (num_verts > 65535) {
/* not implemented yet */
WARN_ONCE("Too many elts\n");
return;
}
- r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts,
- rmesa->state.VB.elt_size);
- r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset,
- num_verts, type, rmesa->state.VB.elt_size);
+ /* Note: The following is incorrect, but it's the best I can do
+ * without a major refactoring of how DMA memory is handled.
+ * The problem: Ensuring that both vertex arrays *and* index
+ * arrays are at the right position, and then ensuring that
+ * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
+ * at once.
+ *
+ * So why is the following incorrect? Well, it seems like
+ * allocating the index array might actually evict the vertex
+ * arrays. *sigh*
+ */
+ r300EmitElts(ctx, vb->Elts, num_verts);
+ r300EmitAOS(rmesa, rmesa->state.aos_count, start);
+ r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type);
} else {
r300EmitAOS(rmesa, rmesa->state.aos_count, start);
r300FireAOS(rmesa, num_verts, type);
}
}
-#define CONV_VB(a, b) rvb->AttribPtr[(a)].size = vb->b->size, \
- rvb->AttribPtr[(a)].type = GL_FLOAT, \
- rvb->AttribPtr[(a)].stride = vb->b->stride, \
- rvb->AttribPtr[(a)].data = vb->b->data
-
-static void radeon_vb_to_rvb(r300ContextPtr rmesa,
- struct radeon_vertex_buffer *rvb,
- struct vertex_buffer *vb)
-{
- int i;
- GLcontext *ctx;
- ctx = rmesa->radeon.glCtx;
-
- memset(rvb, 0, sizeof(*rvb));
-
- rvb->Elts = vb->Elts;
- rvb->elt_size = 4;
- rvb->elt_min = 0;
- rvb->elt_max = vb->Count;
-
- rvb->Count = vb->Count;
-
- if (hw_tcl_on) {
- CONV_VB(VERT_ATTRIB_POS, ObjPtr);
- } else {
- assert(vb->ClipPtr);
- CONV_VB(VERT_ATTRIB_POS, ClipPtr);
- }
-
- CONV_VB(VERT_ATTRIB_NORMAL, NormalPtr);
- CONV_VB(VERT_ATTRIB_COLOR0, ColorPtr[0]);
- CONV_VB(VERT_ATTRIB_COLOR1, SecondaryColorPtr[0]);
- CONV_VB(VERT_ATTRIB_FOG, FogCoordPtr);
-
- for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++)
- CONV_VB(VERT_ATTRIB_TEX0 + i, TexCoordPtr[i]);
-
- for (i = 0; i < MAX_VERTEX_PROGRAM_ATTRIBS; i++)
- CONV_VB(VERT_ATTRIB_GENERIC0 + i,
- AttribPtr[VERT_ATTRIB_GENERIC0 + i]);
-
- rvb->Primitive = vb->Primitive;
- rvb->PrimitiveCount = vb->PrimitiveCount;
- rvb->LockFirst = rvb->LockCount = 0;
- rvb->lock_uptodate = GL_FALSE;
-}
-
static GLboolean r300RunRender(GLcontext * ctx,
struct tnl_pipeline_stage *stage)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct radeon_vertex_buffer *VB = &rmesa->state.VB;
int i;
- int cmd_reserved = 0;
- int cmd_written = 0;
- drm_radeon_cmd_header_t *cmd = NULL;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
+
if (RADEON_DEBUG & DEBUG_PRIMS)
fprintf(stderr, "%s\n", __FUNCTION__);
- if (stage) {
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- radeon_vb_to_rvb(rmesa, VB, &tnl->vb);
- }
-
r300UpdateShaders(rmesa);
if (r300EmitArrays(ctx))
return GL_TRUE;
r300UpdateShaderStates(rmesa);
- reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
- e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
-
- reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
- e32(R300_RB3D_ZCACHE_UNKNOWN_03);
-
+ r300EmitCacheFlush(rmesa);
r300EmitState(rmesa);
- for (i = 0; i < VB->PrimitiveCount; i++) {
- GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
- GLuint start = VB->Primitive[i].start;
- GLuint end = VB->Primitive[i].start + VB->Primitive[i].count;
+ for (i = 0; i < vb->PrimitiveCount; i++) {
+ GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
+ GLuint start = vb->Primitive[i].start;
+ GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
}
- reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
- e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
-
- reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
- e32(R300_RB3D_ZCACHE_UNKNOWN_03);
+ r300EmitCacheFlush(rmesa);
#ifdef USER_BUFFERS
r300UseArrays(ctx);
@@ -439,13 +346,26 @@ static GLboolean r300RunRender(GLcontext * ctx,
static int r300Fallback(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- struct r300_fragment_program *fp = (struct r300_fragment_program *)
+ /* Do we need to use new-style shaders?
+ * Also is there a better way to do this? */
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ struct r500_fragment_program *fp = (struct r500_fragment_program *)
(char *)ctx->FragmentProgram._Current;
-
- if (fp) {
- if (!fp->translated)
- r300TranslateFragmentShader(r300, fp);
- FALLBACK_IF(!fp->translated);
+ if (fp) {
+ if (!fp->translated) {
+ r500TranslateFragmentShader(r300, fp);
+ FALLBACK_IF(!fp->translated);
+ }
+ }
+ } else {
+ struct r300_fragment_program *fp = (struct r300_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+ if (fp) {
+ if (!fp->translated) {
+ r300TranslateFragmentShader(r300, fp);
+ FALLBACK_IF(!fp->translated);
+ }
+ }
}
FALLBACK_IF(ctx->RenderMode != GL_RENDER);
@@ -457,16 +377,12 @@ static int r300Fallback(GLcontext * ctx)
|| ctx->Stencil.WriteMask[0] !=
ctx->Stencil.WriteMask[1]));
- FALLBACK_IF(ctx->Color.ColorLogicOpEnabled);
-
if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
FALLBACK_IF(ctx->Point.PointSprite);
if (!r300->disable_lowimpact_fallback) {
- FALLBACK_IF(ctx->Polygon.OffsetPoint);
- FALLBACK_IF(ctx->Polygon.OffsetLine);
FALLBACK_IF(ctx->Polygon.StippleFlag);
- FALLBACK_IF(ctx->Multisample.Enabled);
+ FALLBACK_IF(ctx->Multisample._Enabled);
FALLBACK_IF(ctx->Line.StippleFlag);
FALLBACK_IF(ctx->Line.SmoothFlag);
FALLBACK_IF(ctx->Point.SmoothFlag);
@@ -478,12 +394,17 @@ static int r300Fallback(GLcontext * ctx)
static GLboolean r300RunNonTCLRender(GLcontext * ctx,
struct tnl_pipeline_stage *stage)
{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
if (RADEON_DEBUG & DEBUG_PRIMS)
fprintf(stderr, "%s\n", __FUNCTION__);
if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
return GL_TRUE;
+ if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ return GL_TRUE;
+
return r300RunRender(ctx, stage);
}
diff --git a/r300/r300_shader.c b/r300/r300_shader.c
index 59fe17b..f30fd98 100644
--- a/r300/r300_shader.c
+++ b/r300/r300_shader.c
@@ -1,8 +1,7 @@
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-#include "program.h"
+#include "main/glheader.h"
+
+#include "shader/program.h"
#include "tnl/tnl.h"
#include "r300_context.h"
#include "r300_fragprog.h"
@@ -10,8 +9,10 @@
static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
GLuint id)
{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct r300_vertex_program_cont *vp;
- struct r300_fragment_program *fp;
+ struct r300_fragment_program *r300_fp;
+ struct r500_fragment_program *r500_fp;
switch (target) {
case GL_VERTEX_STATE_PROGRAM_NV:
@@ -20,14 +21,27 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
return _mesa_init_vertex_program(ctx, &vp->mesa_program,
target, id);
case GL_FRAGMENT_PROGRAM_ARB:
- fp = CALLOC_STRUCT(r300_fragment_program);
- fp->ctx = ctx;
- return _mesa_init_fragment_program(ctx, &fp->mesa_program,
- target, id);
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ r500_fp = CALLOC_STRUCT(r500_fragment_program);
+ r500_fp->ctx = ctx;
+ return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program,
+ target, id);
+ } else {
+ r300_fp = CALLOC_STRUCT(r300_fragment_program);
+ return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program,
+ target, id);
+ }
+
case GL_FRAGMENT_PROGRAM_NV:
- fp = CALLOC_STRUCT(r300_fragment_program);
- return _mesa_init_fragment_program(ctx, &fp->mesa_program,
- target, id);
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ r500_fp = CALLOC_STRUCT(r500_fragment_program);
+ return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program,
+ target, id);
+ } else {
+ r300_fp = CALLOC_STRUCT(r300_fragment_program);
+ return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program,
+ target, id);
+ }
default:
_mesa_problem(ctx, "Bad target in r300NewProgram");
}
@@ -43,17 +57,23 @@ static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog)
static void
r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct r300_vertex_program_cont *vp = (void *)prog;
- struct r300_fragment_program *fp = (struct r300_fragment_program *)prog;
+ struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog;
+ struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog;
switch (target) {
case GL_VERTEX_PROGRAM_ARB:
vp->progs = NULL;
break;
case GL_FRAGMENT_PROGRAM_ARB:
- fp->translated = GL_FALSE;
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ r500_fp->translated = GL_FALSE;
+ else
+ r300_fp->translated = GL_FALSE;
break;
}
+
/* need this for tcl fallbacks */
_tnl_program_string(ctx, target, prog);
}
@@ -61,7 +81,7 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
static GLboolean
r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
- return 1;
+ return GL_TRUE;
}
void r300InitShaderFuncs(struct dd_function_table *functions)
diff --git a/r300/r300_state.c b/r300/r300_state.c
index 2aaf041..15cd053 100644
--- a/r300/r300_state.c
+++ b/r300/r300_state.c
@@ -65,20 +65,33 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "drirenderbuffer.h"
+extern int future_hw_tcl_on;
+extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx);
+
static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4])
{
- GLubyte color[4];
r300ContextPtr rmesa = R300_CONTEXT(ctx);
R300_STATECHANGE(rmesa, blend_color);
- CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]);
- CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
- CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
- CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ GLuint r = IROUND(cf[0]*1023.0f);
+ GLuint g = IROUND(cf[1]*1023.0f);
+ GLuint b = IROUND(cf[2]*1023.0f);
+ GLuint a = IROUND(cf[3]*1023.0f);
- rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0],
- color[1], color[2]);
+ rmesa->hw.blend_color.cmd[1] = r | (a << 16);
+ rmesa->hw.blend_color.cmd[2] = b | (g << 16);
+ } else {
+ GLubyte color[4];
+ CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]);
+ CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
+ CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
+ CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
+
+ rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0],
+ color[1], color[2]);
+ }
}
/**
@@ -184,7 +197,7 @@ static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn,
*/
#if 0
if (new_ablend == new_cblend) {
- new_cblend |= R300_BLEND_NO_SEPARATE;
+ new_cblend |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
}
#endif
new_cblend |= cbits;
@@ -290,7 +303,9 @@ static void r300SetBlendState(GLcontext * ctx)
r300SetBlendCntl(r300,
func, eqn,
- R300_BLEND_UNKNOWN | R300_BLEND_ENABLE, funcA, eqnA);
+ (R300_SEPARATE_ALPHA_ENABLE |
+ R300_READ_ENABLE |
+ R300_ALPHA_BLEND_ENABLE), funcA, eqnA);
}
static void r300BlendEquationSeparate(GLcontext * ctx,
@@ -307,6 +322,83 @@ static void r300BlendFuncSeparate(GLcontext * ctx,
}
/**
+ * Translate LogicOp enums into hardware representation.
+ * Both use a very logical bit-wise layout, but unfortunately the order
+ * of bits is reversed.
+ */
+static GLuint translate_logicop(GLenum logicop)
+{
+ GLuint bits = logicop - GL_CLEAR;
+ bits = ((bits & 1) << 3) | ((bits & 2) << 1) | ((bits & 4) >> 1) | ((bits & 8) >> 3);
+ return bits << R300_RB3D_ROPCNTL_ROP_SHIFT;
+}
+
+/**
+ * Used internally to update the r300->hw hardware state to match the
+ * current OpenGL state.
+ */
+static void r300SetLogicOpState(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ R300_STATECHANGE(r300, rop);
+ if (RGBA_LOGICOP_ENABLED(ctx)) {
+ r300->hw.rop.cmd[1] = R300_RB3D_ROPCNTL_ROP_ENABLE |
+ translate_logicop(ctx->Color.LogicOp);
+ } else {
+ r300->hw.rop.cmd[1] = 0;
+ }
+}
+
+/**
+ * Called by Mesa when an application program changes the LogicOp state
+ * via glLogicOp.
+ */
+static void r300LogicOpcode(GLcontext *ctx, GLenum logicop)
+{
+ if (RGBA_LOGICOP_ENABLED(ctx))
+ r300SetLogicOpState(ctx);
+}
+
+static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLint p;
+ GLint *ip;
+
+ /* no VAP UCP on non-TCL chipsets */
+ if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ return;
+
+ p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+ ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+ R300_STATECHANGE( rmesa, vpucp[p] );
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3];
+}
+
+static void r300SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLuint p;
+
+ /* no VAP UCP on non-TCL chipsets */
+ if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ return;
+
+ p = cap - GL_CLIP_PLANE0;
+ R300_STATECHANGE(r300, vap_clip_cntl);
+ if (state) {
+ r300->hw.vap_clip_cntl.cmd[1] |= (R300_VAP_UCP_ENABLE_0 << p);
+ r300ClipPlane(ctx, cap, NULL);
+ } else {
+ r300->hw.vap_clip_cntl.cmd[1] &= ~(R300_VAP_UCP_ENABLE_0 << p);
+ }
+}
+
+/**
* Update our tracked culling state based on Mesa's state.
*/
static void r300UpdateCulling(GLcontext * ctx)
@@ -314,43 +406,82 @@ static void r300UpdateCulling(GLcontext * ctx)
r300ContextPtr r300 = R300_CONTEXT(ctx);
uint32_t val = 0;
- R300_STATECHANGE(r300, cul);
if (ctx->Polygon.CullFlag) {
- if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
- val = R300_CULL_FRONT | R300_CULL_BACK;
- else if (ctx->Polygon.CullFaceMode == GL_FRONT)
+ switch (ctx->Polygon.CullFaceMode) {
+ case GL_FRONT:
val = R300_CULL_FRONT;
- else
+ break;
+ case GL_BACK:
val = R300_CULL_BACK;
+ break;
+ case GL_FRONT_AND_BACK:
+ val = R300_CULL_FRONT | R300_CULL_BACK;
+ break;
+ default:
+ break;
+ }
+ }
- if (ctx->Polygon.FrontFace == GL_CW)
- val |= R300_FRONT_FACE_CW;
- else
- val |= R300_FRONT_FACE_CCW;
+ switch (ctx->Polygon.FrontFace) {
+ case GL_CW:
+ val |= R300_FRONT_FACE_CW;
+ break;
+ case GL_CCW:
+ val |= R300_FRONT_FACE_CCW;
+ break;
+ default:
+ break;
}
+
+ R300_STATECHANGE(r300, cul);
r300->hw.cul.cmd[R300_CUL_CULL] = val;
}
+static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(r300, occlusion_cntl);
+ if (state) {
+ r300->hw.occlusion_cntl.cmd[1] |= (3 << 0);
+ } else {
+ r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0);
+ }
+}
+
+static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+ struct r300_fragment_program *fp = (struct r300_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+ return (fp && fp->WritesDepth);
+ } else {
+ struct r500_fragment_program* fp =
+ (struct r500_fragment_program*)(char*)
+ ctx->FragmentProgram._Current;
+ return (fp && fp->writes_depth);
+ }
+}
+
static void r300SetEarlyZState(GLcontext * ctx)
{
- /* updates register R300_RB3D_EARLY_Z (0x4F14)
- if depth test is not enabled it should be R300_EARLY_Z_DISABLE
- if depth is enabled and alpha not it should be R300_EARLY_Z_ENABLE
- if depth and alpha is enabled it should be R300_EARLY_Z_DISABLE
- */
r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLuint topZ = R300_ZTOP_ENABLE;
- R300_STATECHANGE(r300, zstencil_format);
if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS)
- /* disable early Z */
- r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE;
- else {
- if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER)
- /* enable early Z */
- r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE;
- else
- /* disable early Z */
- r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE;
+ topZ = R300_ZTOP_DISABLE;
+ if (current_fragment_program_writes_depth(ctx))
+ topZ = R300_ZTOP_DISABLE;
+
+ if (topZ != r300->hw.zstencil_format.cmd[2]) {
+ /* Note: This completely reemits the stencil format.
+ * I have not tested whether this is strictly necessary,
+ * or if emitting a write to ZB_ZTOP is enough.
+ */
+ R300_STATECHANGE(r300, zstencil_format);
+ r300->hw.zstencil_format.cmd[2] = topZ;
}
}
@@ -365,41 +496,43 @@ static void r300SetAlphaState(GLcontext * ctx)
switch (ctx->Color.AlphaFunc) {
case GL_NEVER:
- pp_misc |= R300_ALPHA_TEST_FAIL;
+ pp_misc |= R300_FG_ALPHA_FUNC_NEVER;
break;
case GL_LESS:
- pp_misc |= R300_ALPHA_TEST_LESS;
+ pp_misc |= R300_FG_ALPHA_FUNC_LESS;
break;
case GL_EQUAL:
- pp_misc |= R300_ALPHA_TEST_EQUAL;
+ pp_misc |= R300_FG_ALPHA_FUNC_EQUAL;
break;
case GL_LEQUAL:
- pp_misc |= R300_ALPHA_TEST_LEQUAL;
+ pp_misc |= R300_FG_ALPHA_FUNC_LE;
break;
case GL_GREATER:
- pp_misc |= R300_ALPHA_TEST_GREATER;
+ pp_misc |= R300_FG_ALPHA_FUNC_GREATER;
break;
case GL_NOTEQUAL:
- pp_misc |= R300_ALPHA_TEST_NEQUAL;
+ pp_misc |= R300_FG_ALPHA_FUNC_NOTEQUAL;
break;
case GL_GEQUAL:
- pp_misc |= R300_ALPHA_TEST_GEQUAL;
+ pp_misc |= R300_FG_ALPHA_FUNC_GE;
break;
case GL_ALWAYS:
- /*pp_misc |= R300_ALPHA_TEST_PASS; */
+ /*pp_misc |= FG_ALPHA_FUNC_ALWAYS; */
really_enabled = GL_FALSE;
break;
}
if (really_enabled) {
- pp_misc |= R300_ALPHA_TEST_ENABLE;
- pp_misc |= (refByte & R300_REF_ALPHA_MASK);
+ pp_misc |= R300_FG_ALPHA_FUNC_ENABLE;
+ pp_misc |= R500_FG_ALPHA_FUNC_8BIT;
+ pp_misc |= (refByte & R300_FG_ALPHA_FUNC_VAL_MASK);
} else {
pp_misc = 0x0;
}
R300_STATECHANGE(r300, at);
r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc;
+ r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
r300SetEarlyZState(ctx);
}
@@ -439,152 +572,53 @@ static void r300SetDepthState(GLcontext * ctx)
r300ContextPtr r300 = R300_CONTEXT(ctx);
R300_STATECHANGE(r300, zs);
- r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE;
- r300->hw.zs.cmd[R300_ZS_CNTL_1] &=
- ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT);
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE|R300_STENCIL_FRONT_BACK;
+ r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT);
- if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) {
+ if (ctx->Depth.Test) {
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE;
if (ctx->Depth.Mask)
- r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
- R300_RB3D_Z_TEST_AND_WRITE;
- else
- r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST;
-
- r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
- translate_func(ctx->Depth.
- Func) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
- } else {
- r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1;
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE;
r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
- translate_func(GL_NEVER) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
+ translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT;
}
r300SetEarlyZState(ctx);
}
-static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq );
-
-/**
- * Handle glEnable()/glDisable().
- *
- * \note Mesa already filters redundant calls to glEnable/glDisable.
- */
-static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+static void r300SetStencilState(GLcontext * ctx, GLboolean state)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- GLuint p;
- if (RADEON_DEBUG & DEBUG_STATE)
- fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
- _mesa_lookup_enum_by_nr(cap),
- state ? "GL_TRUE" : "GL_FALSE");
-
- switch (cap) {
- /* Fast track this one...
- */
- case GL_TEXTURE_1D:
- case GL_TEXTURE_2D:
- case GL_TEXTURE_3D:
- break;
- case GL_FOG:
- R300_STATECHANGE(r300, fogs);
+ if (r300->state.stencil.hw_stencil) {
+ R300_STATECHANGE(r300, zs);
if (state) {
- r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FOG_ENABLE;
-
- ctx->Driver.Fogfv(ctx, GL_FOG_MODE, NULL);
- ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY,
- &ctx->Fog.Density);
- ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
- ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
- ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
+ R300_STENCIL_ENABLE;
} else {
- r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FOG_ENABLE;
- }
-
- break;
-
- case GL_ALPHA_TEST:
- r300SetAlphaState(ctx);
- break;
-
- case GL_BLEND:
- case GL_COLOR_LOGIC_OP:
- r300SetBlendState(ctx);
- break;
-
-
- case GL_CLIP_PLANE0:
- case GL_CLIP_PLANE1:
- case GL_CLIP_PLANE2:
- case GL_CLIP_PLANE3:
- case GL_CLIP_PLANE4:
- case GL_CLIP_PLANE5:
- /* no VAP UCP on non-TCL chipsets */
- if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- return;
-
- p = cap-GL_CLIP_PLANE0;
- R300_STATECHANGE( r300, vap_clip_cntl );
- if (state) {
- r300->hw.vap_clip_cntl.cmd[1] |= (R300_VAP_UCP_ENABLE_0<<p);
- r300ClipPlane( ctx, cap, NULL );
- }
- else {
- r300->hw.vap_clip_cntl.cmd[1] &= ~(R300_VAP_UCP_ENABLE_0<<p);
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] &=
+ ~R300_STENCIL_ENABLE;
}
- break;
- case GL_DEPTH_TEST:
- r300SetDepthState(ctx);
- break;
-
- case GL_STENCIL_TEST:
- if (r300->state.stencil.hw_stencil) {
- R300_STATECHANGE(r300, zs);
- if (state) {
- r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
- R300_RB3D_STENCIL_ENABLE;
- } else {
- r300->hw.zs.cmd[R300_ZS_CNTL_0] &=
- ~R300_RB3D_STENCIL_ENABLE;
- }
- } else {
+ } else {
#if R200_MERGED
- FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state);
+ FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state);
#endif
- }
- break;
-
- case GL_CULL_FACE:
- r300UpdateCulling(ctx);
- break;
-
- case GL_POLYGON_OFFSET_POINT:
- case GL_POLYGON_OFFSET_LINE:
- break;
-
- case GL_POLYGON_OFFSET_FILL:
- R300_STATECHANGE(r300, occlusion_cntl);
- if (state) {
- r300->hw.occlusion_cntl.cmd[1] |= (3 << 0);
- } else {
- r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0);
- }
- break;
- default:
- radeonEnable(ctx, cap, state);
- return;
}
}
static void r300UpdatePolygonMode(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- uint32_t hw_mode = 0;
+ uint32_t hw_mode = R300_GA_POLY_MODE_DISABLE;
+ /* Only do something if a polygon mode is wanted, default is GL_FILL */
if (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL) {
GLenum f, b;
+ /* Handle GL_CW (clock wise and GL_CCW (counter clock wise)
+ * correctly by selecting the correct front and back face
+ */
if (ctx->Polygon.FrontFace == GL_CCW) {
f = ctx->Polygon.FrontMode;
b = ctx->Polygon.BackMode;
@@ -593,29 +627,30 @@ static void r300UpdatePolygonMode(GLcontext * ctx)
b = ctx->Polygon.FrontMode;
}
- hw_mode |= R300_PM_ENABLED;
+ /* Enable polygon mode */
+ hw_mode |= R300_GA_POLY_MODE_DUAL;
switch (f) {
case GL_LINE:
- hw_mode |= R300_PM_FRONT_LINE;
+ hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_LINE;
break;
- case GL_POINT: /* noop */
- hw_mode |= R300_PM_FRONT_POINT;
+ case GL_POINT:
+ hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_POINT;
break;
case GL_FILL:
- hw_mode |= R300_PM_FRONT_FILL;
+ hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
break;
}
switch (b) {
case GL_LINE:
- hw_mode |= R300_PM_BACK_LINE;
+ hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_LINE;
break;
- case GL_POINT: /* noop */
- hw_mode |= R300_PM_BACK_POINT;
+ case GL_POINT:
+ hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_POINT;
break;
case GL_FILL:
- hw_mode |= R300_PM_BACK_FILL;
+ hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_TRI;
break;
}
}
@@ -624,6 +659,9 @@ static void r300UpdatePolygonMode(GLcontext * ctx)
R300_STATECHANGE(r300, polygon_mode);
r300->hw.polygon_mode.cmd[1] = hw_mode;
}
+
+ r300->hw.polygon_mode.cmd[2] = 0x00000001;
+ r300->hw.polygon_mode.cmd[3] = 0x00000000;
}
/**
@@ -680,9 +718,10 @@ static void r300ColorMask(GLcontext * ctx,
GLboolean r, GLboolean g, GLboolean b, GLboolean a)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- int mask = (r ? R300_COLORMASK0_R : 0) |
- (g ? R300_COLORMASK0_G : 0) |
- (b ? R300_COLORMASK0_B : 0) | (a ? R300_COLORMASK0_A : 0);
+ int mask = (r ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
+ (g ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
+ (b ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
+ (a ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0);
if (mask != r300->hw.cmk.cmd[R300_CMK_COLORMASK]) {
R300_STATECHANGE(r300, cmk);
@@ -708,15 +747,13 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
switch (pname) {
case GL_FOG_MODE:
- if (!ctx->Fog.Enabled)
- return;
switch (ctx->Fog.Mode) {
case GL_LINEAR:
R300_STATECHANGE(r300, fogs);
r300->hw.fogs.cmd[R300_FOGS_STATE] =
(r300->hw.fogs.
- cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) |
- R300_FOG_MODE_LINEAR;
+ cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) |
+ R300_FG_FOG_BLEND_FN_LINEAR;
if (ctx->Fog.Start == ctx->Fog.End) {
fogScale.f = -1.0;
@@ -733,8 +770,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
R300_STATECHANGE(r300, fogs);
r300->hw.fogs.cmd[R300_FOGS_STATE] =
(r300->hw.fogs.
- cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) |
- R300_FOG_MODE_EXP;
+ cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) |
+ R300_FG_FOG_BLEND_FN_EXP;
fogScale.f = 0.0933 * ctx->Fog.Density;
fogStart.f = 0.0;
break;
@@ -742,8 +779,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
R300_STATECHANGE(r300, fogs);
r300->hw.fogs.cmd[R300_FOGS_STATE] =
(r300->hw.fogs.
- cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) |
- R300_FOG_MODE_EXP2;
+ cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) |
+ R300_FG_FOG_BLEND_FN_EXP2;
fogScale.f = 0.3 * ctx->Fog.Density;
fogStart.f = 0.0;
default:
@@ -801,14 +838,32 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
}
}
+static void r300SetFogState(GLcontext * ctx, GLboolean state)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(r300, fogs);
+ if (state) {
+ r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FG_FOG_BLEND_ENABLE;
+
+ r300Fogfv(ctx, GL_FOG_MODE, NULL);
+ r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
+ r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
+ r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
+ r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
+ } else {
+ r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FG_FOG_BLEND_ENABLE;
+ }
+}
+
/* =============================================================
* Point state
*/
static void r300PointSize(GLcontext * ctx, GLfloat size)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
-
- size = ctx->Point._Size;
+ /* same size limits for AA, non-AA points */
+ size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize);
R300_STATECHANGE(r300, ps);
r300->hw.ps.cmd[R300_PS_POINTSIZE] =
@@ -816,6 +871,31 @@ static void r300PointSize(GLcontext * ctx, GLfloat size)
((int)(size * 6) << R300_POINTSIZE_Y_SHIFT);
}
+static void r300PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ switch (pname) {
+ case GL_POINT_SIZE_MIN:
+ R300_STATECHANGE(r300, ga_point_minmax);
+ r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MIN_MASK;
+ r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MinSize * 6.0);
+ break;
+ case GL_POINT_SIZE_MAX:
+ R300_STATECHANGE(r300, ga_point_minmax);
+ r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MAX_MASK;
+ r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MaxSize * 6.0)
+ << R300_GA_POINT_MINMAX_MAX_SHIFT;
+ break;
+ case GL_POINT_DISTANCE_ATTENUATION:
+ break;
+ case GL_POINT_FADE_THRESHOLD_SIZE:
+ break;
+ default:
+ break;
+ }
+}
+
/* =============================================================
* Line state
*/
@@ -823,11 +903,12 @@ static void r300LineWidth(GLcontext * ctx, GLfloat widthf)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- widthf = ctx->Line._Width;
-
+ widthf = CLAMP(widthf,
+ ctx->Const.MinPointSize,
+ ctx->Const.MaxPointSize);
R300_STATECHANGE(r300, lcntl);
- r300->hw.lcntl.cmd[1] = (int)(widthf * 6.0);
- r300->hw.lcntl.cmd[1] |= R300_LINE_CNT_VE;
+ r300->hw.lcntl.cmd[1] =
+ R300_LINE_CNT_HO | R300_LINE_CNT_VE | (int)(widthf * 6.0);
}
static void r300PolygonMode(GLcontext * ctx, GLenum face, GLenum mode)
@@ -873,6 +954,7 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode)
r300ContextPtr rmesa = R300_CONTEXT(ctx);
R300_STATECHANGE(rmesa, shade);
+ rmesa->hw.shade.cmd[1] = 0x00000002;
switch (mode) {
case GL_FLAT:
rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT;
@@ -883,6 +965,8 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode)
default:
return;
}
+ rmesa->hw.shade.cmd[3] = 0x00000000;
+ rmesa->hw.shade.cmd[4] = 0x00000000;
}
static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
@@ -891,37 +975,36 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
r300ContextPtr rmesa = R300_CONTEXT(ctx);
GLuint refmask =
(((ctx->Stencil.
- Ref[0] & 0xff) << R300_RB3D_ZS2_STENCIL_REF_SHIFT) | ((ctx->
- Stencil.
- ValueMask
- [0] &
- 0xff)
- <<
- R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
+ Ref[0] & 0xff) << R300_STENCILREF_SHIFT) | ((ctx->
+ Stencil.
+ ValueMask
+ [0] &
+ 0xff)
+ <<
+ R300_STENCILMASK_SHIFT));
GLuint flag;
R300_STATECHANGE(rmesa, zs);
-
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK;
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK <<
- R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
+ R300_S_FRONT_FUNC_SHIFT)
| (R300_ZS_MASK <<
- R300_RB3D_ZS1_BACK_FUNC_SHIFT));
+ R300_S_BACK_FUNC_SHIFT));
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
- ~((R300_RB3D_ZS2_STENCIL_MASK <<
- R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
- (R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
+ ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) |
+ (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT));
flag = translate_func(ctx->Stencil.Function[0]);
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
- (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT);
+ (flag << R300_S_FRONT_FUNC_SHIFT);
if (ctx->Stencil._TestTwoSide)
flag = translate_func(ctx->Stencil.Function[1]);
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
- (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
+ (flag << R300_S_BACK_FUNC_SHIFT);
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask;
}
@@ -931,11 +1014,12 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
R300_STATECHANGE(rmesa, zs);
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
- ~(R300_RB3D_ZS2_STENCIL_MASK <<
- R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT);
+ ~(R300_STENCILREF_MASK <<
+ R300_STENCILWRITEMASK_SHIFT);
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |=
(ctx->Stencil.
- WriteMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT;
+ WriteMask[0] & R300_STENCILREF_MASK) <<
+ R300_STENCILWRITEMASK_SHIFT;
}
static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
@@ -946,49 +1030,37 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
R300_STATECHANGE(rmesa, zs);
/* It is easier to mask what's left.. */
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &=
- (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT) |
- (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) |
- (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
+ (R300_ZS_MASK << R300_Z_FUNC_SHIFT) |
+ (R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) |
+ (R300_ZS_MASK << R300_S_BACK_FUNC_SHIFT);
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
(translate_stencil_op(ctx->Stencil.FailFunc[0]) <<
- R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT)
+ R300_S_FRONT_SFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) <<
- R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT)
+ R300_S_FRONT_ZFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) <<
- R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT);
+ R300_S_FRONT_ZPASS_OP_SHIFT);
if (ctx->Stencil._TestTwoSide) {
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
(translate_stencil_op(ctx->Stencil.FailFunc[1]) <<
- R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
+ R300_S_BACK_SFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZFailFunc[1]) <<
- R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
+ R300_S_BACK_ZFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZPassFunc[1]) <<
- R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
+ R300_S_BACK_ZPASS_OP_SHIFT);
} else {
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
(translate_stencil_op(ctx->Stencil.FailFunc[0]) <<
- R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
+ R300_S_BACK_SFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) <<
- R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
+ R300_S_BACK_ZFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) <<
- R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
+ R300_S_BACK_ZPASS_OP_SHIFT);
}
}
-static void r300ClearStencil(GLcontext * ctx, GLint s)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
-
- rmesa->state.stencil.clear =
- ((GLuint) (ctx->Stencil.Clear & 0xff) |
- (R300_RB3D_ZS2_STENCIL_MASK <<
- R300_RB3D_ZS2_STENCIL_MASK_SHIFT) | ((ctx->Stencil.
- WriteMask[0] & 0xff) <<
- R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT));
-}
-
/* =============================================================
* Window position and viewport transformation
*/
@@ -1076,12 +1148,12 @@ void r300UpdateDrawBuffer(GLcontext * ctx)
struct gl_framebuffer *fb = ctx->DrawBuffer;
driRenderbuffer *drb;
- if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
+ if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
/* draw to front */
drb =
(driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
Renderbuffer;
- } else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) {
+ } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
/* draw to back */
drb =
(driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
@@ -1253,8 +1325,8 @@ static unsigned long gen_fixed_filter(unsigned long f)
(R300_TX_CLAMP << R300_TX_WRAP_T_SHIFT)) {
needs_fixing |= 2;
}
- if ((f & ((7 - 1) << R300_TX_WRAP_Q_SHIFT)) ==
- (R300_TX_CLAMP << R300_TX_WRAP_Q_SHIFT)) {
+ if ((f & ((7 - 1) << R300_TX_WRAP_R_SHIFT)) ==
+ (R300_TX_CLAMP << R300_TX_WRAP_R_SHIFT)) {
needs_fixing |= 4;
}
@@ -1262,7 +1334,7 @@ static unsigned long gen_fixed_filter(unsigned long f)
return f;
mag = f & R300_TX_MAG_FILTER_MASK;
- min = f & R300_TX_MIN_FILTER_MASK;
+ min = f & (R300_TX_MIN_FILTER_MASK|R300_TX_MIN_FILTER_MIP_MASK);
/* TODO: Check for anisto filters too */
if ((mag != R300_TX_MAG_FILTER_NEAREST)
@@ -1294,12 +1366,100 @@ static unsigned long gen_fixed_filter(unsigned long f)
f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT;
}
if (needs_fixing & 4) {
- f &= ~((7 - 1) << R300_TX_WRAP_Q_SHIFT);
- f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_Q_SHIFT;
+ f &= ~((7 - 1) << R300_TX_WRAP_R_SHIFT);
+ f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT;
}
return f;
}
+static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int i;
+ struct r300_fragment_program *fp = (struct r300_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+ struct r300_fragment_program_code *code = &fp->code;
+
+ R300_STATECHANGE(r300, fpt);
+
+ for (i = 0; i < code->tex.length; i++) {
+ int unit;
+ int opcode;
+ unsigned long val;
+
+ unit = code->tex.inst[i] >> R300_TEX_ID_SHIFT;
+ unit &= 15;
+
+ val = code->tex.inst[i];
+ val &= ~R300_TEX_ID_MASK;
+
+ opcode =
+ (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT;
+ if (opcode == R300_TEX_OP_KIL) {
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ } else {
+ if (tmu_mappings[unit] >= 0) {
+ val |=
+ tmu_mappings[unit] <<
+ R300_TEX_ID_SHIFT;
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ } else {
+ // We get here when the corresponding texture image is incomplete
+ // (e.g. incomplete mipmaps etc.)
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ }
+ }
+ }
+
+ r300->hw.fpt.cmd[R300_FPT_CMD_0] =
+ cmdpacket0(R300_US_TEX_INST_0, code->tex.length);
+}
+
+static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+{
+ int i;
+ struct r500_fragment_program *fp = (struct r500_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+ struct r500_fragment_program_code *code = &fp->code;
+
+ /* find all the texture instructions and relocate the texture units */
+ for (i = 0; i < code->inst_end + 1; i++) {
+ if ((code->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) {
+ uint32_t val;
+ int unit, opcode, new_unit;
+
+ val = code->inst[i].inst1;
+
+ unit = (val >> 16) & 0xf;
+
+ val &= ~(0xf << 16);
+
+ opcode = val & (0x7 << 22);
+ if (opcode == R500_TEX_INST_TEXKILL) {
+ new_unit = 0;
+ } else {
+ if (tmu_mappings[unit] >= 0) {
+ new_unit = tmu_mappings[unit];
+ } else {
+ new_unit = 0;
+ }
+ }
+ val |= R500_TEX_ID(new_unit);
+ code->inst[i].inst1 = val;
+ }
+ }
+}
+
+static GLuint translate_lod_bias(GLfloat bias)
+{
+ GLint b = (int)(bias*32);
+ if (b >= (1 << 9))
+ b = (1 << 9)-1;
+ else if (b < -(1 << 9))
+ b = -(1 << 9);
+ return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
+}
+
static void r300SetupTextures(GLcontext * ctx)
{
int i, mtu;
@@ -1363,8 +1523,14 @@ static void r300SetupTextures(GLcontext * ctx)
r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 +
hw_tmu] =
gen_fixed_filter(t->filter) | (hw_tmu << 28);
- /* Currently disabled! */
- r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80;
+ /* Note: There is a LOD bias per texture unit and a LOD bias
+ * per texture object. We add them here to get the correct behaviour.
+ * (The per-texture object LOD bias was introduced in OpenGL 1.4
+ * and is not present in the EXT_texture_object extension).
+ */
+ r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+ t->filter_1 |
+ translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias);
r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
t->size;
r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
@@ -1395,7 +1561,7 @@ static void r300SetupTextures(GLcontext * ctx)
}
r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_FILTER_0, last_hw_tmu + 1);
+ cmdpacket0(R300_TX_FILTER0_0, last_hw_tmu + 1);
r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
r300->hw.tex.size.cmd[R300_TEX_CMD_0] =
@@ -1403,7 +1569,7 @@ static void r300SetupTextures(GLcontext * ctx)
r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1);
r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] =
- cmdpacket0(R300_TX_PITCH_0, last_hw_tmu + 1);
+ cmdpacket0(R300_TX_FORMAT2_0, last_hw_tmu + 1);
r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
@@ -1414,39 +1580,18 @@ static void r300SetupTextures(GLcontext * ctx)
if (!fp) /* should only happenen once, just after context is created */
return;
- R300_STATECHANGE(r300, fpt);
-
- for (i = 0; i < fp->tex.length; i++) {
- int unit;
- int opcode;
- unsigned long val;
-
- unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT;
- unit &= 15;
-
- val = fp->tex.inst[i];
- val &= ~R300_FPITX_IMAGE_MASK;
-
- opcode =
- (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT;
- if (opcode == R300_FPITX_OP_KIL) {
- r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
- } else {
- if (tmu_mappings[unit] >= 0) {
- val |=
- tmu_mappings[unit] <<
- R300_FPITX_IMAGE_SHIFT;
- r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
- } else {
- // We get here when the corresponding texture image is incomplete
- // (e.g. incomplete mipmaps etc.)
- r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
- }
+ if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+ if (fp->mesa_program.UsesKill && last_hw_tmu < 0) {
+ // The KILL operation requires the first texture unit
+ // to be enabled.
+ r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
+ r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_FILTER0_0, 1);
}
- }
-
- r300->hw.fpt.cmd[R300_FPT_CMD_0] =
- cmdpacket0(R300_PFS_TEXI_0, fp->tex.length);
+ r300SetupFragmentShaderTextures(ctx, tmu_mappings);
+ } else
+ r500SetupFragmentShaderTextures(ctx, tmu_mappings);
if (RADEON_DEBUG & DEBUG_STATE)
fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n",
@@ -1466,28 +1611,22 @@ static void r300SetupRSUnit(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
/* I'm still unsure if these are needed */
- GLuint interp_magic[8] = {
- 0x00,
- R300_RS_INTERP_1_UNKNOWN,
- R300_RS_INTERP_2_UNKNOWN,
- R300_RS_INTERP_3_UNKNOWN,
- 0x00,
- 0x00,
- 0x00,
- 0x00
- };
+ GLuint interp_col[8];
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
union r300_outputs_written OutputsWritten;
GLuint InputsRead;
int fp_reg, high_rr;
- int in_texcoords, col_interp_nr;
- int i;
+ int col_interp_nr;
+ int rs_tex_count = 0, rs_col_count = 0;
+ int i, count;
+
+ memset(interp_col, 0, sizeof(interp_col));
if (hw_tcl_on)
- OutputsWritten.vp_outputs =
- CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+ OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
else
- RENDERINPUTS_COPY(OutputsWritten.index_bitset,
- r300->state.render_inputs_bitset);
+ RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset);
if (ctx->FragmentProgram._Current)
InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
@@ -1500,9 +1639,9 @@ static void r300SetupRSUnit(GLcontext * ctx)
R300_STATECHANGE(r300, rc);
R300_STATECHANGE(r300, rr);
- fp_reg = in_texcoords = col_interp_nr = high_rr = 0;
+ fp_reg = col_interp_nr = high_rr = 0;
- r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0;
+ r300->hw.rr.cmd[R300_RR_INST_1] = 0;
if (InputsRead & FRAG_BIT_WPOS) {
for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
@@ -1518,96 +1657,267 @@ static void r300SetupRSUnit(GLcontext * ctx)
InputsRead &= ~FRAG_BIT_WPOS;
}
+ if (InputsRead & FRAG_BIT_COL0) {
+ count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
+ interp_col[0] |= R300_RS_COL_PTR(rs_col_count);
+ if (count == 3)
+ interp_col[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1);
+ rs_col_count += count;
+ }
+ else
+ interp_col[0] = R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+
+ if (InputsRead & FRAG_BIT_COL1) {
+ count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
+ if (count == 3)
+ interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB0);
+ interp_col[1] |= R300_RS_COL_PTR(1);
+ rs_col_count += count;
+ }
+
+
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0
- | R300_RS_INTERP_USED
- | (in_texcoords << R300_RS_INTERP_SRC_SHIFT)
- | interp_magic[i];
+ int swiz;
- r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0;
+ /* with TCL we always seem to route 4 components */
+ if (hw_tcl_on)
+ count = 4;
+ else
+ count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
+
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count;
+ switch(count) {
+ case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break;
+ case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
+ default:
+ case 1:
+ case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
+ };
+
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + i] |= swiz;
+
+ r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0;
if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+
+ rs_tex_count += count;
+
//assert(r300->state.texture.tc_count != 0);
- r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R300_RS_ROUTE_ENABLE | i /* source INTERP */
- | (fp_reg << R300_RS_ROUTE_DEST_SHIFT);
+ r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R300_RS_INST_TEX_CN_WRITE | i /* source INTERP */
+ | (fp_reg << R300_RS_INST_TEX_ADDR_SHIFT);
high_rr = fp_reg;
- if (!R300_OUTPUTS_WRITTEN_TEST
- (OutputsWritten, VERT_RESULT_TEX0 + i,
- _TNL_ATTRIB_TEX(i))) {
- /* Passing invalid data here can lock the GPU. */
- WARN_ONCE
- ("fragprog wants coords for tex%d, vp doesn't provide them!\n",
- i);
- //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base);
- //_mesa_exit(-1);
+ /* Passing invalid data here can lock the GPU. */
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+ InputsRead &= ~(FRAG_BIT_TEX0 << i);
+ fp_reg++;
+ } else {
+ WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
}
- InputsRead &= ~(FRAG_BIT_TEX0 << i);
- fp_reg++;
}
- /* Need to count all coords enabled at vof */
- if (R300_OUTPUTS_WRITTEN_TEST
- (OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i)))
- in_texcoords++;
}
if (InputsRead & FRAG_BIT_COL0) {
- if (!R300_OUTPUTS_WRITTEN_TEST
- (OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
- WARN_ONCE
- ("fragprog wants col0, vp doesn't provide it\n");
- goto out; /* FIXME */
- //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base);
- //_mesa_exit(-1);
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+ r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT);
+ InputsRead &= ~FRAG_BIT_COL0;
+ col_interp_nr++;
+ } else {
+ WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
}
+ }
+
+ if (InputsRead & FRAG_BIT_COL1) {
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+ r300->hw.rr.cmd[R300_RR_INST_1] |= R300_RS_INST_COL_ID(1) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT);
+ InputsRead &= ~FRAG_BIT_COL1;
+ if (high_rr < 1)
+ high_rr = 1;
+ col_interp_nr++;
+ } else {
+ WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+ }
+ }
- r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
- | R300_RS_ROUTE_0_COLOR
- | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
- InputsRead &= ~FRAG_BIT_COL0;
+ /* Need at least one. This might still lock as the values are undefined... */
+ if (rs_tex_count == 0 && col_interp_nr == 0) {
+ r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT);
col_interp_nr++;
}
- out:
+
+ r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT)
+ | (col_interp_nr << R300_IC_COUNT_SHIFT)
+ | R300_HIRES_EN;
+
+ assert(high_rr >= 0);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1);
+ r300->hw.rc.cmd[2] = high_rr;
+
+ if (InputsRead)
+ WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
+}
+
+static void r500SetupRSUnit(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ /* I'm still unsure if these are needed */
+ GLuint interp_col[8];
+ union r300_outputs_written OutputsWritten;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint InputsRead;
+ int fp_reg, high_rr;
+ int rs_col_count = 0;
+ int in_texcoords, col_interp_nr;
+ int i, count;
+
+ memset(interp_col, 0, sizeof(interp_col));
+ if (hw_tcl_on)
+ OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+ else
+ RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset);
+
+ if (ctx->FragmentProgram._Current)
+ InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+ else {
+ fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+ return; /* This should only ever happen once.. */
+ }
+
+ R300_STATECHANGE(r300, ri);
+ R300_STATECHANGE(r300, rc);
+ R300_STATECHANGE(r300, rr);
+
+ fp_reg = col_interp_nr = high_rr = in_texcoords = 0;
+
+ r300->hw.rr.cmd[R300_RR_INST_1] = 0;
+
+ if (InputsRead & FRAG_BIT_WPOS) {
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+ if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
+ break;
+
+ if (i == ctx->Const.MaxTextureUnits) {
+ fprintf(stderr, "\tno free texcoord found...\n");
+ _mesa_exit(-1);
+ }
+
+ InputsRead |= (FRAG_BIT_TEX0 << i);
+ InputsRead &= ~FRAG_BIT_WPOS;
+ }
+
+ if (InputsRead & FRAG_BIT_COL0) {
+ count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
+ interp_col[0] |= R500_RS_COL_PTR(rs_col_count);
+ if (count == 3)
+ interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1);
+ rs_col_count += count;
+ }
+ else
+ interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
if (InputsRead & FRAG_BIT_COL1) {
- if (!R300_OUTPUTS_WRITTEN_TEST
- (OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
- WARN_ONCE
- ("fragprog wants col1, vp doesn't provide it\n");
- //_mesa_exit(-1);
+ count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
+ interp_col[1] |= R500_RS_COL_PTR(1);
+ if (count == 3)
+ interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB0);
+ rs_col_count += count;
+ }
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ GLuint swiz = 0;
+
+ /* with TCL we always seem to route 4 components */
+ if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+
+ if (hw_tcl_on)
+ count = 4;
+ else
+ count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
+
+ /* always have on texcoord */
+ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT;
+ if (count >= 2)
+ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT;
+ else
+ swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
+
+ if (count >= 3)
+ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT;
+ else
+ swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
+
+ if (count == 4)
+ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT;
+ else
+ swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
+
+ } else
+ swiz = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
+
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | swiz;
+
+ r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0;
+ if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+ //assert(r300->state.texture.tc_count != 0);
+ r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */
+ | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT);
+ high_rr = fp_reg;
+
+ /* Passing invalid data here can lock the GPU. */
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+ InputsRead &= ~(FRAG_BIT_TEX0 << i);
+ fp_reg++;
+ } else {
+ WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+ }
}
+ }
- r300->hw.rr.cmd[R300_RR_ROUTE_1] |=
- R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 |
- (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT);
- InputsRead &= ~FRAG_BIT_COL1;
- if (high_rr < 1)
- high_rr = 1;
- col_interp_nr++;
+ if (InputsRead & FRAG_BIT_COL0) {
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+ r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
+ InputsRead &= ~FRAG_BIT_COL0;
+ col_interp_nr++;
+ } else {
+ WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+ }
+ }
+
+ if (InputsRead & FRAG_BIT_COL1) {
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+ r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
+ InputsRead &= ~FRAG_BIT_COL1;
+ if (high_rr < 1)
+ high_rr = 1;
+ col_interp_nr++;
+ } else {
+ WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+ }
}
/* Need at least one. This might still lock as the values are undefined... */
if (in_texcoords == 0 && col_interp_nr == 0) {
- r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
- | R300_RS_ROUTE_0_COLOR
- | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
+ r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
col_interp_nr++;
}
- r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_RS_CNTL_TC_CNT_SHIFT)
- | (col_interp_nr << R300_RS_CNTL_CI_CNT_SHIFT)
- | R300_RS_CNTL_0_UNKNOWN_18;
+ r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_IT_COUNT_SHIFT)
+ | (col_interp_nr << R300_IC_COUNT_SHIFT)
+ | R300_HIRES_EN;
assert(high_rr >= 0);
- r300->hw.rr.cmd[R300_RR_CMD_0] =
- cmdpacket0(R300_RS_ROUTE_0, high_rr + 1);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1);
r300->hw.rc.cmd[2] = 0xC0 | high_rr;
if (InputsRead)
- WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n",
- InputsRead);
+ WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
}
-#define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
+
+
#define bump_vpu_count(ptr, new_count) do{\
drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
@@ -1616,9 +1926,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
if(_nc>_p->vpu.count)_p->vpu.count=_nc;\
}while(0)
-void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, struct
- r300_vertex_shader_fragment
- *vsf)
+static INLINE void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, struct r300_vertex_shader_fragment *vsf)
{
int i;
@@ -1626,8 +1934,7 @@ void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, s
return;
if (vsf->length & 0x3) {
- fprintf(stderr,
- "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n");
+ fprintf(stderr, "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n");
_mesa_exit(-1);
}
@@ -1635,147 +1942,172 @@ void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, s
case 0:
R300_STATECHANGE(r300, vpi);
for (i = 0; i < vsf->length; i++)
- r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i +
- 4 * (dest & 0xff)] = (vsf->body.d[i]);
- bump_vpu_count(r300->hw.vpi.cmd,
- vsf->length + 4 * (dest & 0xff));
+ r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]);
+ bump_vpu_count(r300->hw.vpi.cmd, vsf->length + 4 * (dest & 0xff));
break;
case 2:
R300_STATECHANGE(r300, vpp);
for (i = 0; i < vsf->length; i++)
- r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i +
- 4 * (dest & 0xff)] = (vsf->body.d[i]);
- bump_vpu_count(r300->hw.vpp.cmd,
- vsf->length + 4 * (dest & 0xff));
+ r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]);
+ bump_vpu_count(r300->hw.vpp.cmd, vsf->length + 4 * (dest & 0xff));
break;
case 4:
R300_STATECHANGE(r300, vps);
for (i = 0; i < vsf->length; i++)
- r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] =
- (vsf->body.d[i]);
- bump_vpu_count(r300->hw.vps.cmd,
- vsf->length + 4 * (dest & 0xff));
+ r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]);
+ bump_vpu_count(r300->hw.vps.cmd, vsf->length + 4 * (dest & 0xff));
break;
default:
- fprintf(stderr,
- "%s:%s don't know how to handle dest %04x\n",
- __FILE__, __FUNCTION__, dest);
+ fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
_mesa_exit(-1);
}
}
-/* just a skeleton for now.. */
+#define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c))
-/* Generate a vertex shader that simply transforms vertex and texture coordinates,
- while leaving colors intact. Nothing fancy (like lights)
- If implementing lights make a copy first, so it is easy to switch between the two versions */
-static void r300GenerateSimpleVertexShader(r300ContextPtr r300)
+static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
+ GLuint output_count, GLuint temp_count)
{
- int i;
- GLuint o_reg = 0;
-
- /* Allocate parameters */
- r300->state.vap_param.transform_offset = 0x0; /* transform matrix */
- r300->state.vertex_shader.param_offset = 0x0;
- r300->state.vertex_shader.param_count = 0x4; /* 4 vector values - 4x4 matrix */
-
- r300->state.vertex_shader.program_start = 0x0;
- r300->state.vertex_shader.unknown_ptr1 = 0x4; /* magic value ? */
- r300->state.vertex_shader.program_end = 0x0;
-
- r300->state.vertex_shader.unknown_ptr2 = 0x0; /* magic value */
- r300->state.vertex_shader.unknown_ptr3 = 0x4; /* magic value */
+ int vtx_mem_size;
+ int pvs_num_slots;
+ int pvs_num_cntrls;
+
+ /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS.
+ * See r500 docs 6.5.2 - done in emit */
+
+ /* avoid division by zero */
+ if (input_count == 0) input_count = 1;
+ if (output_count == 0) output_count = 1;
+ if (temp_count == 0) temp_count = 1;
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ vtx_mem_size = 128;
+ else
+ vtx_mem_size = 72;
+
+ pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count);
+ pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count);
+
+ R300_STATECHANGE(rmesa, vap_cntl);
+ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] =
+ (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) |
+ (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (12 << R300_VF_MAX_VTX_NUM_SHIFT);
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= R500_TCL_STATE_OPTIMIZATION;
+ } else
+ /* not sure about non-tcl */
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (5 << R300_VF_MAX_VTX_NUM_SHIFT));
+
+ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+ else
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT);
- r300->state.vertex_shader.unknown1.length = 0;
- r300->state.vertex_shader.unknown2.length = 0;
+}
-#define WRITE_OP(oper,source1,source2,source3) {\
- r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].op=(oper); \
- r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[0]=(source1); \
- r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[1]=(source2); \
- r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[2]=(source3); \
- r300->state.vertex_shader.program_end++; \
+static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
+{
+ struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader);
+ GLuint o_reg = 0;
+ GLuint i_reg = 0;
+ int i;
+ int inst_count = 0;
+ int param_count = 0;
+ int program_end = 0;
+
+ for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) {
+ if (rmesa->state.sw_tcl_inputs[i] != -1) {
+ prog->program.body.i[program_end + 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, GL_FALSE, GL_FALSE, o_reg++, VSF_FLAG_ALL, PVS_DST_REG_OUT);
+ prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
+ prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
+ prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
+ program_end += 4;
+ i_reg++;
+ }
}
- for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++)
- if (r300->state.sw_tcl_inputs[i] != -1) {
- WRITE_OP(EASY_VSF_OP(MUL, o_reg++, ALL, RESULT),
- VSF_REG(r300->state.sw_tcl_inputs[i]),
- VSF_ATTR_UNITY(r300->state.
- sw_tcl_inputs[i]),
- VSF_UNITY(r300->state.sw_tcl_inputs[i])
- )
+ prog->program.length = program_end;
- }
+ r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START,
+ &(prog->program));
+ inst_count = (prog->program.length / 4) - 1;
- r300->state.vertex_shader.program_end--; /* r300 wants program length to be one more - no idea why */
- r300->state.vertex_shader.program.length =
- (r300->state.vertex_shader.program_end + 1) * 4;
+ r300VapCntl(rmesa, i_reg, o_reg, 0);
- r300->state.vertex_shader.unknown_ptr1 = r300->state.vertex_shader.program_end; /* magic value ? */
- r300->state.vertex_shader.unknown_ptr2 = r300->state.vertex_shader.program_end; /* magic value ? */
- r300->state.vertex_shader.unknown_ptr3 = r300->state.vertex_shader.program_end; /* magic value ? */
+ R300_STATECHANGE(rmesa, pvs);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
+ (0 << R300_PVS_FIRST_INST_SHIFT) |
+ (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (inst_count << R300_PVS_LAST_INST_SHIFT);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
+ (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+ (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
+ (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+}
+static int bit_count (int x)
+{
+ x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U);
+ x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U);
+ x = (x >> 16) + (x & 0xffff);
+ x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f);
+ return (x >> 8) + (x & 0x00ff);
}
-static void r300SetupVertexProgram(r300ContextPtr rmesa)
+static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
{
GLcontext *ctx = rmesa->radeon.glCtx;
- int inst_count;
- int param_count;
- struct r300_vertex_program *prog =
- (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
+ struct r300_vertex_program *prog = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
+ int inst_count = 0;
+ int param_count = 0;
- ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
+ /* FIXME: r300SetupVertexProgramFragment */
R300_STATECHANGE(rmesa, vpp);
param_count =
- r300VertexProgUpdateParams(ctx, (struct r300_vertex_program_cont *)
- ctx->VertexProgram._Current /*prog */ ,
+ r300VertexProgUpdateParams(ctx,
+ (struct r300_vertex_program_cont *)
+ ctx->VertexProgram._Current,
(float *)&rmesa->hw.vpp.
cmd[R300_VPP_PARAM_0]);
bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
param_count /= 4;
- /* Reset state, in case we don't use something */
- ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
- ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
+ r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program));
+ inst_count = (prog->program.length / 4) - 1;
- setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, &(prog->program));
-
-#if 0
- setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1,
- &(rmesa->state.vertex_shader.unknown1));
- setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2,
- &(rmesa->state.vertex_shader.unknown2));
-#endif
-
- inst_count = prog->program.length / 4 - 1;
+ r300VapCntl(rmesa, bit_count(prog->key.InputsRead),
+ bit_count(prog->key.OutputsWritten), prog->num_temporaries);
R300_STATECHANGE(rmesa, pvs);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
- (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT)
- | (inst_count /*pos_end */ << R300_PVS_CNTL_1_POS_END_SHIFT)
- | (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT);
+ (0 << R300_PVS_FIRST_INST_SHIFT) |
+ (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (inst_count << R300_PVS_LAST_INST_SHIFT);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
- (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT)
- | (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT);
+ (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+ (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
- (0 /*rmesa->state.vertex_shader.unknown_ptr2 */ <<
- R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT)
- | (inst_count /*rmesa->state.vertex_shader.unknown_ptr3 */ <<
- 0);
-
- /* This is done for vertex shader fragments, but also needs to be done for vap_pvs,
- so I leave it as a reminder */
-#if 0
- reg_start(R300_VAP_PVS_WAITIDLE, 0);
- e32(0x00000000);
-#endif
+ (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
}
-static void r300SetupVertexShader(r300ContextPtr rmesa)
+static void r300SetupVertexProgram(r300ContextPtr rmesa)
{
GLcontext *ctx = rmesa->radeon.glCtx;
@@ -1788,50 +2120,71 @@ static void r300SetupVertexShader(r300ContextPtr rmesa)
0x400 area might have something to do with pixel shaders as it appears right after pfs programming.
0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */
//setup_vertex_shader_fragment(rmesa, 0x406, &unk4);
- if (hw_tcl_on
- && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->
- translated) {
- r300SetupVertexProgram(rmesa);
- return;
+ if (hw_tcl_on && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->translated) {
+ r300SetupRealVertexProgram(rmesa);
+ } else {
+ /* FIXME: This needs to be replaced by vertex shader generation code. */
+ r300SetupDefaultVertexProgram(rmesa);
}
- /* This needs to be replaced by vertex shader generation code */
- r300GenerateSimpleVertexShader(rmesa);
-
- setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM,
- &(rmesa->state.vertex_shader.program));
-
-#if 0
- setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1,
- &(rmesa->state.vertex_shader.unknown1));
- setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2,
- &(rmesa->state.vertex_shader.unknown2));
-#endif
+}
- R300_STATECHANGE(rmesa, pvs);
- rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
- (rmesa->state.vertex_shader.
- program_start << R300_PVS_CNTL_1_PROGRAM_START_SHIFT)
- | (rmesa->state.vertex_shader.
- unknown_ptr1 << R300_PVS_CNTL_1_POS_END_SHIFT)
- | (rmesa->state.vertex_shader.
- program_end << R300_PVS_CNTL_1_PROGRAM_END_SHIFT);
- rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
- (rmesa->state.vertex_shader.
- param_offset << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT)
- | (rmesa->state.vertex_shader.
- param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT);
- rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
- (rmesa->state.vertex_shader.
- unknown_ptr2 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT)
- | (rmesa->state.vertex_shader.unknown_ptr3 << 0);
+/**
+ * Enable/Disable states.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr(cap),
+ state ? "GL_TRUE" : "GL_FALSE");
- /* This is done for vertex shader fragments, but also needs to be done for vap_pvs,
- so I leave it as a reminder */
-#if 0
- reg_start(R300_VAP_PVS_WAITIDLE, 0);
- e32(0x00000000);
-#endif
+ switch (cap) {
+ case GL_TEXTURE_1D:
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_3D:
+ /* empty */
+ break;
+ case GL_FOG:
+ r300SetFogState(ctx, state);
+ break;
+ case GL_ALPHA_TEST:
+ r300SetAlphaState(ctx);
+ break;
+ case GL_COLOR_LOGIC_OP:
+ r300SetLogicOpState(ctx);
+ /* fall-through, because logic op overrides blending */
+ case GL_BLEND:
+ r300SetBlendState(ctx);
+ break;
+ case GL_CLIP_PLANE0:
+ case GL_CLIP_PLANE1:
+ case GL_CLIP_PLANE2:
+ case GL_CLIP_PLANE3:
+ case GL_CLIP_PLANE4:
+ case GL_CLIP_PLANE5:
+ r300SetClipPlaneState(ctx, cap, state);
+ break;
+ case GL_DEPTH_TEST:
+ r300SetDepthState(ctx);
+ break;
+ case GL_STENCIL_TEST:
+ r300SetStencilState(ctx, state);
+ break;
+ case GL_CULL_FACE:
+ r300UpdateCulling(ctx);
+ break;
+ case GL_POLYGON_OFFSET_POINT:
+ case GL_POLYGON_OFFSET_LINE:
+ case GL_POLYGON_OFFSET_FILL:
+ r300SetPolygonOffsetState(ctx, state);
+ break;
+ default:
+ radeonEnable(ctx, cap, state);
+ break;
+ }
}
/**
@@ -1848,12 +2201,6 @@ static void r300ResetHwState(r300ContextPtr r300)
if (RADEON_DEBUG & DEBUG_STATE)
fprintf(stderr, "%s\n", __FUNCTION__);
- /* This is a place to initialize registers which
- have bitfields accessed by different functions
- and not all bits are used */
-
- /* go and compute register values from GL state */
-
r300UpdateWindow(ctx);
r300ColorMask(ctx,
@@ -1879,17 +2226,11 @@ static void r300ResetHwState(r300ContextPtr r300)
r300UpdateTextureState(ctx);
r300SetBlendState(ctx);
+ r300SetLogicOpState(ctx);
r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
- /* Initialize magic registers
- TODO : learn what they really do, or get rid of
- those we don't have to touch */
- if (!has_tcl)
- r300->hw.vap_cntl.cmd[1] = 0x0014045a;
- else
- r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */
r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
| R300_VPORT_X_OFFSET_ENA
| R300_VPORT_Y_SCALE_ENA
@@ -1898,123 +2239,137 @@ static void r300ResetHwState(r300ContextPtr r300)
| R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT;
r300->hw.vte.cmd[2] = 0x00000008;
- r300->hw.unk2134.cmd[1] = 0x00FFFFFF;
- r300->hw.unk2134.cmd[2] = 0x00000000;
- if (_mesa_little_endian())
- r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP;
- else
- r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP;
+ r300->hw.vap_vf_max_vtx_indx.cmd[1] = 0x00FFFFFF;
+ r300->hw.vap_vf_max_vtx_indx.cmd[2] = 0x00000000;
+
+#ifdef MESA_LITTLE_ENDIAN
+ r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP;
+#else
+ r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP;
+#endif
/* disable VAP/TCL on non-TCL capable chips */
if (!has_tcl)
r300->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
- r300->hw.unk21DC.cmd[1] = 0xAAAAAAAA;
+ r300->hw.vap_psc_sgn_norm_cntl.cmd[1] = 0xAAAAAAAA;
- r300->hw.vap_clip_cntl.cmd[1] = R300_221C_NORMAL;
+ /* XXX: Other families? */
+ if (has_tcl) {
+ r300->hw.vap_clip_cntl.cmd[1] = R300_PS_UCP_MODE_DIST_COP;
- r300->hw.unk2220.cmd[1] = r300PackFloat32(1.0);
- r300->hw.unk2220.cmd[2] = r300PackFloat32(1.0);
- r300->hw.unk2220.cmd[3] = r300PackFloat32(1.0);
- r300->hw.unk2220.cmd[4] = r300PackFloat32(1.0);
+ r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */
+ r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */
+ r300->hw.vap_clip.cmd[3] = r300PackFloat32(1.0); /* Y */
+ r300->hw.vap_clip.cmd[4] = r300PackFloat32(1.0); /* Y */
- /* what about other chips than r300 or rv350??? */
- if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300)
- r300->hw.unk2288.cmd[1] = R300_2288_R300;
- else
- r300->hw.unk2288.cmd[1] = R300_2288_RV350;
+ switch (r300->radeon.radeonScreen->chip_family) {
+ case CHIP_FAMILY_R300:
+ r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_R300;
+ break;
+ default:
+ r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_RV350;
+ break;
+ }
+ }
r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE
| R300_GB_LINE_STUFF_ENABLE
- | R300_GB_TRIANGLE_STUFF_ENABLE /*| R300_GB_UNK31 */ ;
+ | R300_GB_TRIANGLE_STUFF_ENABLE;
r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666;
r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666;
- if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) ||
- (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350))
- r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
- R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_R300 |
- R300_GB_TILE_SIZE_16;
- else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410)
- r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
- R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_RV410 |
- R300_GB_TILE_SIZE_16;
- else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)
- r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
- R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_R420 |
- R300_GB_TILE_SIZE_16;
- else
- r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
- R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_RV300 |
- R300_GB_TILE_SIZE_16;
- /* set to 0 when fog is disabled? */
+
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
+ R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/;
+ switch (r300->radeon.radeonScreen->num_gb_pipes) {
+ case 1:
+ default:
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+ R300_GB_TILE_PIPE_COUNT_RV300;
+ break;
+ case 2:
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+ R300_GB_TILE_PIPE_COUNT_R300;
+ break;
+ case 3:
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+ R300_GB_TILE_PIPE_COUNT_R420_3P;
+ break;
+ case 4:
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+ R300_GB_TILE_PIPE_COUNT_R420;
+ break;
+ }
+
+ /* XXX: set to 0 when fog is disabled? */
r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = R300_GB_FOG_SELECT_1_1_W;
- r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = R300_AA_DISABLE; /* No antialiasing */
- r300->hw.unk4200.cmd[1] = r300PackFloat32(0.0);
- r300->hw.unk4200.cmd[2] = r300PackFloat32(0.0);
- r300->hw.unk4200.cmd[3] = r300PackFloat32(1.0);
- r300->hw.unk4200.cmd[4] = r300PackFloat32(1.0);
+ /* XXX: Enable anti-aliasing? */
+ r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE;
- r300->hw.unk4214.cmd[1] = 0x00050005;
+ r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0);
+ r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0);
+ r300->hw.ga_point_s0.cmd[3] = r300PackFloat32(1.0);
+ r300->hw.ga_point_s0.cmd[4] = r300PackFloat32(1.0);
- r300PointSize(ctx, 0.0);
+ r300->hw.ga_triangle_stipple.cmd[1] = 0x00050005;
- r300->hw.unk4230.cmd[1] = 0x18000006;
- r300->hw.unk4230.cmd[2] = 0x00020006;
- r300->hw.unk4230.cmd[3] = r300PackFloat32(1.0 / 192.0);
+ r300PointSize(ctx, 1.0);
- r300LineWidth(ctx, 0.0);
+ r300->hw.ga_point_minmax.cmd[1] = 0x18000006;
+ r300->hw.ga_point_minmax.cmd[2] = 0x00020006;
+ r300->hw.ga_point_minmax.cmd[3] = r300PackFloat32(1.0 / 192.0);
- r300->hw.unk4260.cmd[1] = 0;
- r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0);
- r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0);
+ r300LineWidth(ctx, 1.0);
+
+ r300->hw.ga_line_stipple.cmd[1] = 0;
+ r300->hw.ga_line_stipple.cmd[2] = r300PackFloat32(0.0);
+ r300->hw.ga_line_stipple.cmd[3] = r300PackFloat32(1.0);
- r300->hw.shade.cmd[1] = 0x00000002;
r300ShadeModel(ctx, ctx->Light.ShadeModel);
- r300->hw.shade.cmd[3] = 0x00000000;
- r300->hw.shade.cmd[4] = 0x00000000;
r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode);
r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode);
- r300->hw.polygon_mode.cmd[2] = 0x00000001;
- r300->hw.polygon_mode.cmd[3] = 0x00000000;
r300->hw.zbias_cntl.cmd[1] = 0x00000000;
r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor,
ctx->Polygon.OffsetUnits);
+ r300Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint);
+ r300Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine);
r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill);
- r300->hw.unk42C0.cmd[1] = 0x4B7FFFFF;
- r300->hw.unk42C0.cmd[2] = 0x00000000;
+ r300->hw.su_depth_scale.cmd[1] = 0x4B7FFFFF;
+ r300->hw.su_depth_scale.cmd[2] = 0x00000000;
- r300->hw.unk43A4.cmd[1] = 0x0000001C;
- r300->hw.unk43A4.cmd[2] = 0x2DA49525;
+ r300->hw.sc_hyperz.cmd[1] = 0x0000001C;
+ r300->hw.sc_hyperz.cmd[2] = 0x2DA49525;
- r300->hw.unk43E8.cmd[1] = 0x00FFFFFF;
+ r300->hw.sc_screendoor.cmd[1] = 0x00FFFFFF;
- r300->hw.unk46A4.cmd[1] = 0x00001B01;
- r300->hw.unk46A4.cmd[2] = 0x00001B0F;
- r300->hw.unk46A4.cmd[3] = 0x00001B0F;
- r300->hw.unk46A4.cmd[4] = 0x00001B0F;
- r300->hw.unk46A4.cmd[5] = 0x00000001;
+ r300->hw.us_out_fmt.cmd[1] = R500_OUT_FMT_C4_8 |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[2] = R500_OUT_FMT_UNUSED |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[3] = R500_OUT_FMT_UNUSED |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W24;
r300Enable(ctx, GL_FOG, ctx->Fog.Enabled);
- ctx->Driver.Fogfv(ctx, GL_FOG_MODE, NULL);
- ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
- ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
- ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
- ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
- ctx->Driver.Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL);
+ r300Fogfv(ctx, GL_FOG_MODE, NULL);
+ r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
+ r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
+ r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
+ r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
+ r300Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL);
- r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
- r300->hw.unk4BD8.cmd[1] = 0;
+ r300->hw.fg_depth_src.cmd[1] = 0;
- r300->hw.unk4E00.cmd[1] = 0;
+ r300->hw.rb3d_cctl.cmd[1] = 0;
r300BlendColor(ctx, ctx->Color.BlendColor);
- r300->hw.blend_color.cmd[2] = 0;
- r300->hw.blend_color.cmd[3] = 0;
/* Again, r300ClearBuffer uses this */
r300->hw.cb.cmd[R300_CB_OFFSET] =
@@ -2030,39 +2385,20 @@ static void r300ResetHwState(r300ContextPtr r300)
if (r300->radeon.sarea->tiling_enabled)
r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
- r300->hw.unk4E50.cmd[1] = 0;
- r300->hw.unk4E50.cmd[2] = 0;
- r300->hw.unk4E50.cmd[3] = 0;
- r300->hw.unk4E50.cmd[4] = 0;
- r300->hw.unk4E50.cmd[5] = 0;
- r300->hw.unk4E50.cmd[6] = 0;
- r300->hw.unk4E50.cmd[7] = 0;
- r300->hw.unk4E50.cmd[8] = 0;
- r300->hw.unk4E50.cmd[9] = 0;
-
- r300->hw.unk4E88.cmd[1] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[1] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[2] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[3] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[4] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[5] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[6] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[7] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[8] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[9] = 0;
- r300->hw.unk4EA0.cmd[1] = 0x00000000;
- r300->hw.unk4EA0.cmd[2] = 0xffffffff;
+ r300->hw.rb3d_aaresolve_ctl.cmd[1] = 0;
- switch (ctx->Visual.depthBits) {
- case 16:
- r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z;
- break;
- case 24:
- r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z;
- break;
- default:
- fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
- ctx->Visual.depthBits);
- _mesa_exit(-1);
-
- }
- /* z compress? */
- //r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32;
-
- r300->hw.zstencil_format.cmd[3] = 0x00000003;
- r300->hw.zstencil_format.cmd[4] = 0x00000000;
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
r300->hw.zb.cmd[R300_ZB_OFFSET] =
r300->radeon.radeonScreen->depthOffset +
@@ -2070,37 +2406,51 @@ static void r300ResetHwState(r300ContextPtr r300)
r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
if (r300->radeon.sarea->tiling_enabled) {
- /* Turn off when clearing buffers ? */
- r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTH_TILE_ENABLE;
+ /* XXX: Turn off when clearing buffers ? */
+ r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE;
if (ctx->Visual.depthBits == 24)
r300->hw.zb.cmd[R300_ZB_PITCH] |=
- R300_DEPTH_MICROTILE_ENABLE;
+ R300_DEPTHMICROTILE_TILED;
+ }
+
+ r300->hw.zb_depthclearvalue.cmd[1] = 0;
+
+ switch (ctx->Visual.depthBits) {
+ case 16:
+ r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_16BIT_INT_Z;
+ break;
+ case 24:
+ r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ break;
+ default:
+ fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits);
+ _mesa_exit(-1);
}
- r300->hw.unk4F28.cmd[1] = 0;
+ r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE;
+ r300->hw.zstencil_format.cmd[3] = 0x00000003;
+ r300->hw.zstencil_format.cmd[4] = 0x00000000;
+ r300SetEarlyZState(ctx);
r300->hw.unk4F30.cmd[1] = 0;
r300->hw.unk4F30.cmd[2] = 0;
- r300->hw.unk4F44.cmd[1] = 0;
+ r300->hw.zb_hiz_offset.cmd[1] = 0;
- r300->hw.unk4F54.cmd[1] = 0;
+ r300->hw.zb_hiz_pitch.cmd[1] = 0;
+ r300VapCntl(r300, 0, 0, 0);
if (has_tcl) {
r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0;
r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0;
r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0);
r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
}
-//END: TODO
+
r300->hw.all_dirty = GL_TRUE;
}
-
-extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx);
-
-extern int future_hw_tcl_on;
void r300UpdateShaders(r300ContextPtr rmesa)
{
GLcontext *ctx;
@@ -2136,18 +2486,40 @@ void r300UpdateShaders(r300ContextPtr rmesa)
hw_tcl_on = future_hw_tcl_on = 0;
r300ResetHwState(rmesa);
+ r300UpdateStateParameters(ctx, _NEW_PROGRAM);
return;
}
- r300UpdateStateParameters(ctx, _NEW_PROGRAM);
}
+ r300UpdateStateParameters(ctx, _NEW_PROGRAM);
+}
+static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
+ struct gl_program *program, struct prog_src_register srcreg)
+{
+ static const GLfloat dummy[4] = { 0, 0, 0, 0 };
+
+ switch(srcreg.File) {
+ case PROGRAM_LOCAL_PARAM:
+ return program->LocalParams[srcreg.Index];
+ case PROGRAM_ENV_PARAM:
+ return ctx->FragmentProgram.Parameters[srcreg.Index];
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
+ return program->Parameters->ParameterValues[srcreg.Index];
+ default:
+ _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n");
+ return dummy;
+ }
}
+
static void r300SetupPixelShader(r300ContextPtr rmesa)
{
GLcontext *ctx = rmesa->radeon.glCtx;
struct r300_fragment_program *fp = (struct r300_fragment_program *)
(char *)ctx->FragmentProgram._Current;
+ struct r300_fragment_program_code *code;
int i, k;
if (!fp) /* should only happenen once, just after context is created */
@@ -2159,62 +2531,143 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
__FUNCTION__);
return;
}
-#define OUTPUT_FIELD(st, reg, field) \
- R300_STATECHANGE(rmesa, st); \
- for(i=0;i<=fp->alu_end;i++) \
- rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=fp->alu.inst[i].field;\
- rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmdpacket0(reg, fp->alu_end+1);
+ code = &fp->code;
- OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0);
- OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1);
- OUTPUT_FIELD(fpi[2], R300_PFS_INSTR2_0, inst2);
- OUTPUT_FIELD(fpi[3], R300_PFS_INSTR3_0, inst3);
-#undef OUTPUT_FIELD
+ r300SetupTextures(ctx);
+
+ R300_STATECHANGE(rmesa, fpi[0]);
+ R300_STATECHANGE(rmesa, fpi[1]);
+ R300_STATECHANGE(rmesa, fpi[2]);
+ R300_STATECHANGE(rmesa, fpi[3]);
+ rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu.length);
+ rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu.length);
+ rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu.length);
+ rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++) {
+ rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0;
+ rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1;
+ rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2;
+ rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3;
+ }
R300_STATECHANGE(rmesa, fp);
+ rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3);
+ rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx;
+ rmesa->hw.fp.cmd[R300_FP_CNTL2] =
+ (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
+ ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
+ (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
+ ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
/* I just want to say, the way these nodes are stored.. weird.. */
- for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) {
- if (i < (fp->cur_node + 1)) {
+ for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) {
+ if (i < (code->cur_node + 1)) {
rmesa->hw.fp.cmd[R300_FP_NODE0 + k] =
- (fp->node[i].
- alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT)
- | (fp->node[i].
- alu_end << R300_PFS_NODE_ALU_END_SHIFT)
- | (fp->node[i].
- tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT)
- | (fp->node[i].
- tex_end << R300_PFS_NODE_TEX_END_SHIFT)
- | fp->node[i].flags; /* ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0); */
+ (code->node[i].alu_offset << R300_ALU_START_SHIFT) |
+ (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) |
+ (code->node[i].tex_offset << R300_TEX_START_SHIFT) |
+ (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) |
+ code->node[i].flags;
} else {
rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0;
}
}
- /* PFS_CNTL_0 */
- rmesa->hw.fp.cmd[R300_FP_CNTL0] =
- fp->cur_node | (fp->first_node_has_tex << 3);
- /* PFS_CNTL_1 */
- rmesa->hw.fp.cmd[R300_FP_CNTL1] = fp->max_temp_idx;
- /* PFS_CNTL_2 */
- rmesa->hw.fp.cmd[R300_FP_CNTL2] =
- (fp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
- | (fp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT)
- | (fp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
- | (fp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT);
-
R300_STATECHANGE(rmesa, fpp);
- for (i = 0; i < fp->const_nr; i++) {
- rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] =
- r300PackFloat24(fp->constant[i][0]);
- rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] =
- r300PackFloat24(fp->constant[i][1]);
- rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] =
- r300PackFloat24(fp->constant[i][2]);
- rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] =
- r300PackFloat24(fp->constant[i][3]);
+ rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4);
+ for (i = 0; i < code->const_nr; i++) {
+ const GLfloat *constant = get_fragmentprogram_constant(ctx,
+ &fp->mesa_program.Base, code->constant[i]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]);
+ }
+}
+
+#define bump_r500fp_count(ptr, new_count) do{\
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
+ int _nc=(new_count)/6; \
+ assert(_nc < 256); \
+ if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\
+} while(0)
+
+#define bump_r500fp_const_count(ptr, new_count) do{\
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
+ int _nc=(new_count)/4; \
+ assert(_nc < 256); \
+ if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\
+} while(0)
+
+static void r500SetupPixelShader(r300ContextPtr rmesa)
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+ struct r500_fragment_program *fp = (struct r500_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+ int i;
+ struct r500_fragment_program_code *code;
+
+ if (!fp) /* should only happenen once, just after context is created */
+ return;
+
+ ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0;
+
+ r500TranslateFragmentShader(rmesa, fp);
+ if (!fp->translated) {
+ fprintf(stderr, "%s: No valid fragment shader, exiting\n",
+ __FUNCTION__);
+ return;
+ }
+ code = &fp->code;
+
+ if (fp->mesa_program.FogOption != GL_NONE) {
+ /* Enable HW fog. Try not to squish GL context.
+ * (Anybody sane remembered to set glFog() opts first!) */
+ r300SetFogState(ctx, GL_TRUE);
+ ctx->Fog.Mode = fp->mesa_program.FogOption;
+ r300Fogfv(ctx, GL_FOG_MODE, NULL);
+ } else
+ /* Make sure HW is matching GL context. */
+ r300SetFogState(ctx, ctx->Fog.Enabled);
+
+ r300SetupTextures(ctx);
+
+ R300_STATECHANGE(rmesa, fp);
+ rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx;
+
+ rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] =
+ R500_US_CODE_START_ADDR(code->inst_offset) |
+ R500_US_CODE_END_ADDR(code->inst_end);
+ rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] =
+ R500_US_CODE_RANGE_ADDR(code->inst_offset) |
+ R500_US_CODE_RANGE_SIZE(code->inst_end);
+ rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] =
+ R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */
+
+ R300_STATECHANGE(rmesa, r500fp);
+ /* Emit our shader... */
+ for (i = 0; i < code->inst_end+1; i++) {
+ rmesa->hw.r500fp.cmd[i*6+1] = code->inst[i].inst0;
+ rmesa->hw.r500fp.cmd[i*6+2] = code->inst[i].inst1;
+ rmesa->hw.r500fp.cmd[i*6+3] = code->inst[i].inst2;
+ rmesa->hw.r500fp.cmd[i*6+4] = code->inst[i].inst3;
+ rmesa->hw.r500fp.cmd[i*6+5] = code->inst[i].inst4;
+ rmesa->hw.r500fp.cmd[i*6+6] = code->inst[i].inst5;
+ }
+
+ bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6);
+
+ R300_STATECHANGE(rmesa, r500fp_const);
+ for (i = 0; i < code->const_nr; i++) {
+ const GLfloat *constant = get_fragmentprogram_constant(ctx,
+ &fp->mesa_program.Base, code->constant[i]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]);
}
- rmesa->hw.fpp.cmd[R300_FPP_CMD_0] =
- cmdpacket0(R300_PFS_PARAM_0_X, fp->const_nr * 4);
+ bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4);
+
}
void r300UpdateShaderStates(r300ContextPtr rmesa)
@@ -2223,13 +2676,29 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
ctx = rmesa->radeon.glCtx;
r300UpdateTextureState(ctx);
+ r300SetEarlyZState(ctx);
- r300SetupPixelShader(rmesa);
- r300SetupTextures(ctx);
+ GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
+ if (current_fragment_program_writes_depth(ctx))
+ fgdepthsrc = R300_FG_DEPTH_SRC_SHADER;
+ if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) {
+ R300_STATECHANGE(rmesa, fg_depth_src);
+ rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc;
+ }
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ r500SetupPixelShader(rmesa);
+ else
+ r300SetupPixelShader(rmesa);
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ r500SetupRSUnit(ctx);
+ else
+ r300SetupRSUnit(ctx);
if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- r300SetupVertexShader(rmesa);
- r300SetupRSUnit(ctx);
+ r300SetupVertexProgram(rmesa);
+
}
/**
@@ -2269,16 +2738,12 @@ void r300InitState(r300ContextPtr r300)
switch (ctx->Visual.depthBits) {
case 16:
r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
- depth_fmt = R300_DEPTH_FORMAT_16BIT_INT_Z;
- r300->state.stencil.clear = 0x00000000;
+ depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z;
break;
case 24:
r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff;
- depth_fmt = R300_DEPTH_FORMAT_24BIT_INT_Z;
- r300->state.stencil.clear = 0x00ff0000;
+ depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
break;
-
-
default:
fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
ctx->Visual.depthBits);
@@ -2301,29 +2766,15 @@ static void r300RenderMode(GLcontext * ctx, GLenum mode)
(void)mode;
}
-static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
- GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
-
- R300_STATECHANGE( rmesa, vpucp[p] );
- rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
- rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
- rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2];
- rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3];
-}
-
-
void r300UpdateClipPlanes( GLcontext *ctx )
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
GLuint p;
-
+
for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
-
+
R300_STATECHANGE( rmesa, vpucp[p] );
rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
@@ -2353,9 +2804,12 @@ void r300InitStateFuncs(struct dd_function_table *functions)
functions->Fogfv = r300Fogfv;
functions->FrontFace = r300FrontFace;
functions->ShadeModel = r300ShadeModel;
+ functions->LogicOpcode = r300LogicOpcode;
+
+ /* ARB_point_parameters */
+ functions->PointParameterfv = r300PointParameter;
/* Stencil related */
- functions->ClearStencil = r300ClearStencil;
functions->StencilFuncSeparate = r300StencilFuncSeparate;
functions->StencilMaskSeparate = r300StencilMaskSeparate;
functions->StencilOpSeparate = r300StencilOpSeparate;
diff --git a/r300/r300_state.h b/r300/r300_state.h
index 21a49b7..0589ab7 100644
--- a/r300/r300_state.h
+++ b/r300/r300_state.h
@@ -37,8 +37,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_context.h"
+#define R300_NEWPRIM( rmesa ) \
+ do { \
+ if ( rmesa->dma.flush ) \
+ rmesa->dma.flush( rmesa ); \
+ } while (0)
+
#define R300_STATECHANGE(r300, atom) \
do { \
+ R300_NEWPRIM(r300); \
r300->hw.atom.dirty = GL_TRUE; \
r300->hw.is_dirty = GL_TRUE; \
} while(0)
@@ -58,13 +65,16 @@ do { \
\
} while (0)
-extern void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state);
-extern void r300InitState(r300ContextPtr r300);
-extern void r300InitStateFuncs(struct dd_function_table *functions);
-extern void r300UpdateViewportOffset(GLcontext * ctx);
-extern void r300UpdateDrawBuffer(GLcontext * ctx);
-
-extern void r300UpdateShaders(r300ContextPtr rmesa);
-extern void r300UpdateShaderStates(r300ContextPtr rmesa);
+// r300_state.c
+extern int future_hw_tcl_on;
+void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx);
+void r300UpdateViewportOffset (GLcontext * ctx);
+void r300UpdateDrawBuffer (GLcontext * ctx);
+void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state);
+void r300UpdateShaders (r300ContextPtr rmesa);
+void r300UpdateShaderStates (r300ContextPtr rmesa);
+void r300InitState (r300ContextPtr r300);
+void r300UpdateClipPlanes (GLcontext * ctx);
+void r300InitStateFuncs (struct dd_function_table *functions);
#endif /* __R300_STATE_H__ */
diff --git a/r300/r300_swtcl.c b/r300/r300_swtcl.c
new file mode 100644
index 0000000..8aebd9b
--- /dev/null
+++ b/r300/r300_swtcl.c
@@ -0,0 +1,697 @@
+/**************************************************************************
+
+Copyright (C) 2007 Dave Airlie
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Dave Airlie <airlied@linux.ie>
+ */
+
+/* derived from r200 swtcl path */
+
+
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "enums.h"
+#include "image.h"
+#include "imports.h"
+#include "light.h"
+#include "macros.h"
+
+#include "swrast/s_context.h"
+#include "swrast/s_fog.h"
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "r300_context.h"
+#include "r300_swtcl.h"
+#include "r300_state.h"
+#include "r300_ioctl.h"
+#include "r300_emit.h"
+#include "r300_mem.h"
+
+static void flush_last_swtcl_prim( r300ContextPtr rmesa );
+
+
+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
+void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
+#define EMIT_ATTR( ATTR, STYLE ) \
+do { \
+ rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \
+ rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \
+ rmesa->swtcl.vertex_attr_count++; \
+} while (0)
+
+#define EMIT_PAD( N ) \
+do { \
+ rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \
+ rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \
+ rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \
+ rmesa->swtcl.vertex_attr_count++; \
+} while (0)
+
+static void r300SetVertexFormat( GLcontext *ctx )
+{
+ r300ContextPtr rmesa = R300_CONTEXT( ctx );
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ DECLARE_RENDERINPUTS(index_bitset);
+ GLuint InputsRead = 0, OutputsWritten = 0;
+ int vap_fmt_0 = 0;
+ int vap_vte_cntl = 0;
+ int offset = 0;
+ int vte = 0;
+ GLint inputs[VERT_ATTRIB_MAX];
+ GLint tab[VERT_ATTRIB_MAX];
+ int swizzle[VERT_ATTRIB_MAX][4];
+ GLuint i, nr;
+ GLuint sz, vap_fmt_1 = 0;
+
+ DECLARE_RENDERINPUTS(render_inputs_bitset);
+ RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
+ RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+ RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
+
+ vte = rmesa->hw.vte.cmd[1];
+ vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT);
+ /* Important:
+ */
+ if ( VB->NdcPtr != NULL ) {
+ VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+ vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT;
+ }
+ else {
+ VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
+ vte |= R300_VTX_W0_FMT;
+ }
+
+ assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+ rmesa->swtcl.vertex_attr_count = 0;
+
+ /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+ * build up a hardware vertex.
+ */
+ if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) {
+ sz = VB->AttribPtr[VERT_ATTRIB_POS]->size;
+ InputsRead |= 1 << VERT_ATTRIB_POS;
+ OutputsWritten |= 1 << VERT_RESULT_HPOS;
+ EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 );
+ offset = sz;
+ } else {
+ offset = 4;
+ EMIT_PAD(4 * sizeof(float));
+ }
+
+ if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
+ EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
+ vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+ offset += 1;
+ }
+
+ if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) {
+ sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size;
+ rmesa->swtcl.coloroffset = offset;
+ InputsRead |= 1 << VERT_ATTRIB_COLOR0;
+ OutputsWritten |= 1 << VERT_RESULT_COL0;
+ EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 );
+ offset += sz;
+ }
+
+ rmesa->swtcl.specoffset = 0;
+ if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+ sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size;
+ rmesa->swtcl.specoffset = offset;
+ EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 );
+ InputsRead |= 1 << VERT_ATTRIB_COLOR1;
+ OutputsWritten |= 1 << VERT_RESULT_COL1;
+ }
+
+ if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+ int i;
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
+ sz = VB->TexCoordPtr[i]->size;
+ InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
+ OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
+ EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 );
+ vap_fmt_1 |= sz << (3 * i);
+ }
+ }
+ }
+
+ for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (InputsRead & (1 << i)) {
+ inputs[i] = nr++;
+ } else {
+ inputs[i] = -1;
+ }
+ }
+
+ /* Fixed, apply to vir0 only */
+ if (InputsRead & (1 << VERT_ATTRIB_POS))
+ inputs[VERT_ATTRIB_POS] = 0;
+ if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
+ inputs[VERT_ATTRIB_COLOR0] = 2;
+ if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
+ inputs[VERT_ATTRIB_COLOR1] = 3;
+ for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
+ if (InputsRead & (1 << i))
+ inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+
+ for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (InputsRead & (1 << i)) {
+ tab[nr++] = i;
+ }
+ }
+
+ for (i = 0; i < nr; i++) {
+ int ci;
+
+ swizzle[i][0] = SWIZZLE_ZERO;
+ swizzle[i][1] = SWIZZLE_ZERO;
+ swizzle[i][2] = SWIZZLE_ZERO;
+ swizzle[i][3] = SWIZZLE_ONE;
+
+ for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) {
+ swizzle[i][ci] = ci;
+ }
+ }
+
+ R300_NEWPRIM(rmesa);
+ R300_STATECHANGE(rmesa, vir[0]);
+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
+ r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
+ VB->AttribPtr, inputs, tab, nr);
+ R300_STATECHANGE(rmesa, vir[1]);
+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
+ r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
+ nr);
+
+ R300_STATECHANGE(rmesa, vic);
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+
+ R300_STATECHANGE(rmesa, vof);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
+
+ rmesa->swtcl.vertex_size =
+ _tnl_install_attrs( ctx,
+ rmesa->swtcl.vertex_attrs,
+ rmesa->swtcl.vertex_attr_count,
+ NULL, 0 );
+
+ rmesa->swtcl.vertex_size /= 4;
+
+ RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
+
+
+ R300_STATECHANGE(rmesa, vte);
+ rmesa->hw.vte.cmd[1] = vte;
+ rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
+}
+
+
+/* Flush vertices in the current dma region.
+ */
+static void flush_last_swtcl_prim( r300ContextPtr rmesa )
+{
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ rmesa->dma.flush = NULL;
+
+ if (rmesa->dma.current.buf) {
+ struct r300_dma_region *current = &rmesa->dma.current;
+ GLuint current_offset = GET_START(current);
+
+ assert (current->start +
+ rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+ current->ptr);
+
+ if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+
+ r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
+
+ r300EmitState(rmesa);
+
+ r300EmitVertexAOS( rmesa,
+ rmesa->swtcl.vertex_size,
+ current_offset);
+
+ r300EmitVbufPrim( rmesa,
+ rmesa->swtcl.hw_primitive,
+ rmesa->swtcl.numverts);
+
+ r300EmitCacheFlush(rmesa);
+ }
+
+ rmesa->swtcl.numverts = 0;
+ current->start = current->ptr;
+ }
+}
+
+/* Alloc space in the current dma region.
+ */
+static void *
+r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
+{
+ GLuint bytes = vsize * nverts;
+
+ if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
+ r300RefillCurrentDmaRegion( rmesa, bytes);
+
+ if (!rmesa->dma.flush) {
+ rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+ rmesa->dma.flush = flush_last_swtcl_prim;
+ }
+
+ ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+ ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
+ ASSERT( rmesa->dma.current.start +
+ rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+ rmesa->dma.current.ptr );
+
+ {
+ GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
+ rmesa->dma.current.ptr += bytes;
+ rmesa->swtcl.numverts += nverts;
+ return head;
+ }
+}
+
+static GLuint reduced_prim[] = {
+ GL_POINTS,
+ GL_LINES,
+ GL_LINES,
+ GL_LINES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+};
+
+static void r300RasterPrimitive( GLcontext *ctx, GLuint prim );
+static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
+//static void r300ResetLineStipple( GLcontext *ctx );
+
+/***********************************************************************
+ * Emit primitives as inline vertices *
+ ***********************************************************************/
+
+
+#define HAVE_POINTS 1
+#define HAVE_LINES 1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES 1
+#define HAVE_TRI_STRIPS 1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS 1
+#define HAVE_QUADS 0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS 1
+#define HAVE_ELTS 1
+
+#undef LOCAL_VARS
+#undef ALLOC_VERTS
+#define CTX_ARG r300ContextPtr rmesa
+#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 )
+#define LOCAL_VARS \
+ r300ContextPtr rmesa = R300_CONTEXT(ctx); \
+ const char *r300verts = (char *)rmesa->swtcl.verts;
+#define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
+#define VERTEX r300Vertex
+#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
+#define PRINT_VERTEX(x)
+#undef TAG
+#define TAG(x) r300_##x
+#include "tnl_dd/t_dd_triemit.h"
+
+
+
+/***********************************************************************
+ * Macros for t_dd_tritmp.h to draw basic primitives *
+ ***********************************************************************/
+
+#define QUAD( a, b, c, d ) r300_quad( rmesa, a, b, c, d )
+#define TRI( a, b, c ) r300_triangle( rmesa, a, b, c )
+#define LINE( a, b ) r300_line( rmesa, a, b )
+#define POINT( a ) r300_point( rmesa, a )
+
+/***********************************************************************
+ * Build render functions from dd templates *
+ ***********************************************************************/
+
+#define R300_TWOSIDE_BIT 0x01
+#define R300_UNFILLED_BIT 0x02
+#define R300_MAX_TRIFUNC 0x04
+
+static struct {
+ tnl_points_func points;
+ tnl_line_func line;
+ tnl_triangle_func triangle;
+ tnl_quad_func quad;
+} rast_tab[R300_MAX_TRIFUNC];
+
+#define DO_FALLBACK 0
+#define DO_UNFILLED (IND & R300_UNFILLED_BIT)
+#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT)
+#define DO_FLAT 0
+#define DO_OFFSET 0
+#define DO_TRI 1
+#define DO_QUAD 1
+#define DO_LINE 1
+#define DO_POINTS 1
+#define DO_FULL_QUAD 1
+
+#define HAVE_RGBA 1
+#define HAVE_SPEC 1
+#define HAVE_BACK_COLORS 0
+#define HAVE_HW_FLATSHADE 1
+#define TAB rast_tab
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a < 0)
+#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
+
+/* Only used to pull back colors into vertices (ie, we know color is
+ * floating point).
+ */
+#define R300_COLOR( dst, src ) \
+do { \
+ UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \
+ UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \
+ UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \
+ UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]); \
+} while (0)
+
+#define VERT_SET_RGBA( v, c ) if (coloroffset) R300_COLOR( v->ub4[coloroffset], c )
+#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx ) if (coloroffset) color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx]
+
+#define R300_SPEC( dst, src ) \
+do { \
+ UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \
+ UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \
+ UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \
+} while (0)
+
+#define VERT_SET_SPEC( v, c ) if (specoffset) R300_SPEC( v->ub4[specoffset], c )
+#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset])
+#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
+
+#undef LOCAL_VARS
+#undef TAG
+#undef INIT
+
+#define LOCAL_VARS(n) \
+ r300ContextPtr rmesa = R300_CONTEXT(ctx); \
+ GLuint color[n], spec[n]; \
+ GLuint coloroffset = rmesa->swtcl.coloroffset; \
+ GLuint specoffset = rmesa->swtcl.specoffset; \
+ (void) color; (void) spec; (void) coloroffset; (void) specoffset;
+
+/***********************************************************************
+ * Helpers for rendering unfilled primitives *
+ ***********************************************************************/
+
+#define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] )
+#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#undef TAG
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ * Generate GL render functions *
+ ***********************************************************************/
+
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R300_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R300_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+
+static void init_rast_tab( void )
+{
+ init();
+ init_twoside();
+ init_unfilled();
+ init_twoside_unfilled();
+}
+
+/**********************************************************************/
+/* Render unclipped begin/end objects */
+/**********************************************************************/
+
+#define RENDER_POINTS( start, count ) \
+ for ( ; start < count ; start++) \
+ r300_point( rmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+ r300_line( rmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 ) \
+ r300_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+ r300_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) do { \
+ r300RenderPrimitive( ctx, x ); \
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS \
+ r300ContextPtr rmesa = R300_CONTEXT(ctx); \
+ const GLuint vertsize = rmesa->swtcl.vertex_size; \
+ const char *r300verts = (char *)rmesa->swtcl.verts; \
+ const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
+ const GLboolean stipple = ctx->Line.StippleFlag; \
+ (void) elt; (void) stipple;
+#define RESET_STIPPLE //if ( stipple ) r200ResetLineStipple( ctx );
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) r300_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) r300_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+
+/**********************************************************************/
+/* Choose render functions */
+/**********************************************************************/
+static void r300ChooseRenderState( GLcontext *ctx )
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLuint index = 0;
+ GLuint flags = ctx->_TriangleCaps;
+
+ if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
+ if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT;
+
+ if (index != rmesa->swtcl.RenderIndex) {
+ tnl->Driver.Render.Points = rast_tab[index].points;
+ tnl->Driver.Render.Line = rast_tab[index].line;
+ tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+ tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+ tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+ if (index == 0) {
+ tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts;
+ tnl->Driver.Render.PrimTabElts = r300_render_tab_elts;
+ tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly;
+ } else {
+ tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+ tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+ tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+ }
+
+ rmesa->swtcl.RenderIndex = index;
+ }
+}
+
+
+static void r300RenderStart(GLcontext *ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT( ctx );
+ // fprintf(stderr, "%s\n", __FUNCTION__);
+
+ r300ChooseRenderState(ctx);
+ r300SetVertexFormat(ctx);
+
+ r300UpdateShaders(rmesa);
+ r300UpdateShaderStates(rmesa);
+
+ r300EmitCacheFlush(rmesa);
+
+ if (rmesa->dma.flush != 0 &&
+ rmesa->dma.flush != flush_last_swtcl_prim)
+ rmesa->dma.flush( rmesa );
+
+}
+
+static void r300RenderFinish(GLcontext *ctx)
+{
+}
+
+static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ if (rmesa->swtcl.hw_primitive != hwprim) {
+ R300_NEWPRIM( rmesa );
+ rmesa->swtcl.hw_primitive = hwprim;
+ }
+}
+
+static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
+{
+
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ rmesa->swtcl.render_primitive = prim;
+
+ if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
+ return;
+
+ r300RasterPrimitive( ctx, reduced_prim[prim] );
+ // fprintf(stderr, "%s\n", __FUNCTION__);
+
+}
+
+static void r300ResetLineStipple(GLcontext *ctx)
+{
+
+
+}
+
+void r300InitSwtcl(GLcontext *ctx)
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ static int firsttime = 1;
+
+ if (firsttime) {
+ init_rast_tab();
+ firsttime = 0;
+ }
+
+ tnl->Driver.Render.Start = r300RenderStart;
+ tnl->Driver.Render.Finish = r300RenderFinish;
+ tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
+ tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
+ tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+ tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+ tnl->Driver.Render.Interp = _tnl_interp;
+
+ /* FIXME: what are these numbers? */
+ _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+ 48 * sizeof(GLfloat) );
+
+ rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+ rmesa->swtcl.RenderIndex = ~0;
+ rmesa->swtcl.render_primitive = GL_TRIANGLES;
+ rmesa->swtcl.hw_primitive = 0;
+
+ _tnl_invalidate_vertex_state( ctx, ~0 );
+ _tnl_invalidate_vertices( ctx, ~0 );
+ RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
+
+ _tnl_need_projected_coords( ctx, GL_FALSE );
+ r300ChooseRenderState(ctx);
+
+ _mesa_validate_all_lighting_tables( ctx );
+
+ tnl->Driver.NotifyMaterialChange =
+ _mesa_validate_all_lighting_tables;
+}
+
+void r300DestroySwtcl(GLcontext *ctx)
+{
+}
+
+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
+{
+ int cmd_reserved = 0;
+ int cmd_written = 0;
+
+ drm_radeon_cmd_header_t *cmd = NULL;
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n",
+ __FUNCTION__, vertex_size, offset);
+
+ start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
+ e32(1);
+ e32(vertex_size | (vertex_size << 8));
+ e32(offset);
+}
+
+void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
+{
+
+ int cmd_reserved = 0;
+ int cmd_written = 0;
+ int type, num_verts;
+ drm_radeon_cmd_header_t *cmd = NULL;
+
+ type = r300PrimitiveType(rmesa, primitive);
+ num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
+
+ start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
+ e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
+}
diff --git a/r300/r300_swtcl.h b/r300/r300_swtcl.h
new file mode 100644
index 0000000..2ea6ced
--- /dev/null
+++ b/r300/r300_swtcl.h
@@ -0,0 +1,45 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com> - original r200 code
+ * Dave Airlie <airlied@linux.ie>
+ */
+
+#ifndef __R300_SWTCL_H__
+#define __R300_SWTCL_H__
+
+#include "mtypes.h"
+#include "swrast/swrast.h"
+#include "r300_context.h"
+
+extern void r300InitSwtcl( GLcontext *ctx );
+extern void r300DestroySwtcl( GLcontext *ctx );
+
+#endif
diff --git a/r300/r300_tex.c b/r300/r300_tex.c
index 2a21c61..f7f4972 100644
--- a/r300/r300_tex.c
+++ b/r300/r300_tex.c
@@ -52,129 +52,59 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "xmlpool.h"
+
+static unsigned int translate_wrap_mode(GLenum wrapmode)
+{
+ switch(wrapmode) {
+ case GL_REPEAT: return R300_TX_REPEAT;
+ case GL_CLAMP: return R300_TX_CLAMP;
+ case GL_CLAMP_TO_EDGE: return R300_TX_CLAMP_TO_EDGE;
+ case GL_CLAMP_TO_BORDER: return R300_TX_CLAMP_TO_BORDER;
+ case GL_MIRRORED_REPEAT: return R300_TX_REPEAT | R300_TX_MIRRORED;
+ case GL_MIRROR_CLAMP_EXT: return R300_TX_CLAMP | R300_TX_MIRRORED;
+ case GL_MIRROR_CLAMP_TO_EDGE_EXT: return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
+ case GL_MIRROR_CLAMP_TO_BORDER_EXT: return R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
+ default:
+ _mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__);
+ return 0;
+ }
+}
+
+
/**
- * Set the texture wrap modes.
+ * Update the cached hardware registers based on the current texture wrap modes.
*
* \param t Texture object whose wrap modes are to be set
- * \param swrap Wrap mode for the \a s texture coordinate
- * \param twrap Wrap mode for the \a t texture coordinate
*/
-
-static void r300SetTexWrap(r300TexObjPtr t, GLenum swrap, GLenum twrap,
- GLenum rwrap)
+static void r300UpdateTexWrap(r300TexObjPtr t)
{
- unsigned long hw_swrap = 0, hw_twrap = 0, hw_qwrap = 0;
+ struct gl_texture_object *tObj = t->base.tObj;
t->filter &=
- ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_Q_MASK);
+ ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
- switch (swrap) {
- case GL_REPEAT:
- hw_swrap |= R300_TX_REPEAT;
- break;
- case GL_CLAMP:
- hw_swrap |= R300_TX_CLAMP;
- break;
- case GL_CLAMP_TO_EDGE:
- hw_swrap |= R300_TX_CLAMP_TO_EDGE;
- break;
- case GL_CLAMP_TO_BORDER:
- hw_swrap |= R300_TX_CLAMP_TO_BORDER;
- break;
- case GL_MIRRORED_REPEAT:
- hw_swrap |= R300_TX_REPEAT | R300_TX_MIRRORED;
- break;
- case GL_MIRROR_CLAMP_EXT:
- hw_swrap |= R300_TX_CLAMP | R300_TX_MIRRORED;
- break;
- case GL_MIRROR_CLAMP_TO_EDGE_EXT:
- hw_swrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
- break;
- case GL_MIRROR_CLAMP_TO_BORDER_EXT:
- hw_swrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
- break;
- default:
- _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
- }
+ t->filter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
- switch (twrap) {
- case GL_REPEAT:
- hw_twrap |= R300_TX_REPEAT;
- break;
- case GL_CLAMP:
- hw_twrap |= R300_TX_CLAMP;
- break;
- case GL_CLAMP_TO_EDGE:
- hw_twrap |= R300_TX_CLAMP_TO_EDGE;
- break;
- case GL_CLAMP_TO_BORDER:
- hw_twrap |= R300_TX_CLAMP_TO_BORDER;
- break;
- case GL_MIRRORED_REPEAT:
- hw_twrap |= R300_TX_REPEAT | R300_TX_MIRRORED;
- break;
- case GL_MIRROR_CLAMP_EXT:
- hw_twrap |= R300_TX_CLAMP | R300_TX_MIRRORED;
- break;
- case GL_MIRROR_CLAMP_TO_EDGE_EXT:
- hw_twrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
- break;
- case GL_MIRROR_CLAMP_TO_BORDER_EXT:
- hw_twrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
- break;
- default:
- _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
- }
+ if (tObj->Target != GL_TEXTURE_1D) {
+ t->filter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
- switch (rwrap) {
- case GL_REPEAT:
- hw_qwrap |= R300_TX_REPEAT;
- break;
- case GL_CLAMP:
- hw_qwrap |= R300_TX_CLAMP;
- break;
- case GL_CLAMP_TO_EDGE:
- hw_qwrap |= R300_TX_CLAMP_TO_EDGE;
- break;
- case GL_CLAMP_TO_BORDER:
- hw_qwrap |= R300_TX_CLAMP_TO_BORDER;
- break;
- case GL_MIRRORED_REPEAT:
- hw_qwrap |= R300_TX_REPEAT | R300_TX_MIRRORED;
- break;
- case GL_MIRROR_CLAMP_EXT:
- hw_qwrap |= R300_TX_CLAMP | R300_TX_MIRRORED;
- break;
- case GL_MIRROR_CLAMP_TO_EDGE_EXT:
- hw_qwrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
- break;
- case GL_MIRROR_CLAMP_TO_BORDER_EXT:
- hw_qwrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
- break;
- default:
- _mesa_problem(NULL, "bad R wrap mode in %s", __FUNCTION__);
+ if (tObj->Target == GL_TEXTURE_3D)
+ t->filter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
}
-
- t->filter |= hw_swrap << R300_TX_WRAP_S_SHIFT;
- t->filter |= hw_twrap << R300_TX_WRAP_T_SHIFT;
- t->filter |= hw_qwrap << R300_TX_WRAP_Q_SHIFT;
}
-static void r300SetTexMaxAnisotropy(r300TexObjPtr t, GLfloat max)
+static GLuint aniso_filter(GLfloat anisotropy)
{
-
- t->filter &= ~R300_TX_MAX_ANISO_MASK;
-
- if (max <= 1.0) {
- t->filter |= R300_TX_MAX_ANISO_1_TO_1;
- } else if (max <= 2.0) {
- t->filter |= R300_TX_MAX_ANISO_2_TO_1;
- } else if (max <= 4.0) {
- t->filter |= R300_TX_MAX_ANISO_4_TO_1;
- } else if (max <= 8.0) {
- t->filter |= R300_TX_MAX_ANISO_8_TO_1;
+ if (anisotropy >= 16.0) {
+ return R300_TX_MAX_ANISO_16_TO_1;
+ } else if (anisotropy >= 8.0) {
+ return R300_TX_MAX_ANISO_8_TO_1;
+ } else if (anisotropy >= 4.0) {
+ return R300_TX_MAX_ANISO_4_TO_1;
+ } else if (anisotropy >= 2.0) {
+ return R300_TX_MAX_ANISO_2_TO_1;
} else {
- t->filter |= R300_TX_MAX_ANISO_16_TO_1;
+ return R300_TX_MAX_ANISO_1_TO_1;
}
}
@@ -184,54 +114,47 @@ static void r300SetTexMaxAnisotropy(r300TexObjPtr t, GLfloat max)
* \param t Texture whose filter modes are to be set
* \param minf Texture minification mode
* \param magf Texture magnification mode
+ * \param anisotropy Maximum anisotropy level
*/
-
-static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf)
+static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
{
- GLuint anisotropy = (t->filter & R300_TX_MAX_ANISO_MASK);
+ t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
+ t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
- t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MAG_FILTER_MASK);
+ /* Note that EXT_texture_filter_anisotropic is extremely vague about
+ * how anisotropic filtering interacts with the "normal" filter modes.
+ * When anisotropic filtering is enabled, we override min and mag
+ * filter settings completely. This includes driconf's settings.
+ */
+ if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
+ t->filter |= R300_TX_MAG_FILTER_ANISO
+ | R300_TX_MIN_FILTER_ANISO
+ | R300_TX_MIN_FILTER_MIP_LINEAR
+ | aniso_filter(anisotropy);
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy);
+ return;
+ }
- if (anisotropy == R300_TX_MAX_ANISO_1_TO_1) {
- switch (minf) {
- case GL_NEAREST:
- t->filter |= R300_TX_MIN_FILTER_NEAREST;
- break;
- case GL_LINEAR:
- t->filter |= R300_TX_MIN_FILTER_LINEAR;
- break;
- case GL_NEAREST_MIPMAP_NEAREST:
- t->filter |= R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST;
- break;
- case GL_NEAREST_MIPMAP_LINEAR:
- t->filter |= R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR;
- break;
- case GL_LINEAR_MIPMAP_NEAREST:
- t->filter |= R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST;
- break;
- case GL_LINEAR_MIPMAP_LINEAR:
- t->filter |= R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR;
- break;
- }
- } else {
- switch (minf) {
- case GL_NEAREST:
- t->filter |= R300_TX_MIN_FILTER_ANISO_NEAREST;
- break;
- case GL_LINEAR:
- t->filter |= R300_TX_MIN_FILTER_ANISO_LINEAR;
- break;
- case GL_NEAREST_MIPMAP_NEAREST:
- case GL_LINEAR_MIPMAP_NEAREST:
- t->filter |=
- R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST;
- break;
- case GL_NEAREST_MIPMAP_LINEAR:
- case GL_LINEAR_MIPMAP_LINEAR:
- t->filter |=
- R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR;
- break;
- }
+ switch (minf) {
+ case GL_NEAREST:
+ t->filter |= R300_TX_MIN_FILTER_NEAREST;
+ break;
+ case GL_LINEAR:
+ t->filter |= R300_TX_MIN_FILTER_LINEAR;
+ break;
+ case GL_NEAREST_MIPMAP_NEAREST:
+ t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
+ break;
+ case GL_NEAREST_MIPMAP_LINEAR:
+ t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
+ break;
+ case GL_LINEAR_MIPMAP_NEAREST:
+ t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
+ break;
+ case GL_LINEAR_MIPMAP_LINEAR:
+ t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
+ break;
}
/* Note we don't have 3D mipmaps so only use the mag filter setting
@@ -249,7 +172,7 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf)
static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4])
{
- t->pp_border_color = PACK_COLOR_8888(c[0], c[1], c[2], c[3]);
+ t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
}
/**
@@ -277,9 +200,8 @@ static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
make_empty_list(&t->base);
- r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR);
- r300SetTexMaxAnisotropy(t, texObj->MaxAnisotropy);
- r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter);
+ r300UpdateTexWrap(t);
+ r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
r300SetTexBorderColor(t, texObj->_BorderChan);
}
@@ -294,27 +216,20 @@ static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
const GLubyte littleEndian = *((const GLubyte *)&ui);
if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE
- && !littleEndian) || (srcFormat == GL_ABGR_EXT
- && srcType == GL_UNSIGNED_INT_8_8_8_8_REV)
- || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE
- && littleEndian)) {
+ (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+ (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+ (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
return &_mesa_texformat_rgba8888;
- } else
- if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV)
- || (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE
- && littleEndian) || (srcFormat == GL_ABGR_EXT
- && srcType == GL_UNSIGNED_INT_8_8_8_8)
- || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE
- && !littleEndian)) {
+ } else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+ (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+ (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+ (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
return &_mesa_texformat_rgba8888_rev;
- } else if (srcFormat == GL_BGRA &&
- ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
- srcType == GL_UNSIGNED_INT_8_8_8_8)) {
+ } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+ srcType == GL_UNSIGNED_INT_8_8_8_8)) {
return &_mesa_texformat_argb8888_rev;
- } else if (srcFormat == GL_BGRA &&
- ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
- srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
+ } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+ srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
return &_mesa_texformat_argb8888;
} else
return _dri_texformat_argb8888;
@@ -489,6 +404,25 @@ static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
case GL_RGBA32F_ARB:
return &_mesa_texformat_rgba_float32;
+ case GL_DEPTH_COMPONENT:
+ case GL_DEPTH_COMPONENT16:
+ case GL_DEPTH_COMPONENT24:
+ case GL_DEPTH_COMPONENT32:
+#if 0
+ switch (type) {
+ case GL_UNSIGNED_BYTE:
+ case GL_UNSIGNED_SHORT:
+ return &_mesa_texformat_z16;
+ case GL_UNSIGNED_INT:
+ return &_mesa_texformat_z32;
+ case GL_UNSIGNED_INT_24_8_EXT:
+ default:
+ return &_mesa_texformat_z24_s8;
+ }
+#else
+ return &_mesa_texformat_z16;
+#endif
+
default:
_mesa_problem(ctx,
"unexpected internalFormat 0x%x in r300ChooseTextureFormat",
@@ -563,34 +497,31 @@ r300ValidateClientStorage(GLcontext * ctx, GLenum target,
return 0;
}
- {
- GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
- format, type);
+ GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
+ format, type);
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "%s: srcRowStride %d/%x\n",
- __FUNCTION__, srcRowStride, srcRowStride);
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "%s: srcRowStride %d/%x\n",
+ __FUNCTION__, srcRowStride, srcRowStride);
- /* Could check this later in upload, pitch restrictions could be
- * relaxed, but would need to store the image pitch somewhere,
- * as packing details might change before image is uploaded:
- */
- if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
- || (srcRowStride & 63))
- return 0;
+ /* Could check this later in upload, pitch restrictions could be
+ * relaxed, but would need to store the image pitch somewhere,
+ * as packing details might change before image is uploaded:
+ */
+ if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
+ || (srcRowStride & 63))
+ return 0;
- /* Have validated that _mesa_transfer_teximage would be a straight
- * memcpy at this point. NOTE: future calls to TexSubImage will
- * overwrite the client data. This is explicitly mentioned in the
- * extension spec.
- */
- texImage->Data = (void *)pixels;
- texImage->IsClientData = GL_TRUE;
- texImage->RowStride =
- srcRowStride / texImage->TexFormat->TexelBytes;
+ /* Have validated that _mesa_transfer_teximage would be a straight
+ * memcpy at this point. NOTE: future calls to TexSubImage will
+ * overwrite the client data. This is explicitly mentioned in the
+ * extension spec.
+ */
+ texImage->Data = (void *)pixels;
+ texImage->IsClientData = GL_TRUE;
+ texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
- return 1;
- }
+ return 1;
}
static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
@@ -967,60 +898,6 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
t->dirty_images[0] |= (1 << level);
}
-static void r300TexEnv(GLcontext * ctx, GLenum target,
- GLenum pname, const GLfloat * param)
-{
- if (RADEON_DEBUG & DEBUG_STATE) {
- fprintf(stderr, "%s( %s )\n",
- __FUNCTION__, _mesa_lookup_enum_by_nr(pname));
- }
-
- /* This is incorrect: Need to maintain this data for each of
- * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch
- * between them according to _ReallyEnabled.
- */
- switch (pname) {
- case GL_TEXTURE_LOD_BIAS_EXT:{
-#if 0 /* Needs to be relocated in order to make sure we got the right tmu */
- GLfloat bias, min;
- GLuint b;
-
- /* The R300's LOD bias is a signed 2's complement value with a
- * range of -16.0 <= bias < 16.0.
- *
- * NOTE: Add a small bias to the bias for conform mipsel.c test.
- */
- bias = *param + .01;
- min =
- driQueryOptionb(&rmesa->radeon.optionCache,
- "no_neg_lod_bias") ? 0.0 : -16.0;
- bias = CLAMP(bias, min, 16.0);
-
- /* 0.0 - 16.0 == 0x0 - 0x1000 */
- /* 0.0 - -16.0 == 0x1001 - 0x1fff */
- b = 0x1000 / 16.0 * bias;
- b &= R300_LOD_BIAS_MASK;
-
- if (b !=
- (rmesa->hw.tex.unknown1.
- cmd[R300_TEX_VALUE_0 +
- unit] & R300_LOD_BIAS_MASK)) {
- R300_STATECHANGE(rmesa, tex.unknown1);
- rmesa->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 +
- unit] &=
- ~R300_LOD_BIAS_MASK;
- rmesa->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 +
- unit] |= b;
- }
-#endif
- break;
- }
-
- default:
- return;
- }
-}
-
/**
* Changes variables and flags for a state update, which will happen at the
* next UpdateTextureState
@@ -1041,14 +918,13 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
case GL_TEXTURE_MIN_FILTER:
case GL_TEXTURE_MAG_FILTER:
case GL_TEXTURE_MAX_ANISOTROPY_EXT:
- r300SetTexMaxAnisotropy(t, texObj->MaxAnisotropy);
- r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter);
+ r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
break;
case GL_TEXTURE_WRAP_S:
case GL_TEXTURE_WRAP_T:
case GL_TEXTURE_WRAP_R:
- r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR);
+ r300UpdateTexWrap(t);
break;
case GL_TEXTURE_BORDER_COLOR:
@@ -1067,13 +943,24 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
driSwapOutTextureObject((driTextureObject *) t);
break;
+ case GL_DEPTH_TEXTURE_MODE:
+ if (!texObj->Image[0][texObj->BaseLevel])
+ return;
+ if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat
+ == GL_DEPTH_COMPONENT) {
+ r300SetDepthTexMode(texObj);
+ break;
+ } else {
+ /* If the texture isn't a depth texture, changing this
+ * state won't cause any changes to the hardware.
+ * Don't force a flush of texture state.
+ */
+ return;
+ }
+
default:
return;
}
-
- /* Mark this texobj as dirty (one bit per tex unit)
- */
- t->dirty_state = TEX_ALL;
}
static void r300BindTexture(GLcontext * ctx, GLenum target,
@@ -1156,7 +1043,6 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
functions->DeleteTexture = r300DeleteTexture;
functions->IsTextureResident = driIsTextureResident;
- functions->TexEnv = r300TexEnv;
functions->TexParameter = r300TexParameter;
functions->CompressedTexImage2D = r300CompressedTexImage2D;
diff --git a/r300/r300_tex.h b/r300/r300_tex.h
index f67a8e6..b86d45b 100644
--- a/r300/r300_tex.h
+++ b/r300/r300_tex.h
@@ -35,6 +35,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#ifndef __r300_TEX_H__
#define __r300_TEX_H__
+extern void r300SetDepthTexMode(struct gl_texture_object *tObj);
+
extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
unsigned long long offset, GLint depth,
GLuint pitch);
diff --git a/r300/r300_texmem.c b/r300/r300_texmem.c
index e2e8355..69847a4 100644
--- a/r300/r300_texmem.c
+++ b/r300/r300_texmem.c
@@ -63,29 +63,16 @@ SOFTWARE.
*/
void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
{
+ int i;
+
if (RADEON_DEBUG & DEBUG_TEXTURE) {
fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
(void *)t, (void *)t->base.tObj);
}
- if (rmesa != NULL) {
- unsigned i;
-
- for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
- if (t == rmesa->state.texture.unit[i].texobj) {
- rmesa->state.texture.unit[i].texobj = NULL;
- /* This code below is meant to shorten state
- pushed to the hardware by not programming
- unneeded units.
-
- This does not appear to be worthwhile on R300 */
-#if 0
- remove_from_list(&rmesa->hw.tex[i]);
- make_empty_list(&rmesa->hw.tex[i]);
- remove_from_list(&rmesa->hw.cube[i]);
- make_empty_list(&rmesa->hw.cube[i]);
-#endif
- }
+ for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
+ if (rmesa->state.texture.unit[i].texobj == t) {
+ rmesa->state.texture.unit[i].texobj = NULL;
}
}
}
@@ -362,7 +349,7 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
imageWidth = texImage->Width;
imageHeight = texImage->Height;
- offset = t->bufAddr + t->base.totalSize / 6 * face;
+ offset = t->bufAddr;
if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
GLint imageX = 0;
@@ -518,7 +505,7 @@ int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
t->base.lastLevel);
}
- if (!t || t->base.totalSize == 0)
+ if (t->base.totalSize == 0)
return 0;
if (RADEON_DEBUG & DEBUG_SYNC) {
@@ -547,10 +534,6 @@ int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
/* hope it's safe to add that here... */
t->offset |= t->tile_bits;
}
-
- /* Mark this texobj as dirty on all units:
- */
- t->dirty_state = TEX_ALL;
}
/* Let the world know we've used this memory recently.
diff --git a/r300/r300_texstate.c b/r300/r300_texstate.c
index 8203189..d19832f 100644
--- a/r300/r300_texstate.c
+++ b/r300/r300_texstate.c
@@ -54,7 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \
|| ((f) >= MESA_FORMAT_RGBA_FLOAT32 && \
(f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \
- && tx_table_le[f].flag )
+ && tx_table[f].flag )
#define _ASSIGN(entry, format) \
[ MESA_FORMAT_ ## entry ] = { format, 0, 1}
@@ -70,53 +70,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
static const struct tx_table {
GLuint format, filter, flag;
-} tx_table_be[] = {
- /* *INDENT-OFF* */
- _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)),
- _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)),
- _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)),
- _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)),
- _ASSIGN(RGB888, 0xffffffff),
- _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
- _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
- _ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)),
- _ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)),
- _ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)),
- _ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)),
- _ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)),
- _ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)),
- _ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)),
- _ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)),
- _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)),
- _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)),
- _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)),
- _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE ),
- _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE),
- _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)),
- _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)),
- _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)),
- _ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)),
- _ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)),
- _ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)),
- _ASSIGN(RGB_FLOAT32, 0xffffffff),
- _ASSIGN(RGB_FLOAT16, 0xffffffff),
- _ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)),
- _ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)),
- _ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)),
- _ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)),
- _ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)),
- _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)),
- _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)),
- _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)),
- /* *INDENT-ON* */
-};
-
-static const struct tx_table tx_table_le[] = {
+} tx_table[] = {
/* *INDENT-OFF* */
+#ifdef MESA_LITTLE_ENDIAN
_ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)),
_ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)),
_ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)),
_ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)),
+#else
+ _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)),
+ _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)),
+ _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)),
+ _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)),
+#endif
_ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)),
_ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
_ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
@@ -131,8 +97,8 @@ static const struct tx_table tx_table_le[] = {
_ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)),
_ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)),
_ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)),
- _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE ),
- _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE),
+ _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE),
+ _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE),
_ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)),
_ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)),
_ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)),
@@ -149,11 +115,186 @@ static const struct tx_table tx_table_le[] = {
_ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)),
_ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)),
_ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)),
+ _ASSIGN(Z16, R300_EASY_TX_FORMAT(X, X, X, X, X16)),
+ _ASSIGN(Z24_S8, R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8)),
+ _ASSIGN(Z32, R300_EASY_TX_FORMAT(X, X, X, X, X32)),
/* *INDENT-ON* */
};
#undef _ASSIGN
+void r300SetDepthTexMode(struct gl_texture_object *tObj)
+{
+ static const GLuint formats[3][3] = {
+ {
+ R300_EASY_TX_FORMAT(X, X, X, ONE, X16),
+ R300_EASY_TX_FORMAT(X, X, X, X, X16),
+ R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X16),
+ },
+ {
+ R300_EASY_TX_FORMAT(X, X, X, ONE, X24_Y8),
+ R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8),
+ R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X24_Y8),
+ },
+ {
+ R300_EASY_TX_FORMAT(X, X, X, ONE, X32),
+ R300_EASY_TX_FORMAT(X, X, X, X, X32),
+ R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X32),
+ },
+ };
+ const GLuint *format;
+ r300TexObjPtr t;
+
+ if (!tObj)
+ return;
+
+ t = (r300TexObjPtr) tObj->DriverData;
+
+
+ switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
+ case MESA_FORMAT_Z16:
+ format = formats[0];
+ break;
+ case MESA_FORMAT_Z24_S8:
+ format = formats[1];
+ break;
+ case MESA_FORMAT_Z32:
+ format = formats[2];
+ break;
+ default:
+ /* Error...which should have already been caught by higher
+ * levels of Mesa.
+ */
+ ASSERT(0);
+ return;
+ }
+
+ switch (tObj->DepthMode) {
+ case GL_LUMINANCE:
+ t->format = format[0];
+ break;
+ case GL_INTENSITY:
+ t->format = format[1];
+ break;
+ case GL_ALPHA:
+ t->format = format[2];
+ break;
+ default:
+ /* Error...which should have already been caught by higher
+ * levels of Mesa.
+ */
+ ASSERT(0);
+ return;
+ }
+}
+
+
+/**
+ * Compute sizes and fill in offset and blit information for the given
+ * image (determined by \p face and \p level).
+ *
+ * \param curOffset points to the offset at which the image is to be stored
+ * and is updated by this function according to the size of the image.
+ */
+static void compute_tex_image_offset(
+ struct gl_texture_object *tObj,
+ GLuint face,
+ GLint level,
+ GLint* curOffset)
+{
+ r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+ const struct gl_texture_image* texImage;
+ GLuint blitWidth = R300_BLIT_WIDTH_BYTES;
+ GLuint texelBytes;
+ GLuint size;
+
+ texImage = tObj->Image[0][level + t->base.firstLevel];
+ if (!texImage)
+ return;
+
+ texelBytes = texImage->TexFormat->TexelBytes;
+
+ /* find image size in bytes */
+ if (texImage->IsCompressed) {
+ if ((t->format & R300_TX_FORMAT_DXT1) ==
+ R300_TX_FORMAT_DXT1) {
+ // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
+ if ((texImage->Width + 3) < 8) /* width one block */
+ size = texImage->CompressedSize * 4;
+ else if ((texImage->Width + 3) < 16)
+ size = texImage->CompressedSize * 2;
+ else
+ size = texImage->CompressedSize;
+ } else {
+ /* DXT3/5, 16 bytes per block */
+ WARN_ONCE
+ ("DXT 3/5 suffers from multitexturing problems!\n");
+ // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
+ if ((texImage->Width + 3) < 8)
+ size = texImage->CompressedSize * 2;
+ else
+ size = texImage->CompressedSize;
+ }
+ } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+ size =
+ ((texImage->Width * texelBytes +
+ 63) & ~63) * texImage->Height;
+ blitWidth = 64 / texelBytes;
+ } else if (t->tile_bits & R300_TXO_MICRO_TILE) {
+ /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+ though the actual offset may be different (if texture is less than
+ 32 bytes width) to the untiled case */
+ int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+ size =
+ (w * ((texImage->Height + 1) / 2)) *
+ texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+ } else {
+ int w = (texImage->Width * texelBytes + 31) & ~31;
+ size = w * texImage->Height * texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+ }
+ assert(size > 0);
+
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
+ texImage->Width, texImage->Height,
+ texImage->Depth,
+ texImage->TexFormat->TexelBytes,
+ texImage->InternalFormat);
+
+ /* All images are aligned to a 32-byte offset */
+ *curOffset = (*curOffset + 0x1f) & ~0x1f;
+
+ if (texelBytes) {
+ /* fix x and y coords up later together with offset */
+ t->image[face][level].x = *curOffset;
+ t->image[face][level].y = 0;
+ t->image[face][level].width =
+ MIN2(size / texelBytes, blitWidth);
+ t->image[face][level].height =
+ (size / texelBytes) / t->image[face][level].width;
+ } else {
+ t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES;
+ t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES;
+ t->image[face][level].width =
+ MIN2(size, R300_BLIT_WIDTH_BYTES);
+ t->image[face][level].height = size / t->image[face][level].width;
+ }
+
+ if (RADEON_DEBUG & DEBUG_TEXTURE)
+ fprintf(stderr,
+ "level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+ level, face, texImage->Width, texImage->Height,
+ t->image[face][level].x, t->image[face][level].y,
+ t->image[face][level].width, t->image[face][level].height,
+ size, *curOffset);
+
+ *curOffset += size;
+}
+
+
+
/**
* This function computes the number of bytes of storage needed for
* the given texture object (all mipmap levels, all cube faces).
@@ -171,29 +312,22 @@ static void r300SetTexImages(r300ContextPtr rmesa,
r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
const struct gl_texture_image *baseImage =
tObj->Image[0][tObj->BaseLevel];
- GLint curOffset, blitWidth;
+ GLint curOffset;
GLint i, texelBytes;
GLint numLevels;
GLint log2Width, log2Height, log2Depth;
/* Set the hardware texture format
*/
- if (!t->image_override && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) {
- if (_mesa_little_endian()) {
- t->format =
- tx_table_le[baseImage->TexFormat->MesaFormat].
- format;
- t->filter |=
- tx_table_le[baseImage->TexFormat->MesaFormat].
- filter;
+ if (!t->image_override
+ && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) {
+ if (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
+ r300SetDepthTexMode(tObj);
} else {
- t->format =
- tx_table_be[baseImage->TexFormat->MesaFormat].
- format;
- t->filter |=
- tx_table_be[baseImage->TexFormat->MesaFormat].
- filter;
+ t->format = tx_table[baseImage->TexFormat->MesaFormat].format;
}
+
+ t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter;
} else if (!t->image_override) {
_mesa_problem(NULL, "unexpected texture format in %s",
__FUNCTION__);
@@ -217,8 +351,6 @@ static void r300SetTexImages(r300ContextPtr rmesa,
* The idea is that we lay out the mipmap levels within a block of
* memory organized as a rectangle of width BLIT_WIDTH_BYTES.
*/
- curOffset = 0;
- blitWidth = R300_BLIT_WIDTH_BYTES;
t->tile_bits = 0;
/* figure out if this texture is suitable for tiling. */
@@ -248,94 +380,23 @@ static void r300SetTexImages(r300ContextPtr rmesa,
}
#endif
- for (i = 0; i < numLevels; i++) {
- const struct gl_texture_image *texImage;
- GLuint size;
-
- texImage = tObj->Image[0][i + t->base.firstLevel];
- if (!texImage)
- break;
-
- /* find image size in bytes */
- if (texImage->IsCompressed) {
- if ((t->format & R300_TX_FORMAT_DXT1) ==
- R300_TX_FORMAT_DXT1) {
- // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
- if ((texImage->Width + 3) < 8) /* width one block */
- size = texImage->CompressedSize * 4;
- else if ((texImage->Width + 3) < 16)
- size = texImage->CompressedSize * 2;
- else
- size = texImage->CompressedSize;
- } else {
- /* DXT3/5, 16 bytes per block */
- WARN_ONCE
- ("DXT 3/5 suffers from multitexturing problems!\n");
- // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
- if ((texImage->Width + 3) < 8)
- size = texImage->CompressedSize * 2;
- else
- size = texImage->CompressedSize;
- }
- } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- size =
- ((texImage->Width * texelBytes +
- 63) & ~63) * texImage->Height;
- blitWidth = 64 / texelBytes;
- } else if (t->tile_bits & R300_TXO_MICRO_TILE) {
- /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
- though the actual offset may be different (if texture is less than
- 32 bytes width) to the untiled case */
- int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
- size =
- (w * ((texImage->Height + 1) / 2)) *
- texImage->Depth;
- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
- } else {
- int w = (texImage->Width * texelBytes + 31) & ~31;
- size = w * texImage->Height * texImage->Depth;
- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
- }
- assert(size > 0);
-
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
- texImage->Width, texImage->Height,
- texImage->Depth,
- texImage->TexFormat->TexelBytes,
- texImage->InternalFormat);
-
- /* Align to 32-byte offset. It is faster to do this unconditionally
- * (no branch penalty).
- */
+ curOffset = 0;
- curOffset = (curOffset + 0x1f) & ~0x1f;
+ if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+ ASSERT(log2Width == log2Height);
+ t->format |= R300_TX_FORMAT_CUBIC_MAP;
- if (texelBytes) {
- /* fix x and y coords up later together with offset */
- t->image[0][i].x = curOffset;
- t->image[0][i].y = 0;
- t->image[0][i].width =
- MIN2(size / texelBytes, blitWidth);
- t->image[0][i].height =
- (size / texelBytes) / t->image[0][i].width;
- } else {
- t->image[0][i].x = curOffset % R300_BLIT_WIDTH_BYTES;
- t->image[0][i].y = curOffset / R300_BLIT_WIDTH_BYTES;
- t->image[0][i].width =
- MIN2(size, R300_BLIT_WIDTH_BYTES);
- t->image[0][i].height = size / t->image[0][i].width;
+ for(i = 0; i < numLevels; i++) {
+ GLuint face;
+ for(face = 0; face < 6; face++)
+ compute_tex_image_offset(tObj, face, i, &curOffset);
}
+ } else {
+ if (tObj->Target == GL_TEXTURE_3D)
+ t->format |= R300_TX_FORMAT_3D;
- if (RADEON_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr,
- "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
- i, texImage->Width, texImage->Height,
- t->image[0][i].x, t->image[0][i].y,
- t->image[0][i].width, t->image[0][i].height,
- size, curOffset);
-
- curOffset += size;
+ for (i = 0; i < numLevels; i++)
+ compute_tex_image_offset(tObj, 0, i, &curOffset);
}
/* Align the total size of texture memory block.
@@ -343,43 +404,27 @@ static void r300SetTexImages(r300ContextPtr rmesa,
t->base.totalSize =
(curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
- /* Setup remaining cube face blits, if needed */
- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
- GLuint face;
- for (face = 1; face < 6; face++) {
- for (i = 0; i < numLevels; i++) {
- t->image[face][i].x = t->image[0][i].x;
- t->image[face][i].y = t->image[0][i].y;
- t->image[face][i].width = t->image[0][i].width;
- t->image[face][i].height =
- t->image[0][i].height;
- }
- }
- t->base.totalSize *= 6; /* total texmem needed */
- }
-
- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
- ASSERT(log2Width == log2Height);
- t->format |= R300_TX_FORMAT_CUBIC_MAP;
- }
-
t->size =
(((tObj->Image[0][t->base.firstLevel]->Width -
1) << R300_TX_WIDTHMASK_SHIFT)
| ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
- R300_TX_HEIGHTMASK_SHIFT))
+ R300_TX_HEIGHTMASK_SHIFT)
+ | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) <<
+ R300_TX_DEPTHMASK_SHIFT))
| ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
+ t->pitch = 0;
+
/* Only need to round to nearest 32 for textures, but the blitter
* requires 64-byte aligned pitches, and we may/may not need the
* blitter. NPOT only!
*/
if (baseImage->IsCompressed) {
- t->pitch =
+ t->pitch |=
(tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- unsigned int align = blitWidth - 1;
- t->pitch = ((tObj->Image[0][t->base.firstLevel]->Width *
+ unsigned int align = (64 / texelBytes) - 1;
+ t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width *
texelBytes) + 63) & ~(63);
t->size |= R300_TX_SIZE_TXPITCH_EN;
if (!t->image_override)
@@ -387,14 +432,17 @@ static void r300SetTexImages(r300ContextPtr rmesa,
(((tObj->Image[0][t->base.firstLevel]->Width) +
align) & ~align) - 1;
} else {
- t->pitch =
+ t->pitch |=
((tObj->Image[0][t->base.firstLevel]->Width *
texelBytes) + 63) & ~(63);
}
- t->dirty_state = TEX_ALL;
-
- /* FYI: r300UploadTexImages( rmesa, t ) used to be called here */
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ if (tObj->Image[0][t->base.firstLevel]->Width > 2048)
+ t->pitch_reg |= R500_TXWIDTH_BIT11;
+ if (tObj->Image[0][t->base.firstLevel]->Height > 2048)
+ t->pitch_reg |= R500_TXHEIGHT_BIT11;
+ }
}
/* ================================================================
@@ -526,27 +574,25 @@ static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
*/
rmesa->state.texture.unit[unit].texobj->base.bound &=
- ~(1UL << unit);
+ ~(1 << unit);
}
rmesa->state.texture.unit[unit].texobj = t;
- t->base.bound |= (1UL << unit);
- t->dirty_state |= 1 << unit;
+ t->base.bound |= (1 << unit);
driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */
}
return !t->border_fallback;
}
-void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
unsigned long long offset, GLint depth, GLuint pitch)
{
- r300ContextPtr rmesa =
- (r300ContextPtr)((__DRIcontextPrivate*)pDRICtx->private)->driverPrivate;
+ r300ContextPtr rmesa = pDRICtx->driverPrivate;
struct gl_texture_object *tObj =
- _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+ _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
r300TexObjPtr t;
- int idx;
+ uint32_t pitch_val;
if (!tObj)
return;
@@ -559,28 +605,30 @@ void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
return;
t->offset = offset;
- t->pitch_reg = pitch;
+ t->pitch_reg &= (1 << 13) -1;
+ pitch_val = pitch;
switch (depth) {
case 32:
- idx = 2;
- t->pitch_reg /= 4;
+ t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+ t->filter |= tx_table[2].filter;
+ pitch_val /= 4;
break;
case 24:
default:
- idx = 4;
- t->pitch_reg /= 4;
+ t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+ t->filter |= tx_table[4].filter;
+ pitch_val /= 4;
break;
case 16:
- idx = 5;
- t->pitch_reg /= 2;
+ t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+ t->filter |= tx_table[5].filter;
+ pitch_val /= 2;
break;
}
+ pitch_val--;
- t->pitch_reg--;
-
- t->format = tx_table_le[idx].format;
- t->filter |= tx_table_le[idx].filter;
+ t->pitch_reg |= pitch_val;
}
static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit)
diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c
index 1d90ade..c4e325e 100644
--- a/r300/r300_vertprog.c
+++ b/r300/r300_vertprog.c
@@ -1,6 +1,7 @@
/**************************************************************************
-Copyright (C) 2005 Aapo Tahkola.
+Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
+Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
All Rights Reserved.
@@ -25,16 +26,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
-/**
- * \file
- *
- * \author Aapo Tahkola <aet@rasterburn.org>
- */
+/* Radeon R5xx Acceleration, Revision 1.2 */
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-#include "program.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
#include "shader/prog_instruction.h"
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
@@ -42,64 +39,35 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_context.h"
-#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
- SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
- SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
- SWIZZLE_W != VSF_IN_COMPONENT_W || \
- SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
- SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
- WRITEMASK_X != VSF_FLAG_X || \
- WRITEMASK_Y != VSF_FLAG_Y || \
- WRITEMASK_Z != VSF_FLAG_Z || \
- WRITEMASK_W != VSF_FLAG_W
-#error Cannot change these!
-#endif
+/* TODO: Get rid of t_src_class call */
+#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
+ ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \
+ t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \
+ (t_src_class(a.File) == PVS_SRC_REG_INPUT && \
+ t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ */
+#define __CONST(x, y) \
+ (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_src_class(src[x].File), \
+ VSF_FLAG_NONE) | (src[x].RelAddr << 4))
-#define SCALAR_FLAG (1<<31)
-#define FLAG_MASK (1<<31)
-#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
-#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
-
-static struct {
- char *name;
- int opcode;
- unsigned long ip; /* number of input operands and flags */
-} op_names[] = {
- /* *INDENT-OFF* */
- OPN(ABS, 1),
- OPN(ADD, 2),
- OPN(ARL, 1 | SCALAR_FLAG),
- OPN(DP3, 2),
- OPN(DP4, 2),
- OPN(DPH, 2),
- OPN(DST, 2),
- OPN(EX2, 1 | SCALAR_FLAG),
- OPN(EXP, 1 | SCALAR_FLAG),
- OPN(FLR, 1),
- OPN(FRC, 1),
- OPN(LG2, 1 | SCALAR_FLAG),
- OPN(LIT, 1),
- OPN(LOG, 1 | SCALAR_FLAG),
- OPN(MAD, 3),
- OPN(MAX, 2),
- OPN(MIN, 2),
- OPN(MOV, 1),
- OPN(MUL, 2),
- OPN(POW, 2 | SCALAR_FLAG),
- OPN(RCP, 1 | SCALAR_FLAG),
- OPN(RSQ, 1 | SCALAR_FLAG),
- OPN(SGE, 2),
- OPN(SLT, 2),
- OPN(SUB, 2),
- OPN(SWZ, 1),
- OPN(XPD, 2),
- OPN(RCC, 0), //extra
- OPN(PRINT, 0),
- OPN(END, 0)
- /* *INDENT-ON* */
-};
-
-#undef OPN
+#define FREE_TEMPS() \
+ do { \
+ int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
+ if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
+ WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
+ vp->native = GL_FALSE; \
+ } \
+ u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
+ } while (0)
int r300VertexProgUpdateParams(GLcontext * ctx,
struct r300_vertex_program_cont *vp, float *dst)
@@ -133,7 +101,6 @@ int r300VertexProgUpdateParams(GLcontext * ctx,
paramList = mesa_vp->Base.Parameters;
for (pi = 0; pi < paramList->NumParameters; pi++) {
switch (paramList->Parameters[pi].Type) {
-
case PROGRAM_STATE_VAR:
case PROGRAM_NAMED_PARAM:
//fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
@@ -143,7 +110,6 @@ int r300VertexProgUpdateParams(GLcontext * ctx,
*dst++ = paramList->ParameterValues[pi][2];
*dst++ = paramList->ParameterValues[pi][3];
break;
-
default:
_mesa_problem(NULL, "Bad param type in %s",
__FUNCTION__);
@@ -165,11 +131,11 @@ static unsigned long t_dst_class(enum register_file file)
switch (file) {
case PROGRAM_TEMPORARY:
- return VSF_OUT_CLASS_TMP;
+ return PVS_DST_REG_TEMPORARY;
case PROGRAM_OUTPUT:
- return VSF_OUT_CLASS_RESULT;
+ return PVS_DST_REG_OUT;
case PROGRAM_ADDRESS:
- return VSF_OUT_CLASS_ADDR;
+ return PVS_DST_REG_A0;
/*
case PROGRAM_INPUT:
case PROGRAM_LOCAL_PARAM:
@@ -197,19 +163,17 @@ static unsigned long t_dst_index(struct r300_vertex_program *vp,
static unsigned long t_src_class(enum register_file file)
{
-
switch (file) {
case PROGRAM_TEMPORARY:
- return VSF_IN_CLASS_TMP;
-
+ return PVS_SRC_REG_TEMPORARY;
case PROGRAM_INPUT:
- return VSF_IN_CLASS_ATTR;
-
+ return PVS_SRC_REG_INPUT;
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
case PROGRAM_STATE_VAR:
- return VSF_IN_CLASS_PARAM;
+ return PVS_SRC_REG_CONSTANT;
/*
case PROGRAM_OUTPUT:
case PROGRAM_WRITE_ONLY:
@@ -222,7 +186,7 @@ static unsigned long t_src_class(enum register_file file)
}
}
-static __inline unsigned long t_swizzle(GLubyte swizzle)
+static INLINE unsigned long t_swizzle(GLubyte swizzle)
{
/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
return swizzle;
@@ -276,13 +240,15 @@ static unsigned long t_src_index(struct r300_vertex_program *vp,
}
}
+/* these two functions should probably be merged... */
+
static unsigned long t_src(struct r300_vertex_program *vp,
struct prog_src_register *src)
{
/* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
*/
- return MAKE_VSF_SOURCE(t_src_index(vp, src),
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 1)),
t_swizzle(GET_SWZ(src->Swizzle, 2)),
@@ -294,8 +260,10 @@ static unsigned long t_src(struct r300_vertex_program *vp,
static unsigned long t_src_scalar(struct r300_vertex_program *vp,
struct prog_src_register *src)
{
-
- return MAKE_VSF_SOURCE(t_src_index(vp, src),
+ /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
@@ -306,128 +274,687 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp,
(src->RelAddr << 4);
}
-static unsigned long t_opcode(enum prog_opcode opcode)
+static GLboolean valid_dst(struct r300_vertex_program *vp,
+ struct prog_dst_register *dst)
{
+ if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return GL_FALSE;
+ } else if (dst->File == PROGRAM_ADDRESS) {
+ assert(dst->Index == 0);
+ }
- switch (opcode) {
- /* *INDENT-OFF* */
- case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
- case OPCODE_DST: return R300_VPI_OUT_OP_DST;
- case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
- case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
- case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
- case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
- case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
- case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
- case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
- case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
- case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
- case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
- case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
- case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
- case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
- /* *INDENT-ON* */
+ return GL_TRUE;
+}
- default:
- fprintf(stderr, "%s: Should not be called with opcode %d!",
- __FUNCTION__, opcode);
- }
- _mesa_exit(-1);
- return 0;
+static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
+ t_src_class(src[0].File),
+ (!src[0].
+ NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ inst[3] = 0;
+
+ return inst;
}
-static unsigned long op_operands(enum prog_opcode opcode)
+static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
{
- int i;
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
- /* Can we trust mesas opcodes to be in order ? */
- for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++)
- if (op_names[i].opcode == opcode)
- return op_names[i].ip;
+static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
- fprintf(stderr, "op %d not found in op_names\n", opcode);
- _mesa_exit(-1);
- return 0;
+static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+ SWIZZLE_ZERO,
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ inst[2] =
+ PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
+ t_src_class(src[1].File),
+ src[1].
+ NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+ (src[1].RelAddr << 4);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
}
-static GLboolean valid_dst(struct r300_vertex_program *vp,
- struct prog_dst_register *dst)
+static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
{
- if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
- return GL_FALSE;
- } else if (dst->File == PROGRAM_ADDRESS) {
- assert(dst->Index == 0);
- }
+ inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
- return GL_TRUE;
+static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
+ inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+ PVS_SRC_SELECT_FORCE_1,
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
}
-/* TODO: Get rid of t_src_class call */
-#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
- ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
- t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
- (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
- t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \
-
-#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
- SWIZZLE_ZERO, SWIZZLE_ZERO, \
- SWIZZLE_ZERO, SWIZZLE_ZERO, \
- t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
-
-#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
- SWIZZLE_ZERO, SWIZZLE_ZERO, \
- SWIZZLE_ZERO, SWIZZLE_ZERO, \
- t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
-
-#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
- SWIZZLE_ZERO, SWIZZLE_ZERO, \
- SWIZZLE_ZERO, SWIZZLE_ZERO, \
- t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
-
-#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
- SWIZZLE_ONE, SWIZZLE_ONE, \
- SWIZZLE_ONE, SWIZZLE_ONE, \
- t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
-
-#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
- SWIZZLE_ONE, SWIZZLE_ONE, \
- SWIZZLE_ONE, SWIZZLE_ONE, \
- t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
-
-#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
- SWIZZLE_ONE, SWIZZLE_ONE, \
- SWIZZLE_ONE, SWIZZLE_ONE, \
- t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
+static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
-/* DP4 version seems to trigger some hw peculiarity */
-//#define PREFER_DP4
+static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
-#define FREE_TEMPS() \
- do { \
- if(u_temp_i < vp->num_temporaries) { \
- WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
- vp->native = GL_FALSE; \
- } \
- u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
- } while (0)
+static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
-static void r300TranslateVertexShader(struct r300_vertex_program *vp,
- struct prog_instruction *vpi)
+static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3],
+ int *u_temp_i)
{
- int i, cur_reg = 0;
- VERTEX_SHADER_INSTRUCTION *o_inst;
- unsigned long operands;
- int are_srcs_scalar;
- unsigned long hw_op;
- /* Initial value should be last tmp reg that hw supports.
- Strangely enough r300 doesnt mind even though these would be out of range.
- Smart enough to realize that it doesnt need it? */
- int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
- struct prog_src_register src[3];
+ /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
+ ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
+ GL_FALSE,
+ GL_FALSE,
+ *u_temp_i,
+ t_dst_mask(vpi->DstReg.WriteMask),
+ PVS_DST_REG_TEMPORARY);
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst += 4;
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = PVS_SRC_OPERAND(*u_temp_i,
+ PVS_SRC_SELECT_X,
+ PVS_SRC_SELECT_Y,
+ PVS_SRC_SELECT_Z,
+ PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
+ /* Not 100% sure about this */
+ (!src[0].
+ NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE
+ /*VSF_FLAG_ALL */ );
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+ (*u_temp_i)--;
+
+ return inst;
+}
- vp->pos_end = 0; /* Not supported yet */
- vp->program.length = 0;
- /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
+static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
+
+ inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+ inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ /* NOTE: Users swizzling might not work. */
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+ GL_FALSE,
+ GL_TRUE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = t_src(vp, &src[2]);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = t_src_scalar(vp, &src[1]);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+
+#if 0
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+ t_src_class(src[1].File),
+ (!src[1].
+ NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[1].RelAddr << 4);
+ inst[3] = 0;
+#else
+ inst[0] =
+ PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ONE);
+ inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+ t_src_class(src[1].File),
+ (!src[1].
+ NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[1].RelAddr << 4);
+#endif
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3],
+ int *u_temp_i)
+{
+ /* mul r0, r1.yzxw, r2.zxyw
+ mad r0, -r2.yzxw, r1.zxyw, r0
+ */
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ *u_temp_i,
+ t_dst_mask(vpi->DstReg.WriteMask),
+ PVS_DST_REG_TEMPORARY);
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
+ t_src_class(src[1].File),
+ src[1].
+ NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[1].RelAddr << 4);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+ inst += 4;
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
+ t_src_class(src[1].File),
+ (!src[1].
+ NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[1].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ t_src_class(src[0].File),
+ src[0].
+ NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+ (src[0].RelAddr << 4);
+ inst[3] =
+ PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
+ PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
+ PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
+
+ (*u_temp_i)--;
+
+ return inst;
+}
+
+static void t_inputs_outputs(struct r300_vertex_program *vp)
+{
+ int i;
+ int cur_reg = 0;
for (i = 0; i < VERT_ATTRIB_MAX; i++)
vp->inputs[i] = -1;
@@ -437,39 +964,71 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
- /* Assign outputs */
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS))
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
+ }
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+ }
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0))
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+ }
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1))
- vp->outputs[VERT_RESULT_COL1] = cur_reg++;
-
-#if 0 /* Not supported yet */
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0))
- vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
+ vp->outputs[VERT_RESULT_COL1] =
+ vp->outputs[VERT_RESULT_COL0] + 1;
+ cur_reg = vp->outputs[VERT_RESULT_COL1] + 1;
+ }
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1))
- vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ vp->outputs[VERT_RESULT_BFC0] =
+ vp->outputs[VERT_RESULT_COL0] + 2;
+ cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2;
+ }
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC))
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ vp->outputs[VERT_RESULT_BFC1] =
+ vp->outputs[VERT_RESULT_COL0] + 3;
+ cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
+ }
+#if 0
+ if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+ }
#endif
- for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
- if (vp->key.OutputsWritten & (1 << i))
+ for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
+ if (vp->key.OutputsWritten & (1 << i)) {
vp->outputs[i] = cur_reg++;
+ }
+ }
+}
+
+static void r300TranslateVertexShader(struct r300_vertex_program *vp,
+ struct prog_instruction *vpi)
+{
+ int i;
+ GLuint *inst;
+ unsigned long num_operands;
+ /* Initial value should be last tmp reg that hw supports.
+ Strangely enough r300 doesnt mind even though these would be out of range.
+ Smart enough to realize that it doesnt need it? */
+ int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
+ struct prog_src_register src[3];
+ vp->pos_end = 0; /* Not supported yet */
+ vp->program.length = 0;
+ /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
vp->translated = GL_TRUE;
vp->native = GL_TRUE;
- o_inst = vp->program.body.i;
- for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) {
+ t_inputs_outputs(vp);
+
+ for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END;
+ vpi++, inst += 4) {
+
FREE_TEMPS();
if (!valid_dst(vp, &vpi->DstReg)) {
@@ -478,61 +1037,64 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
vpi->DstReg.Index = u_temp_i;
}
- operands = op_operands(vpi->Opcode);
- are_srcs_scalar = operands & SCALAR_FLAG;
- operands &= OP_MASK;
+ num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
- for (i = 0; i < operands; i++)
+ /* copy the sources (src) from mesa into a local variable... is this needed? */
+ for (i = 0; i < num_operands; i++) {
src[i] = vpi->SrcReg[i];
+ }
- if (operands == 3) { /* TODO: scalars */
+ if (num_operands == 3) { /* TODO: scalars */
if (CMP_SRCS(src[1], src[2])
|| CMP_SRCS(src[0], src[2])) {
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
- VSF_FLAG_ALL,
- VSF_OUT_CLASS_TMP);
-
- o_inst->src[0] =
- MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
- SWIZZLE_X, SWIZZLE_Y,
- SWIZZLE_Z, SWIZZLE_W,
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ u_temp_i,
+ VSF_FLAG_ALL,
+ PVS_DST_REG_TEMPORARY);
+ inst[1] =
+ PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
+ SWIZZLE_X,
+ SWIZZLE_Y,
+ SWIZZLE_Z,
+ SWIZZLE_W,
t_src_class(src[2].File),
VSF_FLAG_NONE) | (src[2].
RelAddr <<
4);
-
- o_inst->src[1] = ZERO_SRC_2;
- o_inst->src[2] = ZERO_SRC_2;
- o_inst++;
+ inst[2] = __CONST(2, SWIZZLE_ZERO);
+ inst[3] = __CONST(2, SWIZZLE_ZERO);
+ inst += 4;
src[2].File = PROGRAM_TEMPORARY;
src[2].Index = u_temp_i;
src[2].RelAddr = 0;
u_temp_i--;
}
-
}
- if (operands >= 2) {
+ if (num_operands >= 2) {
if (CMP_SRCS(src[1], src[0])) {
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
- VSF_FLAG_ALL,
- VSF_OUT_CLASS_TMP);
-
- o_inst->src[0] =
- MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
- SWIZZLE_X, SWIZZLE_Y,
- SWIZZLE_Z, SWIZZLE_W,
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ u_temp_i,
+ VSF_FLAG_ALL,
+ PVS_DST_REG_TEMPORARY);
+ inst[1] =
+ PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+ SWIZZLE_X,
+ SWIZZLE_Y,
+ SWIZZLE_Z,
+ SWIZZLE_W,
t_src_class(src[0].File),
VSF_FLAG_NONE) | (src[0].
RelAddr <<
4);
-
- o_inst->src[1] = ZERO_SRC_0;
- o_inst->src[2] = ZERO_SRC_0;
- o_inst++;
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst += 4;
src[0].File = PROGRAM_TEMPORARY;
src[0].Index = u_temp_i;
@@ -541,517 +1103,118 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
}
}
- /* These ops need special handling. */
switch (vpi->Opcode) {
- case OPCODE_POW:
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_POW,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- o_inst->src[0] = t_src_scalar(vp, &src[0]);
- o_inst->src[1] = ZERO_SRC_0;
- o_inst->src[2] = t_src_scalar(vp, &src[1]);
- goto next;
-
- case OPCODE_MOV: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
- case OPCODE_SWZ:
-#if 1
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = ZERO_SRC_0;
- o_inst->src[2] = ZERO_SRC_0;
-#else
- hw_op =
- (src[0].File ==
- PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
- R300_VPI_OUT_OP_MAD;
-
- o_inst->op =
- MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = ONE_SRC_0;
- o_inst->src[2] = ZERO_SRC_0;
-#endif
-
- goto next;
-
+ case OPCODE_ABS:
+ inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
+ break;
case OPCODE_ADD:
-#if 1
- hw_op = (src[0].File == PROGRAM_TEMPORARY &&
- src[1].File ==
- PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
- R300_VPI_OUT_OP_MAD;
-
- o_inst->op =
- MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- o_inst->src[0] = ONE_SRC_0;
- o_inst->src[1] = t_src(vp, &src[0]);
- o_inst->src[2] = t_src(vp, &src[1]);
-#else
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = t_src(vp, &src[1]);
- o_inst->src[2] = ZERO_SRC_1;
-
-#endif
- goto next;
-
- case OPCODE_MAD:
- hw_op = (src[0].File == PROGRAM_TEMPORARY &&
- src[1].File == PROGRAM_TEMPORARY &&
- src[2].File ==
- PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
- R300_VPI_OUT_OP_MAD;
-
- o_inst->op =
- MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = t_src(vp, &src[1]);
- o_inst->src[2] = t_src(vp, &src[2]);
- goto next;
-
- case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
- hw_op = (src[0].File == PROGRAM_TEMPORARY &&
- src[1].File ==
- PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
- R300_VPI_OUT_OP_MAD;
-
- o_inst->op =
- MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = t_src(vp, &src[1]);
-
- o_inst->src[2] = ZERO_SRC_1;
- goto next;
-
- case OPCODE_DP3: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_DOT,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
-
- o_inst->src[0] =
- MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 2)),
- SWIZZLE_ZERO,
- t_src_class(src[0].File),
- src[0].
- NegateBase ? VSF_FLAG_XYZ :
- VSF_FLAG_NONE) | (src[0].
- RelAddr << 4);
-
- o_inst->src[1] =
- MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 2)),
- SWIZZLE_ZERO,
- t_src_class(src[1].File),
- src[1].
- NegateBase ? VSF_FLAG_XYZ :
- VSF_FLAG_NONE) | (src[1].
- RelAddr << 4);
-
- o_inst->src[2] = ZERO_SRC_1;
- goto next;
-
- case OPCODE_SUB: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-#if 1
- hw_op = (src[0].File == PROGRAM_TEMPORARY &&
- src[1].File ==
- PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
- R300_VPI_OUT_OP_MAD;
-
- o_inst->op =
- MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = ONE_SRC_0;
- o_inst->src[2] =
- MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 2)),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 3)),
- t_src_class(src[1].File),
- (!src[1].
- NegateBase) ? VSF_FLAG_ALL :
- VSF_FLAG_NONE) | (src[1].
- RelAddr << 4);
-#else
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
-
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] =
- MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 2)),
- t_swizzle(GET_SWZ
- (src[1].Swizzle, 3)),
- t_src_class(src[1].File),
- (!src[1].
- NegateBase) ? VSF_FLAG_ALL :
- VSF_FLAG_NONE) | (src[1].
- RelAddr << 4);
- o_inst->src[2] = 0;
-#endif
- goto next;
-
- case OPCODE_ABS: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_MAX,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
-
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] =
- MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 2)),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 3)),
- t_src_class(src[0].File),
- (!src[0].
- NegateBase) ? VSF_FLAG_ALL :
- VSF_FLAG_NONE) | (src[0].
- RelAddr << 4);
- o_inst->src[2] = 0;
- goto next;
-
+ inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
+ break;
+ case OPCODE_ARL:
+ inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
+ break;
+ case OPCODE_DP3:
+ inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
+ break;
+ case OPCODE_DP4:
+ inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
+ break;
+ case OPCODE_DPH:
+ inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
+ break;
+ case OPCODE_DST:
+ inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
+ break;
+ case OPCODE_EX2:
+ inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
+ break;
+ case OPCODE_EXP:
+ inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
+ break;
case OPCODE_FLR:
- /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
- ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
-
- o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
- t_dst_mask(vpi->DstReg.
- WriteMask),
- VSF_OUT_CLASS_TMP);
-
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = ZERO_SRC_0;
- o_inst->src[2] = ZERO_SRC_0;
- o_inst++;
-
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
-
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = MAKE_VSF_SOURCE(u_temp_i,
- VSF_IN_COMPONENT_X,
- VSF_IN_COMPONENT_Y,
- VSF_IN_COMPONENT_Z,
- VSF_IN_COMPONENT_W,
- VSF_IN_CLASS_TMP,
- /* Not 100% sure about this */
- (!src[0].
- NegateBase) ?
- VSF_FLAG_ALL :
- VSF_FLAG_NONE
- /*VSF_FLAG_ALL */ );
-
- o_inst->src[2] = ZERO_SRC_0;
- u_temp_i--;
- goto next;
-
- case OPCODE_LG2: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_LG2,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
-
- o_inst->src[0] =
- MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 0)),
- t_src_class(src[0].File),
- src[0].
- NegateBase ? VSF_FLAG_ALL :
- VSF_FLAG_NONE) | (src[0].
- RelAddr << 4);
- o_inst->src[1] = ZERO_SRC_0;
- o_inst->src[2] = ZERO_SRC_0;
- goto next;
-
- case OPCODE_LIT: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_LIT,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- /* NOTE: Users swizzling might not work. */
- o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
- VSF_IN_COMPONENT_ZERO, // z
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
- t_src_class(src[0].
- File),
- src[0].
- NegateBase ?
- VSF_FLAG_ALL :
- VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
- VSF_IN_COMPONENT_ZERO, // z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
- t_src_class(src[0].
- File),
- src[0].
- NegateBase ?
- VSF_FLAG_ALL :
- VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
- VSF_IN_COMPONENT_ZERO, // z
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
- t_src_class(src[0].
- File),
- src[0].
- NegateBase ?
- VSF_FLAG_ALL :
- VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- goto next;
-
- case OPCODE_DPH: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_DOT,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
-
- o_inst->src[0] =
- MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ
- (src[0].Swizzle, 2)),
- VSF_IN_COMPONENT_ONE,
- t_src_class(src[0].File),
- src[0].
- NegateBase ? VSF_FLAG_XYZ :
- VSF_FLAG_NONE) | (src[0].
- RelAddr << 4);
- o_inst->src[1] = t_src(vp, &src[1]);
- o_inst->src[2] = ZERO_SRC_1;
- goto next;
-
- case OPCODE_XPD:
- /* mul r0, r1.yzxw, r2.zxyw
- mad r0, -r2.yzxw, r1.zxyw, r0
- NOTE: might need MAD_2
- */
-
- o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
- t_dst_mask(vpi->DstReg.
- WriteMask),
- VSF_OUT_CLASS_TMP);
-
- o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
- t_src_class(src[0].
- File),
- src[0].
- NegateBase ?
- VSF_FLAG_ALL :
- VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
-
- o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
- t_src_class(src[1].
- File),
- src[1].
- NegateBase ?
- VSF_FLAG_ALL :
- VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
-
- o_inst->src[2] = ZERO_SRC_1;
- o_inst++;
- u_temp_i--;
-
- o_inst->op =
- MAKE_VSF_OP(R300_VPI_OUT_OP_MAD,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
-
- o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
- t_src_class(src[1].
- File),
- (!src[1].
- NegateBase) ?
- VSF_FLAG_ALL :
- VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
-
- o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
- t_src_class(src[0].
- File),
- src[0].
- NegateBase ?
- VSF_FLAG_ALL :
- VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
-
- o_inst->src[2] = MAKE_VSF_SOURCE(u_temp_i + 1,
- VSF_IN_COMPONENT_X,
- VSF_IN_COMPONENT_Y,
- VSF_IN_COMPONENT_Z,
- VSF_IN_COMPONENT_W,
- VSF_IN_CLASS_TMP,
- VSF_FLAG_NONE);
-
- goto next;
-
- case OPCODE_RCC:
- fprintf(stderr, "Dont know how to handle op %d yet\n",
- vpi->Opcode);
- _mesa_exit(-1);
+ inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */
+ &u_temp_i);
+ break;
+ case OPCODE_FRC:
+ inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
+ break;
+ case OPCODE_LG2:
+ inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
+ break;
+ case OPCODE_LIT:
+ inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
+ break;
+ case OPCODE_LOG:
+ inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
+ break;
+ case OPCODE_MAD:
+ inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
+ break;
+ case OPCODE_MAX:
+ inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
+ break;
+ case OPCODE_MIN:
+ inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
+ break;
+ case OPCODE_MOV:
+ inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
+ break;
+ case OPCODE_MUL:
+ inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
+ break;
+ case OPCODE_POW:
+ inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
break;
- case OPCODE_END:
+ case OPCODE_RCP:
+ inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
+ break;
+ case OPCODE_RSQ:
+ inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
+ break;
+ case OPCODE_SGE:
+ inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
+ break;
+ case OPCODE_SLT:
+ inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
+ break;
+ case OPCODE_SUB:
+ inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
+ break;
+ case OPCODE_SWZ:
+ inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
+ break;
+ case OPCODE_XPD:
+ inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */
+ &u_temp_i);
break;
default:
+ assert(0);
break;
}
+ }
- o_inst->op =
- MAKE_VSF_OP(t_opcode(vpi->Opcode),
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
-
- if (are_srcs_scalar) {
- switch (operands) {
- case 1:
- o_inst->src[0] = t_src_scalar(vp, &src[0]);
- o_inst->src[1] = ZERO_SRC_0;
- o_inst->src[2] = ZERO_SRC_0;
- break;
-
- case 2:
- o_inst->src[0] = t_src_scalar(vp, &src[0]);
- o_inst->src[1] = t_src_scalar(vp, &src[1]);
- o_inst->src[2] = ZERO_SRC_1;
- break;
-
- case 3:
- o_inst->src[0] = t_src_scalar(vp, &src[0]);
- o_inst->src[1] = t_src_scalar(vp, &src[1]);
- o_inst->src[2] = t_src_scalar(vp, &src[2]);
- break;
-
- default:
- fprintf(stderr,
- "scalars and op RCC not handled yet");
- _mesa_exit(-1);
- break;
- }
- } else {
- switch (operands) {
- case 1:
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = ZERO_SRC_0;
- o_inst->src[2] = ZERO_SRC_0;
- break;
-
- case 2:
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = t_src(vp, &src[1]);
- o_inst->src[2] = ZERO_SRC_1;
- break;
-
- case 3:
- o_inst->src[0] = t_src(vp, &src[0]);
- o_inst->src[1] = t_src(vp, &src[1]);
- o_inst->src[2] = t_src(vp, &src[2]);
- break;
-
- default:
- fprintf(stderr,
- "scalars and op RCC not handled yet");
- _mesa_exit(-1);
- break;
- }
+ /* Some outputs may be artificially added, to match the inputs
+ of the fragment program. Blank the outputs here. */
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
+ if (vp->key.OutputsAdded & (1 << i)) {
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ vp->outputs[i],
+ VSF_FLAG_ALL,
+ PVS_DST_REG_OUT);
+ inst[1] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst += 4;
}
- next:;
}
- /* Will most likely segfault before we get here... fix later. */
- if (o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH / 4) {
+ vp->program.length = (inst - vp->program.body.i);
+ if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) {
vp->program.length = 0;
vp->native = GL_FALSE;
- return;
}
- vp->program.length = (o_inst - vp->program.body.i) * 4;
#if 0
fprintf(stderr, "hw program:\n");
for (i = 0; i < vp->program.length; i++)
@@ -1059,6 +1222,9 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
#endif
}
+/* DP4 version seems to trigger some hw peculiarity */
+//#define PREFER_DP4
+
static void position_invariant(struct gl_program *prog)
{
struct prog_instruction *vpi;
@@ -1145,8 +1311,8 @@ static void position_invariant(struct gl_program *prog)
assert(vpi->Opcode == OPCODE_END);
}
-static void insert_wpos(struct r300_vertex_program *vp,
- struct gl_program *prog, GLuint temp_index)
+static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
+ GLuint temp_index)
{
struct prog_instruction *vpi;
struct prog_instruction *vpi_insert;
@@ -1206,8 +1372,8 @@ static void pos_as_texcoord(struct r300_vertex_program *vp,
prog->NumTemporaries++;
for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
- if (vpi->DstReg.File == PROGRAM_OUTPUT &&
- vpi->DstReg.Index == VERT_RESULT_HPOS) {
+ if (vpi->DstReg.File == PROGRAM_OUTPUT
+ && vpi->DstReg.Index == VERT_RESULT_HPOS) {
vpi->DstReg.File = PROGRAM_TEMPORARY;
vpi->DstReg.Index = tempregi;
}
@@ -1223,25 +1389,32 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key
vp = _mesa_calloc(sizeof(*vp));
_mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
-
vp->wpos_idx = wpos_idx;
if (mesa_vp->IsPositionInvariant) {
position_invariant(&mesa_vp->Base);
}
- if (wpos_idx > -1)
+ if (wpos_idx > -1) {
pos_as_texcoord(vp, &mesa_vp->Base);
+ }
assert(mesa_vp->Base.NumInstructions);
-
vp->num_temporaries = mesa_vp->Base.NumTemporaries;
-
r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
return vp;
}
+static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
+{
+ if (key->OutputsWritten & (1 << vert))
+ return;
+
+ key->OutputsWritten |= 1 << vert;
+ key->OutputsAdded |= 1 << vert;
+}
+
void r300SelectVertexShader(r300ContextPtr r300)
{
GLcontext *ctx = ctx = r300->radeon.glCtx;
@@ -1253,10 +1426,10 @@ void r300SelectVertexShader(r300ContextPtr r300)
GLint wpos_idx;
vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+ wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
+ wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
- wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS;
-
wpos_idx = -1;
if (InputsRead & FRAG_BIT_WPOS) {
for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
@@ -1268,31 +1441,34 @@ void r300SelectVertexShader(r300ContextPtr r300)
_mesa_exit(-1);
}
- InputsRead |= (FRAG_BIT_TEX0 << i);
+ wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
wpos_idx = i;
}
- if (InputsRead & FRAG_BIT_COL0)
- wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0;
+ add_outputs(&wanted_key, VERT_RESULT_HPOS);
- if ((InputsRead & FRAG_BIT_COL1) /*||
- (InputsRead & FRAG_BIT_FOGC) */ )
- wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1;
+ if (InputsRead & FRAG_BIT_COL0) {
+ add_outputs(&wanted_key, VERT_RESULT_COL0);
+ }
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
- if (InputsRead & (FRAG_BIT_TEX0 << i))
- wanted_key.OutputsWritten |=
- 1 << (VERT_RESULT_TEX0 + i);
+ if (InputsRead & FRAG_BIT_COL1) {
+ add_outputs(&wanted_key, VERT_RESULT_COL1);
+ }
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+ add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
+ }
+ }
- wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
if (vpc->mesa_program.IsPositionInvariant) {
/* we wan't position don't we ? */
wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
}
for (vp = vpc->progs; vp; vp = vp->next)
- if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) ==
- 0) {
+ if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
+ == 0) {
r300->selected_vp = vp;
return;
}
diff --git a/r300/r300_vertprog.h b/r300/r300_vertprog.h
index 252d5a9..2f35f02 100644
--- a/r300/r300_vertprog.h
+++ b/r300/r300_vertprog.h
@@ -3,10 +3,24 @@
#include "r300_reg.h"
-typedef struct {
- GLuint op;
- GLuint src[3];
-} VERTEX_SHADER_INSTRUCTION;
+#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \
+ (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \
+ | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \
+ | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \
+ | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \
+ | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \
+ | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
+
+#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \
+ (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \
+ | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \
+ | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \
+ | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \
+ | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \
+ | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \
+ | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
+
+#if 1
#define VSF_FLAG_X 1
#define VSF_FLAG_Y 2
@@ -16,74 +30,6 @@ typedef struct {
#define VSF_FLAG_ALL 0xf
#define VSF_FLAG_NONE 0
-#define VSF_OUT_CLASS_TMP 0
-#define VSF_OUT_CLASS_ADDR 1
-#define VSF_OUT_CLASS_RESULT 2
-
-/* first DWORD of an instruction */
-
-/* possible operations:
- DOT, MUL, ADD, MAD, FRC, MAX, MIN, SGE, SLT, EXP, LOG, LIT, POW, RCP, RSQ, EX2,
- LG2, MAD_2 */
-
-#define MAKE_VSF_OP(op, out_reg_index, out_reg_fields, class) \
- ((op) \
- | ((out_reg_index) << R300_VPI_OUT_REG_INDEX_SHIFT) \
- | ((out_reg_fields) << 20) \
- | ( (class) << 8 ) )
-
-#define EASY_VSF_OP(op, out_reg_index, out_reg_fields, class) \
- MAKE_VSF_OP(R300_VPI_OUT_OP_##op, out_reg_index, VSF_FLAG_##out_reg_fields, VSF_OUT_CLASS_##class) \
-
-/* according to Nikolai, the subsequent 3 DWORDs are sources, use same define for each */
-
-#define VSF_IN_CLASS_TMP 0
-#define VSF_IN_CLASS_ATTR 1
-#define VSF_IN_CLASS_PARAM 2
-#define VSF_IN_CLASS_NONE 9
-
-#define VSF_IN_COMPONENT_X 0
-#define VSF_IN_COMPONENT_Y 1
-#define VSF_IN_COMPONENT_Z 2
-#define VSF_IN_COMPONENT_W 3
-#define VSF_IN_COMPONENT_ZERO 4
-#define VSF_IN_COMPONENT_ONE 5
-
-#define MAKE_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \
- ( ((in_reg_index)<<R300_VPI_IN_REG_INDEX_SHIFT) \
- | ((comp_x)<<R300_VPI_IN_X_SHIFT) \
- | ((comp_y)<<R300_VPI_IN_Y_SHIFT) \
- | ((comp_z)<<R300_VPI_IN_Z_SHIFT) \
- | ((comp_w)<<R300_VPI_IN_W_SHIFT) \
- | ((negate)<<25) | ((class)))
-
-#define EASY_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \
- MAKE_VSF_SOURCE(in_reg_index, \
- VSF_IN_COMPONENT_##comp_x, \
- VSF_IN_COMPONENT_##comp_y, \
- VSF_IN_COMPONENT_##comp_z, \
- VSF_IN_COMPONENT_##comp_w, \
- VSF_IN_CLASS_##class, VSF_FLAG_##negate)
-
-/* special sources: */
-
-/* (1.0,1.0,1.0,1.0) vector (ATTR, plain ) */
-#define VSF_ATTR_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, ATTR, NONE)
-#define VSF_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, NONE, NONE)
-
-/* contents of unmodified register */
-#define VSF_REG(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, ATTR, NONE)
-
-/* contents of unmodified parameter */
-#define VSF_PARAM(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, PARAM, NONE)
-
-/* contents of unmodified temporary register */
-#define VSF_TMP(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, TMP, NONE)
-
-/* components of ATTR register */
-#define VSF_ATTR_X(reg) EASY_VSF_SOURCE(reg, X, X, X, X, ATTR, NONE)
-#define VSF_ATTR_Y(reg) EASY_VSF_SOURCE(reg, Y, Y, Y, Y, ATTR, NONE)
-#define VSF_ATTR_Z(reg) EASY_VSF_SOURCE(reg, Z, Z, Z, Z, ATTR, NONE)
-#define VSF_ATTR_W(reg) EASY_VSF_SOURCE(reg, W, W, W, W, ATTR, NONE)
+#endif
#endif
diff --git a/r300/r500_fragprog.c b/r300/r500_fragprog.c
new file mode 100644
index 0000000..75dae86
--- /dev/null
+++ b/r300/r500_fragprog.c
@@ -0,0 +1,700 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include "radeon_nqssadce.h"
+#include "radeon_program_alu.h"
+
+
+static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
+{
+ gl_state_index fail_value_tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
+ };
+ struct prog_src_register reg = { 0, };
+
+ fail_value_tokens[2] = tmu;
+ reg.File = PROGRAM_STATE_VAR;
+ reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
+ reg.Swizzle = SWIZZLE_WWWW;
+ return reg;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following way:
+ * - premultiply texture coordinates for RECT
+ * - extract operand swizzles
+ * - introduce a temporary register when write masks are needed
+ *
+ */
+static GLboolean transform_TEX(
+ struct radeon_transform_context *t,
+ struct prog_instruction* orig_inst, void* data)
+{
+ struct r500_fragment_program_compiler *compiler =
+ (struct r500_fragment_program_compiler*)data;
+ struct prog_instruction inst = *orig_inst;
+ struct prog_instruction* tgt;
+ GLboolean destredirect = GL_FALSE;
+
+ if (inst.Opcode != OPCODE_TEX &&
+ inst.Opcode != OPCODE_TXB &&
+ inst.Opcode != OPCODE_TXP &&
+ inst.Opcode != OPCODE_KIL)
+ return GL_FALSE;
+
+ /* ARB_shadow & EXT_shadow_funcs */
+ if (inst.Opcode != OPCODE_KIL &&
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
+
+ if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
+ tgt = radeonAppendInstructions(t->Program, 1);
+
+ tgt->Opcode = OPCODE_MOV;
+ tgt->DstReg = inst.DstReg;
+ if (comparefunc == GL_ALWAYS) {
+ tgt->SrcReg[0].File = PROGRAM_BUILTIN;
+ tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
+ } else {
+ tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
+ }
+ return GL_TRUE;
+ }
+
+ inst.DstReg.File = PROGRAM_TEMPORARY;
+ inst.DstReg.Index = radeonFindFreeTemporary(t);
+ inst.DstReg.WriteMask = WRITEMASK_XYZW;
+ } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) {
+ int tempreg = radeonFindFreeTemporary(t);
+
+ inst.DstReg.File = PROGRAM_TEMPORARY;
+ inst.DstReg.Index = tempreg;
+ inst.DstReg.WriteMask = WRITEMASK_XYZW;
+ destredirect = GL_TRUE;
+ }
+
+ tgt = radeonAppendInstructions(t->Program, 1);
+ _mesa_copy_instructions(tgt, &inst, 1);
+
+ if (inst.Opcode != OPCODE_KIL &&
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
+ GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
+ int rcptemp = radeonFindFreeTemporary(t);
+ int pass, fail;
+
+ tgt = radeonAppendInstructions(t->Program, 3);
+
+ tgt[0].Opcode = OPCODE_RCP;
+ tgt[0].DstReg.File = PROGRAM_TEMPORARY;
+ tgt[0].DstReg.Index = rcptemp;
+ tgt[0].DstReg.WriteMask = WRITEMASK_W;
+ tgt[0].SrcReg[0] = inst.SrcReg[0];
+ tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ tgt[1].Opcode = OPCODE_MAD;
+ tgt[1].DstReg = inst.DstReg;
+ tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
+ tgt[1].SrcReg[0] = inst.SrcReg[0];
+ tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
+ tgt[1].SrcReg[1].Index = rcptemp;
+ tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
+ tgt[1].SrcReg[2].Index = inst.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
+ tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW;
+ else
+ tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW;
+
+ tgt[2].Opcode = OPCODE_CMP;
+ tgt[2].DstReg = orig_inst->DstReg;
+ tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
+ tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
+
+ if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
+ tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
+ tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
+ } else if (destredirect) {
+ tgt = radeonAppendInstructions(t->Program, 1);
+
+ tgt->Opcode = OPCODE_MOV;
+ tgt->DstReg = orig_inst->DstReg;
+ tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
+ tgt->SrcReg[0].Index = inst.DstReg.Index;
+ }
+
+ return GL_TRUE;
+}
+
+
+static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp)
+{
+ struct gl_fragment_program *mp = &fp->mesa_program;
+
+ /* Ask Mesa nicely to fill in ParameterValues for us */
+ if (mp->Base.Parameters)
+ _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters);
+}
+
+
+/**
+ * Transform the program to support fragment.position.
+ *
+ * Introduce a small fragment at the start of the program that will be
+ * the only code that directly reads the FRAG_ATTRIB_WPOS input.
+ * All other code pieces that reference that input will be rewritten
+ * to read from a newly allocated temporary.
+ *
+ * \todo if/when r5xx supports the radeon_program architecture, this is a
+ * likely candidate for code sharing.
+ */
+static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
+{
+ GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead;
+
+ if (!(InputsRead & FRAG_BIT_WPOS))
+ return;
+
+ static gl_state_index tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
+ };
+ struct prog_instruction *fpi;
+ GLuint window_index;
+ int i = 0;
+ GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
+
+ _mesa_insert_instructions(compiler->program, 0, 3);
+ fpi = compiler->program->Instructions;
+
+ /* perspective divide */
+ fpi[i].Opcode = OPCODE_RCP;
+
+ fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+ fpi[i].DstReg.Index = tempregi;
+ fpi[i].DstReg.WriteMask = WRITEMASK_W;
+ fpi[i].DstReg.CondMask = COND_TR;
+
+ fpi[i].SrcReg[0].File = PROGRAM_INPUT;
+ fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
+ i++;
+
+ fpi[i].Opcode = OPCODE_MUL;
+
+ fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+ fpi[i].DstReg.Index = tempregi;
+ fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
+ fpi[i].DstReg.CondMask = COND_TR;
+
+ fpi[i].SrcReg[0].File = PROGRAM_INPUT;
+ fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
+ fpi[i].SrcReg[1].Index = tempregi;
+ fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ i++;
+
+ /* viewport transformation */
+ window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
+
+ fpi[i].Opcode = OPCODE_MAD;
+
+ fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+ fpi[i].DstReg.Index = tempregi;
+ fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
+ fpi[i].DstReg.CondMask = COND_TR;
+
+ fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
+ fpi[i].SrcReg[0].Index = tempregi;
+ fpi[i].SrcReg[0].Swizzle =
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
+ fpi[i].SrcReg[1].Index = window_index;
+ fpi[i].SrcReg[1].Swizzle =
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
+ fpi[i].SrcReg[2].Index = window_index;
+ fpi[i].SrcReg[2].Swizzle =
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+ i++;
+
+ for (; i < compiler->program->NumInstructions; ++i) {
+ int reg;
+ for (reg = 0; reg < 3; reg++) {
+ if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
+ fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
+ fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
+ fpi[i].SrcReg[reg].Index = tempregi;
+ }
+ }
+ }
+}
+
+
+static void nqssadce_init(struct nqssadce_state* s)
+{
+ s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW;
+ s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W;
+}
+
+static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
+{
+ GLuint relevant;
+ int i;
+
+ if (opcode == OPCODE_TEX ||
+ opcode == OPCODE_TXB ||
+ opcode == OPCODE_TXP ||
+ opcode == OPCODE_KIL) {
+ if (reg.Abs)
+ return GL_FALSE;
+
+ if (reg.NegateAbs)
+ reg.NegateBase ^= 15;
+
+ if (opcode == OPCODE_KIL) {
+ if (reg.Swizzle != SWIZZLE_NOOP)
+ return GL_FALSE;
+ } else {
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(reg.Swizzle, i);
+ if (swz == SWIZZLE_NIL) {
+ reg.NegateBase &= ~(1 << i);
+ continue;
+ }
+ if (swz >= 4)
+ return GL_FALSE;
+ }
+ }
+
+ if (reg.NegateBase)
+ return GL_FALSE;
+
+ return GL_TRUE;
+ } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
+ /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
+ * if it doesn't fit perfectly into a .xyzw case... */
+ if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs
+ && !reg.NegateBase && !reg.NegateAbs)
+ return GL_TRUE;
+
+ return GL_FALSE;
+ } else {
+ /* ALU instructions support almost everything */
+ if (reg.Abs)
+ return GL_TRUE;
+
+ relevant = 0;
+ for(i = 0; i < 3; ++i) {
+ GLuint swz = GET_SWZ(reg.Swizzle, i);
+ if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+ relevant |= 1 << i;
+ }
+ if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
+ return GL_FALSE;
+
+ return GL_TRUE;
+ }
+}
+
+/**
+ * Implement a MOV with a potentially non-native swizzle.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation. Therefore, we split the MOV into two instructions when necessary.
+ */
+static void nqssadce_build_swizzle(struct nqssadce_state *s,
+ struct prog_dst_register dst, struct prog_src_register src)
+{
+ struct prog_instruction *inst;
+ GLuint negatebase[2] = { 0, 0 };
+ int i;
+
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(src.Swizzle, i);
+ if (swz == SWIZZLE_NIL)
+ continue;
+ negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i;
+ }
+
+ _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
+ inst = s->Program->Instructions + s->IP;
+
+ for(i = 0; i <= 1; ++i) {
+ if (!negatebase[i])
+ continue;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->DstReg = dst;
+ inst->DstReg.WriteMask = negatebase[i];
+ inst->SrcReg[0] = src;
+ inst++;
+ s->IP++;
+ }
+}
+
+static GLuint build_dtm(GLuint depthmode)
+{
+ switch(depthmode) {
+ default:
+ case GL_LUMINANCE: return 0;
+ case GL_INTENSITY: return 1;
+ case GL_ALPHA: return 2;
+ }
+}
+
+static GLuint build_func(GLuint comparefunc)
+{
+ return comparefunc - GL_NEVER;
+}
+
+
+/**
+ * Collect all external state that is relevant for compiling the given
+ * fragment program.
+ */
+static void build_state(
+ r300ContextPtr r300,
+ struct r500_fragment_program *fp,
+ struct r500_fragment_program_external_state *state)
+{
+ int unit;
+
+ _mesa_bzero(state, sizeof(*state));
+
+ for(unit = 0; unit < 16; ++unit) {
+ if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) {
+ struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
+
+ state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
+ state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
+ }
+ }
+}
+
+static void dump_program(struct r500_fragment_program_code *code);
+
+void r500TranslateFragmentShader(r300ContextPtr r300,
+ struct r500_fragment_program *fp)
+{
+ struct r500_fragment_program_external_state state;
+
+ build_state(r300, fp, &state);
+ if (_mesa_memcmp(&fp->state, &state, sizeof(state))) {
+ /* TODO: cache compiled programs */
+ fp->translated = GL_FALSE;
+ _mesa_memcpy(&fp->state, &state, sizeof(state));
+ }
+
+ if (!fp->translated) {
+ struct r500_fragment_program_compiler compiler;
+
+ compiler.r300 = r300;
+ compiler.fp = fp;
+ compiler.code = &fp->code;
+ compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Compiler: Initial program:\n");
+ _mesa_print_program(compiler.program);
+ }
+
+ insert_WPOS_trailer(&compiler);
+
+ struct radeon_program_transformation transformations[] = {
+ { &transform_TEX, &compiler },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformDeriv, 0 },
+ { &radeonTransformTrigScale, 0 }
+ };
+ radeonLocalTransform(r300->radeon.glCtx, compiler.program,
+ 4, transformations);
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Compiler: after native rewrite:\n");
+ _mesa_print_program(compiler.program);
+ }
+
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &is_native_swizzle,
+ .BuildSwizzle = &nqssadce_build_swizzle,
+ .RewriteDepthOut = GL_TRUE
+ };
+ radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Compiler: after NqSSA-DCE:\n");
+ _mesa_print_program(compiler.program);
+ }
+
+ fp->translated = r500FragmentProgramEmit(&compiler);
+
+ /* Subtle: Rescue any parameters that have been added during transformations */
+ _mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
+ fp->mesa_program.Base.Parameters = compiler.program->Parameters;
+ compiler.program->Parameters = 0;
+
+ _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0);
+
+ r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ if (fp->translated) {
+ _mesa_printf("Machine-readable code:\n");
+ dump_program(&fp->code);
+ }
+ }
+
+ }
+
+ update_params(r300, fp);
+
+}
+
+static char *toswiz(int swiz_val) {
+ switch(swiz_val) {
+ case 0: return "R";
+ case 1: return "G";
+ case 2: return "B";
+ case 3: return "A";
+ case 4: return "0";
+ case 5: return "1/2";
+ case 6: return "1";
+ case 7: return "U";
+ }
+ return NULL;
+}
+
+static char *toop(int op_val)
+{
+ char *str = NULL;
+ switch (op_val) {
+ case 0: str = "MAD"; break;
+ case 1: str = "DP3"; break;
+ case 2: str = "DP4"; break;
+ case 3: str = "D2A"; break;
+ case 4: str = "MIN"; break;
+ case 5: str = "MAX"; break;
+ case 6: str = "Reserved"; break;
+ case 7: str = "CND"; break;
+ case 8: str = "CMP"; break;
+ case 9: str = "FRC"; break;
+ case 10: str = "SOP"; break;
+ case 11: str = "MDH"; break;
+ case 12: str = "MDV"; break;
+ }
+ return str;
+}
+
+static char *to_alpha_op(int op_val)
+{
+ char *str = NULL;
+ switch (op_val) {
+ case 0: str = "MAD"; break;
+ case 1: str = "DP"; break;
+ case 2: str = "MIN"; break;
+ case 3: str = "MAX"; break;
+ case 4: str = "Reserved"; break;
+ case 5: str = "CND"; break;
+ case 6: str = "CMP"; break;
+ case 7: str = "FRC"; break;
+ case 8: str = "EX2"; break;
+ case 9: str = "LN2"; break;
+ case 10: str = "RCP"; break;
+ case 11: str = "RSQ"; break;
+ case 12: str = "SIN"; break;
+ case 13: str = "COS"; break;
+ case 14: str = "MDH"; break;
+ case 15: str = "MDV"; break;
+ }
+ return str;
+}
+
+static char *to_mask(int val)
+{
+ char *str = NULL;
+ switch(val) {
+ case 0: str = "NONE"; break;
+ case 1: str = "R"; break;
+ case 2: str = "G"; break;
+ case 3: str = "RG"; break;
+ case 4: str = "B"; break;
+ case 5: str = "RB"; break;
+ case 6: str = "GB"; break;
+ case 7: str = "RGB"; break;
+ case 8: str = "A"; break;
+ case 9: str = "AR"; break;
+ case 10: str = "AG"; break;
+ case 11: str = "ARG"; break;
+ case 12: str = "AB"; break;
+ case 13: str = "ARB"; break;
+ case 14: str = "AGB"; break;
+ case 15: str = "ARGB"; break;
+ }
+ return str;
+}
+
+static char *to_texop(int val)
+{
+ switch(val) {
+ case 0: return "NOP";
+ case 1: return "LD";
+ case 2: return "TEXKILL";
+ case 3: return "PROJ";
+ case 4: return "LODBIAS";
+ case 5: return "LOD";
+ case 6: return "DXDY";
+ }
+ return NULL;
+}
+
+static void dump_program(struct r500_fragment_program_code *code)
+{
+
+ fprintf(stderr, "R500 Fragment Program:\n--------\n");
+
+ int n;
+ uint32_t inst;
+ uint32_t inst0;
+ char *str = NULL;
+
+ if (code->const_nr) {
+ fprintf(stderr, "--------\nConstants:\n");
+ for (n = 0; n < code->const_nr; n++) {
+ fprintf(stderr, "Constant %d: %i[%i]\n", n,
+ code->constant[n].File, code->constant[n].Index);
+ }
+ fprintf(stderr, "--------\n");
+ }
+
+ for (n = 0; n < code->inst_end+1; n++) {
+ inst0 = inst = code->inst[n].inst0;
+ fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
+ switch(inst & 0x3) {
+ case R500_INST_TYPE_ALU: str = "ALU"; break;
+ case R500_INST_TYPE_OUT: str = "OUT"; break;
+ case R500_INST_TYPE_FC: str = "FC"; break;
+ case R500_INST_TYPE_TEX: str = "TEX"; break;
+ };
+ fprintf(stderr,"%s %s %s %s %s ", str,
+ inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
+ inst & R500_INST_LAST ? "LAST" : "",
+ inst & R500_INST_NOP ? "NOP" : "",
+ inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
+ fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
+ to_mask((inst >> 15) & 0xf));
+
+ switch(inst0 & 0x3) {
+ case 0:
+ case 1:
+ fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
+ inst = code->inst[n].inst1;
+
+ fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+ inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+ (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+ (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+ (inst >> 30));
+
+ fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
+ inst = code->inst[n].inst2;
+ fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+ inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+ (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+ (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+ (inst >> 30));
+ fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
+ inst = code->inst[n].inst3;
+ fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
+ (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
+ (inst >> 11) & 0x3,
+ (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
+ (inst >> 24) & 0x3);
+
+
+ fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
+ inst = code->inst[n].inst4;
+ fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
+ (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+ (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
+ (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+ (inst >> 31) & 0x1);
+
+ fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
+ inst = code->inst[n].inst5;
+ fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
+ (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+ (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
+ (inst >> 23) & 0x3,
+ (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
+ break;
+ case 2:
+ break;
+ case 3:
+ inst = code->inst[n].inst1;
+ fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
+ to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
+ (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
+ inst = code->inst[n].inst2;
+ fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
+ inst & 127, inst & (1<<7) ? "(rel)" : "",
+ toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
+ toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
+ (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
+ toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
+ toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
+
+ fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
+ break;
+ }
+ fprintf(stderr,"\n");
+ }
+
+}
diff --git a/r300/r500_fragprog.h b/r300/r500_fragprog.h
new file mode 100644
index 0000000..8641cee
--- /dev/null
+++ b/r300/r500_fragprog.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ * Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R500_FRAGPROG_H_
+#define __R500_FRAGPROG_H_
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+
+#include "r300_context.h"
+#include "r300_state.h"
+#include "radeon_program.h"
+
+struct r500_fragment_program;
+
+extern void r500TranslateFragmentShader(r300ContextPtr r300,
+ struct r500_fragment_program *fp);
+
+struct r500_fragment_program_compiler {
+ r300ContextPtr r300;
+ struct r500_fragment_program *fp;
+ struct r500_fragment_program_code *code;
+ struct gl_program *program;
+};
+
+extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler);
+
+#endif
diff --git a/r300/r500_fragprog_emit.c b/r300/r500_fragprog_emit.c
new file mode 100644
index 0000000..4631235
--- /dev/null
+++ b/r300/r500_fragprog_emit.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \author Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * \todo Depth write, WPOS/FOGC inputs
+ *
+ * \todo FogOption
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include "radeon_program_pair.h"
+
+
+#define PROG_CODE \
+ struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \
+ struct r500_fragment_program_code *code = c->code
+
+#define error(fmt, args...) do { \
+ fprintf(stderr, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ } while(0)
+
+
+/**
+ * Callback to register hardware constants.
+ */
+static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex)
+{
+ PROG_CODE;
+
+ for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
+ if (code->constant[*hwindex].File == file &&
+ code->constant[*hwindex].Index == idx)
+ break;
+ }
+
+ if (*hwindex >= code->const_nr) {
+ if (*hwindex >= PFS_NUM_CONST_REGS) {
+ error("Out of hw constants!\n");
+ return GL_FALSE;
+ }
+
+ code->const_nr++;
+ code->constant[*hwindex].File = file;
+ code->constant[*hwindex].Index = idx;
+ }
+
+ return GL_TRUE;
+}
+
+static GLuint translate_rgb_op(GLuint opcode)
+{
+ switch(opcode) {
+ case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+ case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+ case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
+ case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
+ case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
+ case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
+ default:
+ error("translate_rgb_op(%d): unknown opcode\n", opcode);
+ /* fall through */
+ case OPCODE_NOP:
+ /* fall through */
+ case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
+ case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
+ case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
+ case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
+ }
+}
+
+static GLuint translate_alpha_op(GLuint opcode)
+{
+ switch(opcode) {
+ case OPCODE_CMP: return R500_ALPHA_OP_CMP;
+ case OPCODE_COS: return R500_ALPHA_OP_COS;
+ case OPCODE_DDX: return R500_ALPHA_OP_MDH;
+ case OPCODE_DDY: return R500_ALPHA_OP_MDV;
+ case OPCODE_DP3: return R500_ALPHA_OP_DP;
+ case OPCODE_DP4: return R500_ALPHA_OP_DP;
+ case OPCODE_EX2: return R500_ALPHA_OP_EX2;
+ case OPCODE_FRC: return R500_ALPHA_OP_FRC;
+ case OPCODE_LG2: return R500_ALPHA_OP_LN2;
+ default:
+ error("translate_alpha_op(%d): unknown opcode\n", opcode);
+ /* fall through */
+ case OPCODE_NOP:
+ /* fall through */
+ case OPCODE_MAD: return R500_ALPHA_OP_MAD;
+ case OPCODE_MAX: return R500_ALPHA_OP_MAX;
+ case OPCODE_MIN: return R500_ALPHA_OP_MIN;
+ case OPCODE_RCP: return R500_ALPHA_OP_RCP;
+ case OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
+ case OPCODE_SIN: return R500_ALPHA_OP_SIN;
+ }
+}
+
+static GLuint fix_hw_swizzle(GLuint swz)
+{
+ if (swz == 5) swz = 6;
+ if (swz == SWIZZLE_NIL) swz = 4;
+ return swz;
+}
+
+static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
+{
+ GLuint t = inst->RGB.Arg[arg].Source;
+ int comp;
+ t |= inst->RGB.Arg[arg].Negate << 11;
+ t |= inst->RGB.Arg[arg].Abs << 12;
+
+ for(comp = 0; comp < 3; ++comp)
+ t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
+
+ return t;
+}
+
+static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i)
+{
+ GLuint t = inst->Alpha.Arg[i].Source;
+ t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
+ t |= inst->Alpha.Arg[i].Negate << 5;
+ t |= inst->Alpha.Arg[i].Abs << 6;
+ return t;
+}
+
+static void use_temporary(struct r500_fragment_program_code* code, GLuint index)
+{
+ if (index > code->max_temp_idx)
+ code->max_temp_idx = index;
+}
+
+static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{
+ if (!src.Constant)
+ use_temporary(code, src.Index);
+ return src.Index | src.Constant << 8;
+}
+
+
+/**
+ * Emit a paired ALU instruction.
+ */
+static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
+{
+ PROG_CODE;
+
+ if (code->inst_end >= 511) {
+ error("emit_alu: Too many instructions");
+ return GL_FALSE;
+ }
+
+ int ip = ++code->inst_end;
+
+ code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode);
+ code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode);
+
+ if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
+ code->inst[ip].inst0 = R500_INST_TYPE_OUT;
+ else
+ code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+ code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
+
+ code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
+ code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+ if (inst->Alpha.DepthWriteMask) {
+ code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
+ c->fp->writes_depth = GL_TRUE;
+ }
+
+ code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
+ code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
+ use_temporary(code, inst->Alpha.DestIndex);
+ use_temporary(code, inst->RGB.DestIndex);
+
+ if (inst->RGB.Saturate)
+ code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
+ if (inst->Alpha.Saturate)
+ code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
+
+ code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
+ code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
+ code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
+
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
+
+ code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
+ code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
+ code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
+
+ code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
+ code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
+ code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
+
+ return GL_TRUE;
+}
+
+static GLuint translate_strq_swizzle(struct prog_src_register src)
+{
+ GLuint swiz = 0;
+ int i;
+ for (i = 0; i < 4; i++)
+ swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2;
+ return swiz;
+}
+
+/**
+ * Emit a single TEX instruction
+ */
+static GLboolean emit_tex(void *data, struct prog_instruction *inst)
+{
+ PROG_CODE;
+
+ if (code->inst_end >= 511) {
+ error("emit_tex: Too many instructions");
+ return GL_FALSE;
+ }
+
+ int ip = ++code->inst_end;
+
+ code->inst[ip].inst0 = R500_INST_TYPE_TEX
+ | (inst->DstReg.WriteMask << 11)
+ | R500_INST_TEX_SEM_WAIT;
+ code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
+ | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
+
+ if (inst->TexSrcTarget == TEXTURE_RECT_INDEX)
+ code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+
+ switch (inst->Opcode) {
+ case OPCODE_KIL:
+ code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
+ break;
+ case OPCODE_TEX:
+ code->inst[ip].inst1 |= R500_TEX_INST_LD;
+ break;
+ case OPCODE_TXB:
+ code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
+ break;
+ case OPCODE_TXP:
+ code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
+ break;
+ default:
+ error("emit_tex can't handle opcode %x\n", inst->Opcode);
+ }
+
+ code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+ | (translate_strq_swizzle(inst->SrcReg[0]) << 8)
+ | R500_TEX_DST_ADDR(inst->DstReg.Index)
+ | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
+ | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
+
+ return GL_TRUE;
+}
+
+static const struct radeon_pair_handler pair_handler = {
+ .EmitConst = emit_const,
+ .EmitPaired = emit_paired,
+ .EmitTex = emit_tex,
+ .MaxHwTemps = 128
+};
+
+GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler)
+{
+ struct r500_fragment_program_code *code = compiler->code;
+
+ _mesa_bzero(code, sizeof(*code));
+ code->max_temp_idx = 1;
+ code->inst_offset = 0;
+ code->inst_end = -1;
+
+ if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
+ return GL_FALSE;
+
+ if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+ /* This may happen when dead-code elimination is disabled or
+ * when most of the fragment program logic is leading to a KIL */
+ if (code->inst_end >= 511) {
+ error("Introducing fake OUT: Too many instructions");
+ return GL_FALSE;
+ }
+
+ int ip = ++code->inst_end;
+ code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
+ }
+
+ return GL_TRUE;
+}
diff --git a/r300/radeon_context.c b/r300/radeon_context.c
index e9634b4..3fc724a 100644
--- a/r300/radeon_context.c
+++ b/r300/radeon_context.c
@@ -135,6 +135,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
/* Fill in additional standard functions. */
radeonInitDriverFuncs(functions);
+ radeon->radeonScreen = screen;
/* Allocate and initialize the Mesa context */
if (sharedContextPrivate)
shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
@@ -156,9 +157,8 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
radeon->dri.hwContext = driContextPriv->hHWContext;
radeon->dri.hwLock = &sPriv->pSAREA->lock;
radeon->dri.fd = sPriv->fd;
- radeon->dri.drmMinor = sPriv->drmMinor;
+ radeon->dri.drmMinor = sPriv->drm_version.minor;
- radeon->radeonScreen = screen;
radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
screen->sarea_priv_offset);
@@ -177,10 +177,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
radeon->do_usleeps ? "usleeps" : "busy waits",
fthrottle_mode, radeon->radeonScreen->irq);
- radeon->vblank_flags = (radeon->radeonScreen->irq != 0)
- ? driGetDefaultVBlankFlags(&radeon->optionCache) : VBLANK_FLAG_NO_IRQ;
-
- (*dri_interface->getUST) (&radeon->swap_ust);
+ (*sPriv->systemTime->getUST) (&radeon->swap_ust);
return GL_TRUE;
}
@@ -277,9 +274,15 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
radeon->glCtx);
if (radeon->dri.drawable != driDrawPriv) {
- driDrawableInitVBlank(driDrawPriv,
- radeon->vblank_flags,
- &radeon->vbl_seq);
+ if (driDrawPriv->swap_interval == (unsigned)-1) {
+ driDrawPriv->vblFlags =
+ (radeon->radeonScreen->irq != 0)
+ ? driGetDefaultVBlankFlags(&radeon->
+ optionCache)
+ : VBLANK_FLAG_NO_IRQ;
+
+ driDrawableInitVBlank(driDrawPriv);
+ }
}
radeon->dri.readable = driReadPriv;
diff --git a/r300/radeon_context.h b/r300/radeon_context.h
index 2f23941..7458d63 100644
--- a/r300/radeon_context.h
+++ b/r300/radeon_context.h
@@ -53,16 +53,6 @@ struct radeon_context;
typedef struct radeon_context radeonContextRec;
typedef struct radeon_context *radeonContextPtr;
-#define TEX_0 0x1
-#define TEX_1 0x2
-#define TEX_2 0x4
-#define TEX_3 0x8
-#define TEX_4 0x10
-#define TEX_5 0x20
-#define TEX_6 0x40
-#define TEX_7 0x80
-#define TEX_ALL 0xff
-
/* Rasterizing fallbacks */
/* See correponding strings in r200_swtcl.c */
#define RADEON_FALLBACK_TEXTURE 0x0001
@@ -182,10 +172,7 @@ struct radeon_context {
GLuint irqsEmitted;
drm_radeon_irq_wait_t iw;
- /* VBI / buffer swap */
- GLuint vbl_seq;
- GLuint vblank_flags;
-
+ /* buffer swap */
int64_t swap_ust;
int64_t swap_missed_ust;
diff --git a/r300/radeon_ioctl.c b/r300/radeon_ioctl.c
index 0b8656b..0c1a195 100644
--- a/r300/radeon_ioctl.c
+++ b/r300/radeon_ioctl.c
@@ -157,13 +157,14 @@ static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
/* Copy the back color buffer to the front color buffer.
*/
-void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv,
+void radeonCopyBuffer(__DRIdrawablePrivate * dPriv,
const drm_clip_rect_t * rect)
{
radeonContextPtr radeon;
GLint nbox, i, ret;
GLboolean missed_target;
int64_t ust;
+ __DRIscreenPrivate *psp = dPriv->driScreenPriv;
assert(dPriv);
assert(dPriv->driContextPriv);
@@ -187,8 +188,7 @@ void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv,
if (!rect)
{
UNLOCK_HARDWARE(radeon);
- driWaitForVBlank(dPriv, &radeon->vbl_seq, radeon->vblank_flags,
- &missed_target);
+ driWaitForVBlank(dPriv, &missed_target);
LOCK_HARDWARE(radeon);
}
@@ -215,16 +215,18 @@ void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv,
if (rect->y2 < b->y2)
b->y2 = rect->y2;
- if (b->x1 < b->x2 && b->y1 < b->y2)
- b++;
+ if (b->x1 >= b->x2 || b->y1 >= b->y2)
+ continue;
}
- else
- b++;
+ b++;
n++;
}
radeon->sarea->nbox = n;
+ if (!n)
+ continue;
+
ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP);
if (ret) {
@@ -241,7 +243,7 @@ void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv,
((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE;
radeon->swap_count++;
- (*dri_interface->getUST) (&ust);
+ (*psp->systemTime->getUST) (&ust);
if (missed_target) {
radeon->swap_missed_count++;
radeon->swap_missed_ust = ust - radeon->swap_ust;
@@ -253,11 +255,12 @@ void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv,
}
}
-void radeonPageFlip(const __DRIdrawablePrivate * dPriv)
+void radeonPageFlip(__DRIdrawablePrivate * dPriv)
{
radeonContextPtr radeon;
GLint ret;
GLboolean missed_target;
+ __DRIscreenPrivate *psp = dPriv->driScreenPriv;
assert(dPriv);
assert(dPriv->driContextPriv);
@@ -293,11 +296,10 @@ void radeonPageFlip(const __DRIdrawablePrivate * dPriv)
*/
radeonWaitForFrameCompletion(radeon);
UNLOCK_HARDWARE(radeon);
- driWaitForVBlank(dPriv, &radeon->vbl_seq, radeon->vblank_flags,
- &missed_target);
+ driWaitForVBlank(dPriv, &missed_target);
if (missed_target) {
radeon->swap_missed_count++;
- (void)(*dri_interface->getUST) (&radeon->swap_missed_ust);
+ (void)(*psp->systemTime->getUST) (&radeon->swap_missed_ust);
}
LOCK_HARDWARE(radeon);
@@ -311,7 +313,7 @@ void radeonPageFlip(const __DRIdrawablePrivate * dPriv)
}
radeon->swap_count++;
- (void)(*dri_interface->getUST) (&radeon->swap_ust);
+ (void)(*psp->systemTime->getUST) (&radeon->swap_ust);
driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer,
radeon->sarea->pfCurrentPage);
diff --git a/r300/radeon_ioctl.h b/r300/radeon_ioctl.h
index 3a80d36..210001e 100644
--- a/r300/radeon_ioctl.h
+++ b/r300/radeon_ioctl.h
@@ -46,9 +46,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#endif
#include "radeon_drm.h"
-extern void radeonCopyBuffer(const __DRIdrawablePrivate * drawable,
+extern void radeonCopyBuffer(__DRIdrawablePrivate * drawable,
const drm_clip_rect_t * rect);
-extern void radeonPageFlip(const __DRIdrawablePrivate * drawable);
+extern void radeonPageFlip(__DRIdrawablePrivate * drawable);
extern void radeonFlush(GLcontext * ctx);
extern void radeonFinish(GLcontext * ctx);
extern void radeonWaitForIdleLocked(radeonContextPtr radeon);
diff --git a/r300/radeon_lock.c b/r300/radeon_lock.c
index bc3c2d6..d54a821 100644
--- a/r300/radeon_lock.c
+++ b/r300/radeon_lock.c
@@ -68,8 +68,8 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa)
}
use_back = rmesa->glCtx->DrawBuffer ?
- (rmesa->glCtx->DrawBuffer->_ColorDrawBufferMask[0] ==
- BUFFER_BIT_BACK_LEFT) : 1;
+ (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] ==
+ BUFFER_BACK_LEFT) : 1;
use_back ^= (rmesa->sarea->pfCurrentPage == 1);
if (use_back) {
diff --git a/r300/radeon_lock.h b/r300/radeon_lock.h
index c47adc9..a344837 100644
--- a/r300/radeon_lock.h
+++ b/r300/radeon_lock.h
@@ -42,9 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#ifndef __RADEON_LOCK_H__
#define __RADEON_LOCK_H__
-#if 0
-#include "r200_ioctl.h"
-#endif
#include "radeon_context.h"
extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
diff --git a/r300/radeon_nqssadce.c b/r300/radeon_nqssadce.c
new file mode 100644
index 0000000..97ce016
--- /dev/null
+++ b/r300/radeon_nqssadce.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * "Not-quite SSA" and Dead-Code Elimination.
+ *
+ * @note This code uses SWIZZLE_NIL in a source register to indicate that
+ * the corresponding component is ignored by the corresponding instruction.
+ */
+
+#include "radeon_nqssadce.h"
+
+
+/**
+ * Return the @ref register_state for the given register (or 0 for untracked
+ * registers, i.e. constants).
+ */
+static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
+{
+ switch(file) {
+ case PROGRAM_TEMPORARY: return &s->Temps[index];
+ case PROGRAM_OUTPUT: return &s->Outputs[index];
+ default: return 0;
+ }
+}
+
+
+/**
+ * Left multiplication of a register with a swizzle
+ *
+ * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
+ */
+static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
+{
+ struct prog_src_register tmp = srcreg;
+ int i;
+ tmp.Swizzle = 0;
+ tmp.NegateBase = 0;
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(swizzle, i);
+ if (swz < 4) {
+ tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+ tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i;
+ } else {
+ tmp.Swizzle |= swz << (i*3);
+ }
+ }
+ return tmp;
+}
+
+
+static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
+ struct prog_instruction *inst, GLint src, GLuint sourced)
+{
+ int i;
+ GLuint deswz_source = 0;
+
+ for(i = 0; i < 4; ++i) {
+ if (GET_BIT(sourced, i)) {
+ GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
+ deswz_source |= 1 << swz;
+ } else {
+ inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
+ inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
+ }
+ }
+
+ if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
+ struct prog_dst_register dstreg = inst->DstReg;
+ dstreg.File = PROGRAM_TEMPORARY;
+ dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+ dstreg.WriteMask = sourced;
+
+ s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
+
+ inst = s->Program->Instructions + s->IP;
+ inst->SrcReg[src].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[src].Index = dstreg.Index;
+ inst->SrcReg[src].Swizzle = 0;
+ inst->SrcReg[src].NegateBase = 0;
+ inst->SrcReg[src].Abs = 0;
+ inst->SrcReg[src].NegateAbs = 0;
+ for(i = 0; i < 4; ++i) {
+ if (GET_BIT(sourced, i))
+ inst->SrcReg[src].Swizzle |= i << (3*i);
+ else
+ inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
+ }
+ deswz_source = sourced;
+ }
+
+ struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
+ if (regstate)
+ regstate->Sourced |= deswz_source & 0xf;
+
+ return inst;
+}
+
+
+static void rewrite_depth_out(struct prog_instruction *inst)
+{
+ if (inst->DstReg.WriteMask & WRITEMASK_Z) {
+ inst->DstReg.WriteMask = WRITEMASK_W;
+ } else {
+ inst->DstReg.WriteMask = 0;
+ return;
+ }
+
+ switch (inst->Opcode) {
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ break;
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ break;
+ case OPCODE_CMP:
+ case OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ break;
+ default:
+ // Scalar instructions needn't be reswizzled
+ break;
+ }
+}
+
+static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
+{
+ int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int i;
+ for(i = 0; i < nsrc; ++i)
+ if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
+ inst->SrcReg[i].Index = newindex;
+}
+
+static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
+{
+ GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+ int ip;
+ for(ip = 0; ip < s->IP; ++ip) {
+ struct prog_instruction* inst = s->Program->Instructions + ip;
+ if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
+ inst->DstReg.Index = newindex;
+ unalias_srcregs(inst, oldindex, newindex);
+ }
+ unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
+}
+
+
+/**
+ * Handle one instruction.
+ */
+static void process_instruction(struct nqssadce_state* s)
+{
+ struct prog_instruction *inst = s->Program->Instructions + s->IP;
+
+ if (inst->Opcode == OPCODE_END)
+ return;
+
+ if (inst->Opcode != OPCODE_KIL) {
+ if (s->Descr->RewriteDepthOut) {
+ if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR)
+ rewrite_depth_out(inst);
+ }
+
+ struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
+ if (!regstate) {
+ _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
+ inst->DstReg.File, inst->DstReg.Index);
+ return;
+ }
+
+ inst->DstReg.WriteMask &= regstate->Sourced;
+ regstate->Sourced &= ~inst->DstReg.WriteMask;
+
+ if (inst->DstReg.WriteMask == 0) {
+ _mesa_delete_instructions(s->Program, s->IP, 1);
+ return;
+ }
+
+ if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
+ unalias_temporary(s, inst->DstReg.Index);
+ }
+
+ /* Attention: Due to swizzle emulation code, the following
+ * might change the instruction stream under us, so we have
+ * to be careful with the inst pointer. */
+ switch (inst->Opcode) {
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ break;
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+ break;
+ case OPCODE_CMP:
+ case OPCODE_MAD:
+ inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+ inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
+ break;
+ case OPCODE_COS:
+ case OPCODE_EX2:
+ case OPCODE_LG2:
+ case OPCODE_RCP:
+ case OPCODE_RSQ:
+ case OPCODE_SIN:
+ inst = track_used_srcreg(s, inst, 0, 0x1);
+ break;
+ case OPCODE_DP3:
+ inst = track_used_srcreg(s, inst, 0, 0x7);
+ inst = track_used_srcreg(s, inst, 1, 0x7);
+ break;
+ case OPCODE_DP4:
+ inst = track_used_srcreg(s, inst, 0, 0xf);
+ inst = track_used_srcreg(s, inst, 1, 0xf);
+ break;
+ case OPCODE_KIL:
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ case OPCODE_TXP:
+ inst = track_used_srcreg(s, inst, 0, 0xf);
+ break;
+ default:
+ _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
+ return;
+ }
+}
+
+
+void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
+{
+ struct nqssadce_state s;
+
+ _mesa_bzero(&s, sizeof(s));
+ s.Ctx = ctx;
+ s.Program = p;
+ s.Descr = descr;
+ s.Descr->Init(&s);
+ s.IP = p->NumInstructions;
+
+ while(s.IP > 0) {
+ s.IP--;
+ process_instruction(&s);
+ }
+}
diff --git a/r300/radeon_nqssadce.h b/r300/radeon_nqssadce.h
new file mode 100644
index 0000000..a4f94ab
--- /dev/null
+++ b/r300/radeon_nqssadce.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_NQSSADCE_H_
+#define __RADEON_PROGRAM_NQSSADCE_H_
+
+#include "radeon_program.h"
+
+
+struct register_state {
+ /**
+ * Bitmask indicating which components of the register are sourced
+ * by later instructions.
+ */
+ GLuint Sourced : 4;
+};
+
+/**
+ * Maintain state such as which registers are used, which registers are
+ * read from, etc.
+ */
+struct nqssadce_state {
+ GLcontext *Ctx;
+ struct gl_program *Program;
+ struct radeon_nqssadce_descr *Descr;
+
+ /**
+ * All instructions after this instruction pointer have been dealt with.
+ */
+ int IP;
+
+ /**
+ * Which registers are read by subsequent instructions?
+ */
+ struct register_state Temps[MAX_PROGRAM_TEMPS];
+ struct register_state Outputs[VERT_RESULT_MAX];
+};
+
+
+/**
+ * This structure contains a description of the hardware in-so-far as
+ * it is required for the NqSSA-DCE pass.
+ */
+struct radeon_nqssadce_descr {
+ /**
+ * Fill in which outputs
+ */
+ void (*Init)(struct nqssadce_state *);
+
+ /**
+ * Check whether the given swizzle, absolute and negate combination
+ * can be implemented natively by the hardware for this opcode.
+ */
+ GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg);
+
+ /**
+ * Emit (at the current IP) the instruction MOV dst, src;
+ * The transformation will work recursively on the emitted instruction(s).
+ */
+ void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
+
+ /**
+ * Rewrite instructions that write to DEPR.z to write to DEPR.w
+ * instead (rewriting is done *before* the WriteMask test).
+ */
+ GLboolean RewriteDepthOut;
+ void *Data;
+};
+
+void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
+
+#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/r300/radeon_program.c b/r300/radeon_program.c
new file mode 100644
index 0000000..da5e7ae
--- /dev/null
+++ b/r300/radeon_program.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include "shader/prog_print.h"
+
+
+/**
+ * Transform the given clause in the following way:
+ * 1. Replace it with an empty clause
+ * 2. For every instruction in the original clause, try the given
+ * transformations in order.
+ * 3. If one of the transformations returns GL_TRUE, assume that it
+ * has emitted the appropriate instruction(s) into the new clause;
+ * otherwise, copy the instruction verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void radeonLocalTransform(
+ GLcontext *Ctx,
+ struct gl_program *program,
+ int num_transformations,
+ struct radeon_program_transformation* transformations)
+{
+ struct radeon_transform_context ctx;
+ int ip;
+
+ ctx.Ctx = Ctx;
+ ctx.Program = program;
+ ctx.OldInstructions = program->Instructions;
+ ctx.OldNumInstructions = program->NumInstructions;
+
+ program->Instructions = 0;
+ program->NumInstructions = 0;
+
+ for(ip = 0; ip < ctx.OldNumInstructions; ++ip) {
+ struct prog_instruction *instr = ctx.OldInstructions + ip;
+ int i;
+
+ for(i = 0; i < num_transformations; ++i) {
+ struct radeon_program_transformation* t = transformations + i;
+
+ if (t->function(&ctx, instr, t->userData))
+ break;
+ }
+
+ if (i >= num_transformations) {
+ struct prog_instruction* dest = radeonAppendInstructions(program, 1);
+ _mesa_copy_instructions(dest, instr, 1);
+ }
+ }
+
+ _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions);
+}
+
+
+static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count)
+{
+ GLuint i;
+ for (i = 0; i < count; i++) {
+ const struct prog_instruction *inst = insts + i;
+ const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint k;
+
+ for (k = 0; k < n; k++) {
+ if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
+ used[inst->SrcReg[k].Index] = GL_TRUE;
+ }
+ }
+}
+
+GLint radeonFindFreeTemporary(struct radeon_transform_context *t)
+{
+ GLboolean used[MAX_PROGRAM_TEMPS];
+ GLuint i;
+
+ _mesa_memset(used, 0, sizeof(used));
+ scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions);
+ scan_instructions(used, t->OldInstructions, t->OldNumInstructions);
+
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ if (!used[i])
+ return i;
+ }
+
+ return -1;
+}
+
+
+/**
+ * Append the given number of instructions to the program and return a
+ * pointer to the first new instruction.
+ */
+struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count)
+{
+ int oldnum = program->NumInstructions;
+ _mesa_insert_instructions(program, oldnum, count);
+ return program->Instructions + oldnum;
+}
diff --git a/r300/radeon_program.h b/r300/radeon_program.h
new file mode 100644
index 0000000..2e01dd4
--- /dev/null
+++ b/r300/radeon_program.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_H_
+#define __RADEON_PROGRAM_H_
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+
+
+enum {
+ CLAUSE_MIXED = 0,
+ CLAUSE_ALU,
+ CLAUSE_TEX
+};
+
+enum {
+ PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
+};
+
+enum {
+ OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */
+};
+
+#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
+#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
+
+/**
+ * Transformation context that is passed to local transformations.
+ *
+ * Care must be taken with some operations during transformation,
+ * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
+ */
+struct radeon_transform_context {
+ GLcontext *Ctx;
+ struct gl_program *Program;
+ struct prog_instruction *OldInstructions;
+ GLuint OldNumInstructions;
+};
+
+/**
+ * A transformation that can be passed to \ref radeonLocalTransform.
+ *
+ * The function will be called once for each instruction.
+ * It has to either emit the appropriate transformed code for the instruction
+ * and return GL_TRUE, or return GL_FALSE if it doesn't understand the
+ * instruction.
+ *
+ * The function gets passed the userData as last parameter.
+ */
+struct radeon_program_transformation {
+ GLboolean (*function)(
+ struct radeon_transform_context*,
+ struct prog_instruction*,
+ void*);
+ void *userData;
+};
+
+void radeonLocalTransform(
+ GLcontext* ctx,
+ struct gl_program *program,
+ int num_transformations,
+ struct radeon_program_transformation* transformations);
+
+/**
+ * Find a usable free temporary register during program transformation
+ */
+GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
+
+struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
+
+#endif
diff --git a/r300/radeon_program_alu.c b/r300/radeon_program_alu.c
new file mode 100644
index 0000000..1ef71e7
--- /dev/null
+++ b/r300/radeon_program_alu.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Shareable transformations that transform "special" ALU instructions
+ * into ALU instructions that are supported by hardware.
+ *
+ */
+
+#include "radeon_program_alu.h"
+
+#include "shader/prog_parameter.h"
+
+
+static struct prog_instruction *emit1(struct gl_program* p,
+ gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
+ struct prog_src_register SrcReg)
+{
+ struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+
+ fpi->Opcode = Opcode;
+ fpi->SaturateMode = Saturate;
+ fpi->DstReg = DstReg;
+ fpi->SrcReg[0] = SrcReg;
+ return fpi;
+}
+
+static struct prog_instruction *emit2(struct gl_program* p,
+ gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
+ struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
+{
+ struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+
+ fpi->Opcode = Opcode;
+ fpi->SaturateMode = Saturate;
+ fpi->DstReg = DstReg;
+ fpi->SrcReg[0] = SrcReg0;
+ fpi->SrcReg[1] = SrcReg1;
+ return fpi;
+}
+
+static struct prog_instruction *emit3(struct gl_program* p,
+ gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
+ struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
+ struct prog_src_register SrcReg2)
+{
+ struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+
+ fpi->Opcode = Opcode;
+ fpi->SaturateMode = Saturate;
+ fpi->DstReg = DstReg;
+ fpi->SrcReg[0] = SrcReg0;
+ fpi->SrcReg[1] = SrcReg1;
+ fpi->SrcReg[2] = SrcReg2;
+ return fpi;
+}
+
+static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz)
+{
+ SrcReg->Swizzle &= ~(7 << (3*coordinate));
+ SrcReg->Swizzle |= swz << (3*coordinate);
+}
+
+static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate)
+{
+ SrcReg->NegateBase &= ~(1 << coordinate);
+ SrcReg->NegateBase |= (negate << coordinate);
+}
+
+static struct prog_dst_register dstreg(int file, int index)
+{
+ struct prog_dst_register dst;
+ dst.File = file;
+ dst.Index = index;
+ dst.WriteMask = WRITEMASK_XYZW;
+ dst.CondMask = COND_TR;
+ dst.CondSwizzle = SWIZZLE_NOOP;
+ dst.CondSrc = 0;
+ dst.pad = 0;
+ return dst;
+}
+
+static struct prog_dst_register dstregtmpmask(int index, int mask)
+{
+ struct prog_dst_register dst;
+ dst.File = PROGRAM_TEMPORARY;
+ dst.Index = index;
+ dst.WriteMask = mask;
+ dst.CondMask = COND_TR;
+ dst.CondSwizzle = SWIZZLE_NOOP;
+ dst.CondSrc = 0;
+ dst.pad = 0;
+ return dst;
+}
+
+static const struct prog_src_register builtin_zero = {
+ .File = PROGRAM_BUILTIN,
+ .Index = 0,
+ .Swizzle = SWIZZLE_0000
+};
+static const struct prog_src_register builtin_one = {
+ .File = PROGRAM_BUILTIN,
+ .Index = 0,
+ .Swizzle = SWIZZLE_1111
+};
+static const struct prog_src_register srcreg_undefined = {
+ .File = PROGRAM_UNDEFINED,
+ .Index = 0,
+ .Swizzle = SWIZZLE_NOOP
+};
+
+static struct prog_src_register srcreg(int file, int index)
+{
+ struct prog_src_register src = srcreg_undefined;
+ src.File = file;
+ src.Index = index;
+ return src;
+}
+
+static struct prog_src_register srcregswz(int file, int index, int swz)
+{
+ struct prog_src_register src = srcreg_undefined;
+ src.File = file;
+ src.Index = index;
+ src.Swizzle = swz;
+ return src;
+}
+
+static struct prog_src_register absolute(struct prog_src_register reg)
+{
+ struct prog_src_register newreg = reg;
+ newreg.Abs = 1;
+ newreg.NegateBase = 0;
+ newreg.NegateAbs = 0;
+ return newreg;
+}
+
+static struct prog_src_register negate(struct prog_src_register reg)
+{
+ struct prog_src_register newreg = reg;
+ newreg.NegateAbs = !newreg.NegateAbs;
+ return newreg;
+}
+
+static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w)
+{
+ struct prog_src_register swizzled = reg;
+ swizzled.Swizzle = MAKE_SWIZZLE4(
+ x >= 4 ? x : GET_SWZ(reg.Swizzle, x),
+ y >= 4 ? y : GET_SWZ(reg.Swizzle, y),
+ z >= 4 ? z : GET_SWZ(reg.Swizzle, z),
+ w >= 4 ? w : GET_SWZ(reg.Swizzle, w));
+ return swizzled;
+}
+
+static struct prog_src_register scalar(struct prog_src_register reg)
+{
+ return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+}
+
+static void transform_ABS(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ struct prog_src_register src = inst->SrcReg[0];
+ src.Abs = 1;
+ src.NegateBase = 0;
+ src.NegateAbs = 0;
+ emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
+}
+
+static void transform_DPH(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ struct prog_src_register src0 = inst->SrcReg[0];
+ if (src0.NegateAbs) {
+ if (src0.Abs) {
+ int tempreg = radeonFindFreeTemporary(t);
+ emit1(t->Program, OPCODE_MOV, 0, dstreg(PROGRAM_TEMPORARY, tempreg), src0);
+ src0 = srcreg(src0.File, src0.Index);
+ } else {
+ src0.NegateAbs = 0;
+ src0.NegateBase ^= NEGATE_XYZW;
+ }
+ }
+ set_swizzle(&src0, 3, SWIZZLE_ONE);
+ set_negate_base(&src0, 3, 0);
+ emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]);
+}
+
+/**
+ * [1, src0.y*src1.y, src0.z, src1.w]
+ * So basically MUL with lotsa swizzling.
+ */
+static void transform_DST(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg,
+ swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
+ swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
+}
+
+static void transform_FLR(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+ emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
+ emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg,
+ inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+}
+
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ * tmp = VectorLoad(op0);
+ * if (tmp.x < 0) tmp.x = 0;
+ * if (tmp.y < 0) tmp.y = 0;
+ * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ * result.x = 1.0;
+ * result.y = tmp.x;
+ * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ * result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ */
+static void transform_LIT(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ static const GLfloat LitConst[4] = { -127.999999 };
+
+ GLuint constant;
+ GLuint constant_swizzle;
+ GLuint temp;
+ int needTemporary = 0;
+ struct prog_src_register srctemp;
+
+ constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
+
+ if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
+ needTemporary = 1;
+ } else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
+ // LIT is typically followed by DP3/DP4, so there's no point
+ // in creating special code for this case
+ needTemporary = 1;
+ }
+
+ if (needTemporary) {
+ temp = radeonFindFreeTemporary(t);
+ } else {
+ temp = inst->DstReg.Index;
+ }
+ srctemp = srcreg(PROGRAM_TEMPORARY, temp);
+
+ // tmp.x = max(0.0, Src.x);
+ // tmp.y = max(0.0, Src.y);
+ // tmp.w = clamp(Src.z, -128+eps, 128-eps);
+ emit2(t->Program, OPCODE_MAX, 0,
+ dstregtmpmask(temp, WRITEMASK_XYW),
+ inst->SrcReg[0],
+ swizzle(srcreg(PROGRAM_CONSTANT, constant),
+ SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
+ emit2(t->Program, OPCODE_MIN, 0,
+ dstregtmpmask(temp, WRITEMASK_Z),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
+
+ // tmp.w = Pow(tmp.y, tmp.w)
+ emit1(t->Program, OPCODE_LG2, 0,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
+ emit2(t->Program, OPCODE_MUL, 0,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
+ emit1(t->Program, OPCODE_EX2, 0,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+
+ // tmp.z = (tmp.x > 0) ? tmp.w : 0.0
+ emit3(t->Program, OPCODE_CMP, inst->SaturateMode,
+ dstregtmpmask(temp, WRITEMASK_Z),
+ negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ builtin_zero);
+
+ // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
+ emit1(t->Program, OPCODE_MOV, inst->SaturateMode,
+ dstregtmpmask(temp, WRITEMASK_XYW),
+ swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
+
+ if (needTemporary)
+ emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp);
+}
+
+static void transform_LRP(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_ADD, 0,
+ dstreg(PROGRAM_TEMPORARY, tempreg),
+ inst->SrcReg[1], negate(inst->SrcReg[2]));
+ emit3(t->Program, OPCODE_MAD, inst->SaturateMode,
+ inst->DstReg,
+ inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
+}
+
+static void transform_POW(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+ struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
+ struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
+ tempdst.WriteMask = WRITEMASK_W;
+ tempsrc.Swizzle = SWIZZLE_WWWW;
+
+ emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));
+ emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));
+ emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);
+}
+
+static void transform_RSQ(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));
+}
+
+static void transform_SGE(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
+ emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
+ srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
+}
+
+static void transform_SLT(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
+ emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
+ srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
+}
+
+static void transform_SUB(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
+}
+
+static void transform_SWZ(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);
+}
+
+static void transform_XPD(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
+ swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
+ swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
+ emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,
+ swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
+ swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
+ negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+}
+
+
+/**
+ * Can be used as a transformation for @ref radeonClauseLocalTransform,
+ * no userData necessary.
+ *
+ * Eliminates the following ALU instructions:
+ * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
+ * using:
+ * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
+ *
+ * Transforms RSQ to Radeon's native RSQ by explicitly setting
+ * absolute value.
+ *
+ * @note should be applicable to R300 and R500 fragment programs.
+ */
+GLboolean radeonTransformALU(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ switch(inst->Opcode) {
+ case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
+ case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
+ case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;
+ case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
+ case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
+ case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
+ case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
+ case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;
+ case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
+ case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
+ case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
+ case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
+ case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
+ default:
+ return GL_FALSE;
+ }
+}
+
+
+static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
+{
+ static const GLfloat SinCosConsts[2][4] = {
+ {
+ 1.273239545, // 4/PI
+ -0.405284735, // -4/(PI*PI)
+ 3.141592654, // PI
+ 0.2225 // weight
+ },
+ {
+ 0.75,
+ 0.5,
+ 0.159154943, // 1/(2*PI)
+ 6.283185307 // 2*PI
+ }
+ };
+ int i;
+
+ for(i = 0; i < 2; ++i) {
+ GLuint swz;
+ constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
+ ASSERT(swz == SWIZZLE_NOOP);
+ }
+}
+
+/**
+ * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
+ *
+ * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
+ * MAD tmp.x, tmp.y, |src|, tmp.x
+ * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
+ * MAD dest, tmp.y, weight, tmp.x
+ */
+static void sin_approx(struct radeon_transform_context* t,
+ struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
+{
+ GLuint tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ srcreg(PROGRAM_CONSTANT, constants[0]));
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+ negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
+ emit3(t->Program, OPCODE_MAD, 0, dst,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+}
+
+/**
+ * Translate the trigonometric functions COS, SIN, and SCS
+ * using only the basic instructions
+ * MOV, ADD, MUL, MAD, FRC
+ */
+GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ if (inst->Opcode != OPCODE_COS &&
+ inst->Opcode != OPCODE_SIN &&
+ inst->Opcode != OPCODE_SCS)
+ return GL_FALSE;
+
+ GLuint constants[2];
+ GLuint tempreg = radeonFindFreeTemporary(t);
+
+ sincos_constants(t, constants);
+
+ if (inst->Opcode == OPCODE_COS) {
+ // MAD tmp.x, src, 1/(2*PI), 0.75
+ // FRC tmp.x, tmp.x
+ // MAD tmp.z, tmp.x, 2*PI, -PI
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+ emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+ sin_approx(t, inst->DstReg,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ constants);
+ } else if (inst->Opcode == OPCODE_SIN) {
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
+ emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+ sin_approx(t, inst->DstReg,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ constants);
+ } else {
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
+ emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ srcreg(PROGRAM_TEMPORARY, tempreg));
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ srcreg(PROGRAM_TEMPORARY, tempreg),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+ struct prog_dst_register dst = inst->DstReg;
+
+ dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
+ sin_approx(t, dst,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ constants);
+
+ dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
+ sin_approx(t, dst,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ constants);
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * to include pre-scaling by 1/(2*PI) and taking the fractional
+ * part, so that the input to COS and SIN is always in the range [0,1).
+ * SCS is replaced by one COS and one SIN instruction.
+ *
+ * @warning This transformation implicitly changes the semantics of SIN and COS!
+ */
+GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ if (inst->Opcode != OPCODE_COS &&
+ inst->Opcode != OPCODE_SIN &&
+ inst->Opcode != OPCODE_SCS)
+ return GL_FALSE;
+
+ static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
+ GLuint temp;
+ GLuint constant;
+ GLuint constant_swizzle;
+
+ temp = radeonFindFreeTemporary(t);
+ constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
+
+ emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
+ emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
+ srcreg(PROGRAM_TEMPORARY, temp));
+
+ if (inst->Opcode == OPCODE_COS) {
+ emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg,
+ srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ } else if (inst->Opcode == OPCODE_SIN) {
+ emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
+ inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ } else if (inst->Opcode == OPCODE_SCS) {
+ struct prog_dst_register moddst = inst->DstReg;
+
+ if (inst->DstReg.WriteMask & WRITEMASK_X) {
+ moddst.WriteMask = WRITEMASK_X;
+ emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst,
+ srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ }
+ if (inst->DstReg.WriteMask & WRITEMASK_Y) {
+ moddst.WriteMask = WRITEMASK_Y;
+ emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst,
+ srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ }
+ }
+
+ return GL_TRUE;
+}
+
+/**
+ * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
+ * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
+ * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
+ *
+ * @warning This explicitly changes the form of DDX and DDY!
+ */
+
+GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
+ return GL_FALSE;
+
+ struct prog_src_register B = inst->SrcReg[1];
+
+ B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
+ SWIZZLE_ONE, SWIZZLE_ONE);
+ B.NegateBase = NEGATE_XYZW;
+
+ emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
+ inst->SrcReg[0], B);
+
+ return GL_TRUE;
+}
diff --git a/r300/radeon_program_alu.h b/r300/radeon_program_alu.h
new file mode 100644
index 0000000..b459581
--- /dev/null
+++ b/r300/radeon_program_alu.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_ALU_H_
+#define __RADEON_PROGRAM_ALU_H_
+
+#include "radeon_program.h"
+
+GLboolean radeonTransformALU(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
+GLboolean radeonTransformTrigSimple(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
+GLboolean radeonTransformTrigScale(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
+GLboolean radeonTransformDeriv(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
+#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/r300/radeon_program_pair.c b/r300/radeon_program_pair.c
new file mode 100644
index 0000000..5ad50d2
--- /dev/null
+++ b/r300/radeon_program_pair.c
@@ -0,0 +1,1001 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Perform temporary register allocation and attempt to pair off instructions
+ * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
+ * vs. ALU instruction scheduling.
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_context.h"
+
+#include "shader/prog_print.h"
+
+#define error(fmt, args...) do { \
+ _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ s->Error = GL_TRUE; \
+} while(0)
+
+struct pair_state_instruction {
+ GLuint IsTex:1; /**< Is a texture instruction */
+ GLuint NeedRGB:1; /**< Needs the RGB ALU */
+ GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
+ GLuint IsTranscendent:1; /**< Is a special transcendent instruction */
+
+ /**
+ * Number of (read and write) dependencies that must be resolved before
+ * this instruction can be scheduled.
+ */
+ GLuint NumDependencies:5;
+
+ /**
+ * Next instruction in the linked list of ready instructions.
+ */
+ struct pair_state_instruction *NextReady;
+
+ /**
+ * Values that this instruction writes
+ */
+ struct reg_value *Values[4];
+};
+
+
+/**
+ * Used to keep track of which instructions read a value.
+ */
+struct reg_value_reader {
+ GLuint IP; /**< IP of the instruction that performs this access */
+ struct reg_value_reader *Next;
+};
+
+/**
+ * Used to keep track which values are stored in each component of a
+ * PROGRAM_TEMPORARY.
+ */
+struct reg_value {
+ GLuint IP; /**< IP of the instruction that writes this value */
+ struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */
+
+ /**
+ * Unordered linked list of instructions that read from this value.
+ */
+ struct reg_value_reader *Readers;
+
+ /**
+ * Number of readers of this value. This is calculated during @ref scan_instructions
+ * and continually decremented during code emission.
+ * When this count reaches zero, the instruction that writes the @ref Next value
+ * can be scheduled.
+ */
+ GLuint NumReaders;
+};
+
+/**
+ * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
+ * to the proper hardware temporary.
+ */
+struct pair_register_translation {
+ GLuint Allocated:1;
+ GLuint HwIndex:8;
+ GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */
+
+ /**
+ * Notes the value that is currently contained in each component
+ * (only used for PROGRAM_TEMPORARY registers).
+ */
+ struct reg_value *Value[4];
+};
+
+struct pair_state {
+ GLcontext *Ctx;
+ struct gl_program *Program;
+ const struct radeon_pair_handler *Handler;
+ GLboolean Error;
+ GLboolean Debug;
+ GLboolean Verbose;
+ void *UserData;
+
+ /**
+ * Translate Mesa registers to hardware registers
+ */
+ struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
+ struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];
+
+ /**
+ * Derived information about program instructions.
+ */
+ struct pair_state_instruction *Instructions;
+
+ struct {
+ GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
+ } HwTemps[128];
+
+ /**
+ * Linked list of instructions that can be scheduled right now,
+ * based on which ALU/TEX resources they require.
+ */
+ struct pair_state_instruction *ReadyFullALU;
+ struct pair_state_instruction *ReadyRGB;
+ struct pair_state_instruction *ReadyAlpha;
+ struct pair_state_instruction *ReadyTEX;
+
+ /**
+ * Pool of @ref reg_value structures for fast allocation.
+ */
+ struct reg_value *ValuePool;
+ GLuint ValuePoolUsed;
+ struct reg_value_reader *ReaderPool;
+ GLuint ReaderPoolUsed;
+};
+
+
+static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index)
+{
+ switch(file) {
+ case PROGRAM_TEMPORARY: return &s->Temps[index];
+ case PROGRAM_INPUT: return &s->Inputs[index];
+ default: return 0;
+ }
+}
+
+static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex)
+{
+ struct pair_register_translation *t = get_register(s, file, index);
+ ASSERT(!s->HwTemps[hwindex].RefCount);
+ ASSERT(!t->Allocated);
+ s->HwTemps[hwindex].RefCount = t->RefCount;
+ t->Allocated = 1;
+ t->HwIndex = hwindex;
+}
+
+static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
+{
+ GLuint hwindex;
+
+ struct pair_register_translation *t = get_register(s, file, index);
+ if (!t) {
+ _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index);
+ return 0;
+ }
+
+ if (t->Allocated)
+ return t->HwIndex;
+
+ for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex)
+ if (!s->HwTemps[hwindex].RefCount)
+ break;
+
+ if (hwindex >= s->Handler->MaxHwTemps) {
+ error("Ran out of hardware temporaries");
+ return 0;
+ }
+
+ alloc_hw_reg(s, file, index, hwindex);
+ return hwindex;
+}
+
+
+static void deref_hw_reg(struct pair_state *s, GLuint hwindex)
+{
+ if (!s->HwTemps[hwindex].RefCount) {
+ error("Hwindex %i refcount error", hwindex);
+ return;
+ }
+
+ s->HwTemps[hwindex].RefCount--;
+}
+
+static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst)
+{
+ pairinst->NextReady = *list;
+ *list = pairinst;
+}
+
+/**
+ * The instruction at the given IP has become ready. Link it into the ready
+ * instructions.
+ */
+static void instruction_ready(struct pair_state *s, int ip)
+{
+ struct pair_state_instruction *pairinst = s->Instructions + ip;
+
+ if (s->Verbose)
+ _mesa_printf("instruction_ready(%i)\n", ip);
+
+ if (pairinst->IsTex)
+ add_pairinst_to_list(&s->ReadyTEX, pairinst);
+ else if (!pairinst->NeedAlpha)
+ add_pairinst_to_list(&s->ReadyRGB, pairinst);
+ else if (!pairinst->NeedRGB)
+ add_pairinst_to_list(&s->ReadyAlpha, pairinst);
+ else
+ add_pairinst_to_list(&s->ReadyFullALU, pairinst);
+}
+
+
+/**
+ * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
+ * and reverse the order of arguments for CMP.
+ */
+static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
+{
+ struct prog_src_register tmp;
+
+ switch(inst->Opcode) {
+ case OPCODE_ADD:
+ inst->SrcReg[2] = inst->SrcReg[1];
+ inst->SrcReg[1].File = PROGRAM_BUILTIN;
+ inst->SrcReg[1].Swizzle = SWIZZLE_1111;
+ inst->SrcReg[1].NegateBase = 0;
+ inst->SrcReg[1].NegateAbs = 0;
+ inst->Opcode = OPCODE_MAD;
+ break;
+ case OPCODE_CMP:
+ tmp = inst->SrcReg[2];
+ inst->SrcReg[2] = inst->SrcReg[0];
+ inst->SrcReg[0] = tmp;
+ break;
+ case OPCODE_MOV:
+ /* AMD say we should use CMP.
+ * However, when we transform
+ * KIL -r0;
+ * into
+ * CMP tmp, -r0, -r0, 0;
+ * KIL tmp;
+ * we get incorrect behaviour on R500 when r0 == 0.0.
+ * It appears that the R500 KIL hardware treats -0.0 as less
+ * than zero.
+ */
+ inst->SrcReg[1].File = PROGRAM_BUILTIN;
+ inst->SrcReg[1].Swizzle = SWIZZLE_1111;
+ inst->SrcReg[2].File = PROGRAM_BUILTIN;
+ inst->SrcReg[2].Swizzle = SWIZZLE_0000;
+ inst->Opcode = OPCODE_MAD;
+ break;
+ case OPCODE_MUL:
+ inst->SrcReg[2].File = PROGRAM_BUILTIN;
+ inst->SrcReg[2].Swizzle = SWIZZLE_0000;
+ inst->Opcode = OPCODE_MAD;
+ break;
+ default:
+ /* nothing to do */
+ break;
+ }
+}
+
+
+/**
+ * Classify an instruction according to which ALUs etc. it needs
+ */
+static void classify_instruction(struct pair_state *s,
+ struct prog_instruction *inst, struct pair_state_instruction *pairinst)
+{
+ pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
+ pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
+
+ switch(inst->Opcode) {
+ case OPCODE_ADD:
+ case OPCODE_CMP:
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ case OPCODE_FRC:
+ case OPCODE_MAD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MOV:
+ case OPCODE_MUL:
+ break;
+ case OPCODE_COS:
+ case OPCODE_EX2:
+ case OPCODE_LG2:
+ case OPCODE_RCP:
+ case OPCODE_RSQ:
+ case OPCODE_SIN:
+ pairinst->IsTranscendent = 1;
+ pairinst->NeedAlpha = 1;
+ break;
+ case OPCODE_DP4:
+ pairinst->NeedAlpha = 1;
+ /* fall through */
+ case OPCODE_DP3:
+ pairinst->NeedRGB = 1;
+ break;
+ case OPCODE_KIL:
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ case OPCODE_TXP:
+ case OPCODE_END:
+ pairinst->IsTex = 1;
+ break;
+ default:
+ error("Unknown opcode %d\n", inst->Opcode);
+ break;
+ }
+}
+
+
+/**
+ * Count which (input, temporary) register is read and written how often,
+ * and scan the instruction stream to find dependencies.
+ */
+static void scan_instructions(struct pair_state *s)
+{
+ struct prog_instruction *inst;
+ struct pair_state_instruction *pairinst;
+ GLuint ip;
+
+ for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
+ inst->Opcode != OPCODE_END;
+ ++inst, ++pairinst, ++ip) {
+ final_rewrite(s, inst);
+ classify_instruction(s, inst, pairinst);
+
+ int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int j;
+ for(j = 0; j < nsrc; j++) {
+ struct pair_register_translation *t =
+ get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
+ if (!t)
+ continue;
+
+ t->RefCount++;
+
+ if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+ int i;
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
+ if (swz >= 4)
+ continue; /* constant or NIL swizzle */
+ if (!t->Value[swz])
+ continue; /* this is an undefined read */
+
+ /* Do not add a dependency if this instruction
+ * also rewrites the value. The code below adds
+ * a dependency for the DstReg, which is a superset
+ * of the SrcReg dependency. */
+ if (inst->DstReg.File == PROGRAM_TEMPORARY &&
+ inst->DstReg.Index == inst->SrcReg[j].Index &&
+ GET_BIT(inst->DstReg.WriteMask, swz))
+ continue;
+
+ struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
+ pairinst->NumDependencies++;
+ t->Value[swz]->NumReaders++;
+ r->IP = ip;
+ r->Next = t->Value[swz]->Readers;
+ t->Value[swz]->Readers = r;
+ }
+ }
+ }
+
+ int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
+ if (ndst) {
+ struct pair_register_translation *t =
+ get_register(s, inst->DstReg.File, inst->DstReg.Index);
+ if (t) {
+ t->RefCount++;
+
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ int j;
+ for(j = 0; j < 4; ++j) {
+ if (!GET_BIT(inst->DstReg.WriteMask, j))
+ continue;
+
+ struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
+ v->IP = ip;
+ if (t->Value[j]) {
+ pairinst->NumDependencies++;
+ t->Value[j]->Next = v;
+ }
+ t->Value[j] = v;
+ pairinst->Values[j] = v;
+ }
+ }
+ }
+ }
+
+ if (s->Verbose)
+ _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);
+
+ if (!pairinst->NumDependencies)
+ instruction_ready(s, ip);
+ }
+
+ /* Clear the PROGRAM_TEMPORARY state */
+ int i, j;
+ for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
+ for(j = 0; j < 4; ++j)
+ s->Temps[i].Value[j] = 0;
+ }
+}
+
+
+/**
+ * Reserve hardware temporary registers for the program inputs.
+ *
+ * @note This allocation is performed explicitly, because the order of inputs
+ * is determined by the RS hardware.
+ */
+static void allocate_input_registers(struct pair_state *s)
+{
+ GLuint InputsRead = s->Program->InputsRead;
+ int i;
+ GLuint hwindex = 0;
+
+ /* Texcoords come first */
+ for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
+ if (InputsRead & (FRAG_BIT_TEX0 << i))
+ alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
+ }
+ InputsRead &= ~FRAG_BITS_TEX_ANY;
+
+ /* fragment position treated as a texcoord */
+ if (InputsRead & FRAG_BIT_WPOS)
+ alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
+ InputsRead &= ~FRAG_BIT_WPOS;
+
+ /* Then primary colour */
+ if (InputsRead & FRAG_BIT_COL0)
+ alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL0;
+
+ /* Secondary color */
+ if (InputsRead & FRAG_BIT_COL1)
+ alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL1;
+
+ /* Anything else */
+ if (InputsRead)
+ error("Don't know how to handle inputs 0x%x\n", InputsRead);
+}
+
+
+static void decrement_dependencies(struct pair_state *s, int ip)
+{
+ struct pair_state_instruction *pairinst = s->Instructions + ip;
+ ASSERT(pairinst->NumDependencies > 0);
+ if (!--pairinst->NumDependencies)
+ instruction_ready(s, ip);
+}
+
+/**
+ * Update the dependency tracking state based on what the instruction
+ * at the given IP does.
+ */
+static void commit_instruction(struct pair_state *s, int ip)
+{
+ struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct pair_state_instruction *pairinst = s->Instructions + ip;
+
+ if (s->Verbose)
+ _mesa_printf("commit_instruction(%i)\n", ip);
+
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
+ deref_hw_reg(s, t->HwIndex);
+
+ int i;
+ for(i = 0; i < 4; ++i) {
+ if (!GET_BIT(inst->DstReg.WriteMask, i))
+ continue;
+
+ t->Value[i] = pairinst->Values[i];
+ if (t->Value[i]->NumReaders) {
+ struct reg_value_reader *r;
+ for(r = pairinst->Values[i]->Readers; r; r = r->Next)
+ decrement_dependencies(s, r->IP);
+ } else if (t->Value[i]->Next) {
+ /* This happens when the only reader writes
+ * the register at the same time */
+ decrement_dependencies(s, t->Value[i]->Next->IP);
+ }
+ }
+ }
+
+ int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int i;
+ for(i = 0; i < nsrc; i++) {
+ struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index);
+ if (!t)
+ continue;
+
+ deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index));
+
+ if (inst->SrcReg[i].File != PROGRAM_TEMPORARY)
+ continue;
+
+ int j;
+ for(j = 0; j < 4; ++j) {
+ GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+ if (swz >= 4)
+ continue;
+ if (!t->Value[swz])
+ continue;
+
+ /* Do not free a dependency if this instruction
+ * also rewrites the value. See scan_instructions. */
+ if (inst->DstReg.File == PROGRAM_TEMPORARY &&
+ inst->DstReg.Index == inst->SrcReg[i].Index &&
+ GET_BIT(inst->DstReg.WriteMask, swz))
+ continue;
+
+ if (!--t->Value[swz]->NumReaders) {
+ if (t->Value[swz]->Next)
+ decrement_dependencies(s, t->Value[swz]->Next->IP);
+ }
+ }
+ }
+}
+
+
+/**
+ * Emit all ready texture instructions in a single block.
+ *
+ * Emit as a single block to (hopefully) sample many textures in parallel,
+ * and to avoid hardware indirections on R300.
+ *
+ * In R500, we don't really know when the result of a texture instruction
+ * arrives. So allocate all destinations first, to make sure they do not
+ * arrive early and overwrite a texture coordinate we're going to use later
+ * in the block.
+ */
+static void emit_all_tex(struct pair_state *s)
+{
+ struct pair_state_instruction *readytex;
+ struct pair_state_instruction *pairinst;
+
+ ASSERT(s->ReadyTEX);
+
+ // Don't let the ready list change under us!
+ readytex = s->ReadyTEX;
+ s->ReadyTEX = 0;
+
+ // Allocate destination hardware registers in one block to avoid conflicts.
+ for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
+ int ip = pairinst - s->Instructions;
+ struct prog_instruction *inst = s->Program->Instructions + ip;
+ if (inst->Opcode != OPCODE_KIL)
+ get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
+ }
+
+ if (s->Debug)
+ _mesa_printf(" BEGIN_TEX\n");
+
+ if (s->Handler->BeginTexBlock)
+ s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData);
+
+ for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
+ int ip = pairinst - s->Instructions;
+ struct prog_instruction *inst = s->Program->Instructions + ip;
+ commit_instruction(s, ip);
+
+ if (inst->Opcode != OPCODE_KIL)
+ inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
+ inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
+
+ if (s->Debug) {
+ _mesa_printf(" ");
+ _mesa_print_instruction(inst);
+ }
+ s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst);
+ }
+
+ if (s->Debug)
+ _mesa_printf(" END_TEX\n");
+}
+
+
+static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair,
+ struct prog_src_register src, GLboolean rgb, GLboolean alpha)
+{
+ int candidate = -1;
+ int candidate_quality = -1;
+ int i;
+
+ if (!rgb && !alpha)
+ return 0;
+
+ GLuint constant;
+ GLuint index;
+
+ if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) {
+ constant = 0;
+ index = get_hw_reg(s, src.File, src.Index);
+ } else {
+ constant = 1;
+ s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index);
+ }
+
+ for(i = 0; i < 3; ++i) {
+ int q = 0;
+ if (rgb) {
+ if (pair->RGB.Src[i].Used) {
+ if (pair->RGB.Src[i].Constant != constant ||
+ pair->RGB.Src[i].Index != index)
+ continue;
+ q++;
+ }
+ }
+ if (alpha) {
+ if (pair->Alpha.Src[i].Used) {
+ if (pair->Alpha.Src[i].Constant != constant ||
+ pair->Alpha.Src[i].Index != index)
+ continue;
+ q++;
+ }
+ }
+ if (q > candidate_quality) {
+ candidate_quality = q;
+ candidate = i;
+ }
+ }
+
+ if (candidate >= 0) {
+ if (rgb) {
+ pair->RGB.Src[candidate].Used = 1;
+ pair->RGB.Src[candidate].Constant = constant;
+ pair->RGB.Src[candidate].Index = index;
+ }
+ if (alpha) {
+ pair->Alpha.Src[candidate].Used = 1;
+ pair->Alpha.Src[candidate].Constant = constant;
+ pair->Alpha.Src[candidate].Index = index;
+ }
+ }
+
+ return candidate;
+}
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ */
+static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
+{
+ struct pair_state_instruction *pairinst = s->Instructions + ip;
+ struct prog_instruction *inst = s->Program->Instructions + ip;
+
+ ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
+ ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);
+
+ if (pairinst->NeedRGB) {
+ if (pairinst->IsTranscendent)
+ pair->RGB.Opcode = OPCODE_REPL_ALPHA;
+ else
+ pair->RGB.Opcode = inst->Opcode;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ pair->RGB.Saturate = 1;
+ }
+ if (pairinst->NeedAlpha) {
+ pair->Alpha.Opcode = inst->Opcode;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ pair->Alpha.Saturate = 1;
+ }
+
+ int nargs = _mesa_num_inst_src_regs(inst->Opcode);
+ int i;
+
+ /* Special case for DDX/DDY (MDH/MDV). */
+ if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) {
+ if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used)
+ return GL_FALSE;
+ else
+ nargs++;
+ }
+
+ for(i = 0; i < nargs; ++i) {
+ int source;
+ if (pairinst->NeedRGB && !pairinst->IsTranscendent) {
+ GLboolean srcrgb = GL_FALSE;
+ GLboolean srcalpha = GL_FALSE;
+ GLuint negatebase = 0;
+ int j;
+ for(j = 0; j < 3; ++j) {
+ GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+ if (swz < 3)
+ srcrgb = GL_TRUE;
+ else if (swz < 4)
+ srcalpha = GL_TRUE;
+ if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j))
+ negatebase = 1;
+ }
+ source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
+ if (source < 0)
+ return GL_FALSE;
+ pair->RGB.Arg[i].Source = source;
+ pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
+ pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+ pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs;
+ }
+ if (pairinst->NeedAlpha) {
+ GLboolean srcrgb = GL_FALSE;
+ GLboolean srcalpha = GL_FALSE;
+ GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 0 : 3);
+ GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3);
+ if (swz < 3)
+ srcrgb = GL_TRUE;
+ else if (swz < 4)
+ srcalpha = GL_TRUE;
+ source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
+ if (source < 0)
+ return GL_FALSE;
+ pair->Alpha.Arg[i].Source = source;
+ pair->Alpha.Arg[i].Swizzle = swz;
+ pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+ pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Fill in the destination register information.
+ *
+ * This is split from filling in source registers because we want
+ * to avoid allocating hardware temporaries for destinations until
+ * we are absolutely certain that we're going to emit a certain
+ * instruction pairing.
+ */
+static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
+{
+ struct pair_state_instruction *pairinst = s->Instructions + ip;
+ struct prog_instruction *inst = s->Program->Instructions + ip;
+
+ if (inst->DstReg.File == PROGRAM_OUTPUT) {
+ if (inst->DstReg.Index == FRAG_RESULT_COLR) {
+ pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
+ pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ } else if (inst->DstReg.Index == FRAG_RESULT_DEPR) {
+ pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ }
+ } else {
+ GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
+ if (pairinst->NeedRGB) {
+ pair->RGB.DestIndex = hwindex;
+ pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
+ }
+ if (pairinst->NeedAlpha) {
+ pair->Alpha.DestIndex = hwindex;
+ pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ }
+ }
+}
+
+
+/**
+ * Find a good ALU instruction or pair of ALU instruction and emit it.
+ *
+ * Prefer emitting full ALU instructions, so that when we reach a point
+ * where no full ALU instruction can be emitted, we have more candidates
+ * for RGB/Alpha pairing.
+ */
+static void emit_alu(struct pair_state *s)
+{
+ struct radeon_pair_instruction pair;
+
+ if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
+ int ip;
+ if (s->ReadyFullALU) {
+ ip = s->ReadyFullALU - s->Instructions;
+ s->ReadyFullALU = s->ReadyFullALU->NextReady;
+ } else if (s->ReadyRGB) {
+ ip = s->ReadyRGB - s->Instructions;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ } else {
+ ip = s->ReadyAlpha - s->Instructions;
+ s->ReadyAlpha = s->ReadyAlpha->NextReady;
+ }
+
+ _mesa_bzero(&pair, sizeof(pair));
+ fill_instruction_into_pair(s, &pair, ip);
+ fill_dest_into_pair(s, &pair, ip);
+ commit_instruction(s, ip);
+ } else {
+ struct pair_state_instruction **prgb;
+ struct pair_state_instruction **palpha;
+
+ /* Some pairings might fail because they require too
+ * many source slots; try all possible pairings if necessary */
+ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+ for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
+ int rgbip = *prgb - s->Instructions;
+ int alphaip = *palpha - s->Instructions;
+ _mesa_bzero(&pair, sizeof(pair));
+ fill_instruction_into_pair(s, &pair, rgbip);
+ if (!fill_instruction_into_pair(s, &pair, alphaip))
+ continue;
+ *prgb = (*prgb)->NextReady;
+ *palpha = (*palpha)->NextReady;
+ fill_dest_into_pair(s, &pair, rgbip);
+ fill_dest_into_pair(s, &pair, alphaip);
+ commit_instruction(s, rgbip);
+ commit_instruction(s, alphaip);
+ goto success;
+ }
+ }
+
+ /* No success in pairing; just take the first RGB instruction */
+ int ip = s->ReadyRGB - s->Instructions;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ _mesa_bzero(&pair, sizeof(pair));
+ fill_instruction_into_pair(s, &pair, ip);
+ fill_dest_into_pair(s, &pair, ip);
+ commit_instruction(s, ip);
+ success: ;
+ }
+
+ if (s->Debug)
+ radeonPrintPairInstruction(&pair);
+
+ s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);
+}
+
+
+GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
+ const struct radeon_pair_handler* handler, void *userdata)
+{
+ struct pair_state s;
+
+ _mesa_bzero(&s, sizeof(s));
+ s.Ctx = ctx;
+ s.Program = program;
+ s.Handler = handler;
+ s.UserData = userdata;
+ s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
+ s.Verbose = GL_FALSE && s.Debug;
+
+ s.Instructions = (struct pair_state_instruction*)_mesa_calloc(
+ sizeof(struct pair_state_instruction)*s.Program->NumInstructions);
+ s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4);
+ s.ReaderPool = (struct reg_value_reader*)_mesa_calloc(
+ sizeof(struct reg_value_reader)*s.Program->NumInstructions*12);
+
+ if (s.Debug)
+ _mesa_printf("Emit paired program\n");
+
+ scan_instructions(&s);
+ allocate_input_registers(&s);
+
+ while(!s.Error &&
+ (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
+ if (s.ReadyTEX)
+ emit_all_tex(&s);
+
+ while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)
+ emit_alu(&s);
+ }
+
+ if (s.Debug)
+ _mesa_printf(" END\n");
+
+ _mesa_free(s.Instructions);
+ _mesa_free(s.ValuePool);
+ _mesa_free(s.ReaderPool);
+
+ return !s.Error;
+}
+
+
+static void print_pair_src(int i, struct radeon_pair_instruction_source* src)
+{
+ _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index);
+}
+
+static const char* opcode_string(GLuint opcode)
+{
+ if (opcode == OPCODE_REPL_ALPHA)
+ return "SOP";
+ else
+ return _mesa_opcode_string(opcode);
+}
+
+static int num_pairinst_args(GLuint opcode)
+{
+ if (opcode == OPCODE_REPL_ALPHA)
+ return 0;
+ else
+ return _mesa_num_inst_src_regs(opcode);
+}
+
+static char swizzle_char(GLuint swz)
+{
+ switch(swz) {
+ case SWIZZLE_X: return 'x';
+ case SWIZZLE_Y: return 'y';
+ case SWIZZLE_Z: return 'z';
+ case SWIZZLE_W: return 'w';
+ case SWIZZLE_ZERO: return '0';
+ case SWIZZLE_ONE: return '1';
+ case SWIZZLE_NIL: return '_';
+ default: return '?';
+ }
+}
+
+void radeonPrintPairInstruction(struct radeon_pair_instruction *inst)
+{
+ int nargs;
+ int i;
+
+ _mesa_printf(" RGB: ");
+ for(i = 0; i < 3; ++i) {
+ if (inst->RGB.Src[i].Used)
+ print_pair_src(i, inst->RGB.Src + i);
+ }
+ _mesa_printf("\n");
+ _mesa_printf(" Alpha:");
+ for(i = 0; i < 3; ++i) {
+ if (inst->Alpha.Src[i].Used)
+ print_pair_src(i, inst->Alpha.Src + i);
+ }
+ _mesa_printf("\n");
+
+ _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : "");
+ if (inst->RGB.WriteMask)
+ _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex,
+ (inst->RGB.WriteMask & 1) ? "x" : "",
+ (inst->RGB.WriteMask & 2) ? "y" : "",
+ (inst->RGB.WriteMask & 4) ? "z" : "");
+ if (inst->RGB.OutputWriteMask)
+ _mesa_printf(" COLOR.%s%s%s",
+ (inst->RGB.OutputWriteMask & 1) ? "x" : "",
+ (inst->RGB.OutputWriteMask & 2) ? "y" : "",
+ (inst->RGB.OutputWriteMask & 4) ? "z" : "");
+ nargs = num_pairinst_args(inst->RGB.Opcode);
+ for(i = 0; i < nargs; ++i) {
+ const char* abs = inst->RGB.Arg[i].Abs ? "|" : "";
+ const char* neg = inst->RGB.Arg[i].Negate ? "-" : "";
+ _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source,
+ swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)),
+ swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)),
+ swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)),
+ abs);
+ }
+ _mesa_printf("\n");
+
+ _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : "");
+ if (inst->Alpha.WriteMask)
+ _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex);
+ if (inst->Alpha.OutputWriteMask)
+ _mesa_printf(" COLOR.w");
+ if (inst->Alpha.DepthWriteMask)
+ _mesa_printf(" DEPTH.w");
+ nargs = num_pairinst_args(inst->Alpha.Opcode);
+ for(i = 0; i < nargs; ++i) {
+ const char* abs = inst->Alpha.Arg[i].Abs ? "|" : "";
+ const char* neg = inst->Alpha.Arg[i].Negate ? "-" : "";
+ _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source,
+ swizzle_char(inst->Alpha.Arg[i].Swizzle), abs);
+ }
+ _mesa_printf("\n");
+}
diff --git a/r300/radeon_program_pair.h b/r300/radeon_program_pair.h
new file mode 100644
index 0000000..4624a24
--- /dev/null
+++ b/r300/radeon_program_pair.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_PAIR_H_
+#define __RADEON_PROGRAM_PAIR_H_
+
+#include "radeon_program.h"
+
+
+/**
+ * Represents a paired instruction, as found in R300 and R500
+ * fragment programs.
+ */
+struct radeon_pair_instruction_source {
+ GLuint Index:8;
+ GLuint Constant:1;
+ GLuint Used:1;
+};
+
+struct radeon_pair_instruction_rgb {
+ GLuint Opcode:8;
+ GLuint DestIndex:8;
+ GLuint WriteMask:3;
+ GLuint OutputWriteMask:3;
+ GLuint Saturate:1;
+
+ struct radeon_pair_instruction_source Src[3];
+
+ struct {
+ GLuint Source:2;
+ GLuint Swizzle:9;
+ GLuint Abs:1;
+ GLuint Negate:1;
+ } Arg[3];
+};
+
+struct radeon_pair_instruction_alpha {
+ GLuint Opcode:8;
+ GLuint DestIndex:8;
+ GLuint WriteMask:1;
+ GLuint OutputWriteMask:1;
+ GLuint DepthWriteMask:1;
+ GLuint Saturate:1;
+
+ struct radeon_pair_instruction_source Src[3];
+
+ struct {
+ GLuint Source:2;
+ GLuint Swizzle:3;
+ GLuint Abs:1;
+ GLuint Negate:1;
+ } Arg[3];
+};
+
+struct radeon_pair_instruction {
+ struct radeon_pair_instruction_rgb RGB;
+ struct radeon_pair_instruction_alpha Alpha;
+};
+
+
+/**
+ *
+ */
+struct radeon_pair_handler {
+ /**
+ * Fill in the proper hardware index for the given constant register.
+ *
+ * @return GL_FALSE on error.
+ */
+ GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex);
+
+ /**
+ * Write a paired instruction to the hardware.
+ *
+ * @return GL_FALSE on error.
+ */
+ GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*);
+
+ /**
+ * Write a texture instruction to the hardware.
+ * Register indices have already been rewritten to the allocated
+ * hardware register numbers.
+ *
+ * @return GL_FALSE on error.
+ */
+ GLboolean (*EmitTex)(void*, struct prog_instruction*);
+
+ /**
+ * Called before a block of contiguous, independent texture
+ * instructions is emitted.
+ */
+ GLboolean (*BeginTexBlock)(void*);
+
+ GLuint MaxHwTemps;
+};
+
+GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
+ const struct radeon_pair_handler*, void *userdata);
+
+void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
+
+#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/r300/radeon_span.c b/r300/radeon_span.c
index eae09d6..3616d8b 100644
--- a/r300/radeon_span.c
+++ b/r300/radeon_span.c
@@ -172,6 +172,8 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
/* 16-bit depth buffer functions
*/
+#define VALUE_TYPE GLushort
+
#define WRITE_DEPTH( _x, _y, d ) \
*(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
@@ -186,6 +188,8 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
* Careful: It looks like the R300 uses ZZZS byte order while the R200
* uses SZZZ for 24 bit depth, 8 bit stencil mode.
*/
+#define VALUE_TYPE GLuint
+
#ifdef COMPILE_R300
#define WRITE_DEPTH( _x, _y, d ) \
do { \
@@ -282,6 +286,30 @@ static void radeonSpanRenderStart(GLcontext * ctx)
#endif
LOCK_HARDWARE(rmesa);
radeonWaitForIdleLocked(rmesa);
+
+ /* Read the first pixel in the frame buffer. This should
+ * be a noop, right? In fact without this conform fails as reading
+ * from the framebuffer sometimes produces old results -- the
+ * on-card read cache gets mixed up and doesn't notice that the
+ * framebuffer has been updated.
+ *
+ * Note that we should probably be reading some otherwise unused
+ * region of VRAM, otherwise we might get incorrect results when
+ * reading pixels from the top left of the screen.
+ *
+ * I found this problem on an R420 with glean's texCube test.
+ * Note that the R200 span code also *writes* the first pixel in the
+ * framebuffer, but I've found this to be unnecessary.
+ * -- Nicolai Hähnle, June 2008
+ */
+ {
+ int p;
+ driRenderbuffer *drb =
+ (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
+ volatile int *buf =
+ (volatile int *)(rmesa->dri.screen->pFB + drb->offset);
+ p = *buf;
+ }
}
static void radeonSpanRenderFinish(GLcontext * ctx)
diff --git a/r300/radeon_state.c b/r300/radeon_state.c
index 82bfd95..d81318c 100644
--- a/r300/radeon_state.c
+++ b/r300/radeon_state.c
@@ -125,8 +125,8 @@ void radeonUpdateScissor(GLcontext* ctx)
radeon->state.scissor.rect.x1 = x1;
radeon->state.scissor.rect.y1 = y1;
- radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width - 1;
- radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height - 1;
+ radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width;
+ radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height;
radeonRecalcScissorRects(radeon);
}
@@ -152,7 +152,7 @@ void radeonSetCliprects(radeonContextPtr radeon)
GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate;
GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate;
- if (draw_fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) {
+ if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
/* Can't ignore 2d windows if we are page flipping. */
if (drawable->numBackClipRects == 0 || radeon->doPageFlip ||
radeon->sarea->pfCurrentPage == 1) {
diff --git a/radeon/Makefile.am b/radeon/Makefile.am
index ee1d008..9b2fbcf 100644
--- a/radeon/Makefile.am
+++ b/radeon/Makefile.am
@@ -21,4 +21,4 @@ radeon_dri_la_SOURCES = \
radeon_swtcl.c \
radeon_span.c \
radeon_maos.c \
- radeon_sanity.c
+ radeon_sanity.c
diff --git a/radeon/radeon_chipset.h b/radeon/radeon_chipset.h
index 5c17e8f..55a73ea 100644
--- a/radeon/radeon_chipset.h
+++ b/radeon/radeon_chipset.h
@@ -106,6 +106,7 @@
#define PCI_CHIP_RV410_564F 0x564F
#define PCI_CHIP_RV410_5652 0x5652
#define PCI_CHIP_RV410_5653 0x5653
+#define PCI_CHIP_RV410_5657 0x5657
#define PCI_CHIP_RS300_5834 0x5834
#define PCI_CHIP_RS300_5835 0x5835
#define PCI_CHIP_RS480_5954 0x5954
@@ -127,7 +128,6 @@
#define PCI_CHIP_RV370_5B63 0x5B63
#define PCI_CHIP_RV370_5B64 0x5B64
#define PCI_CHIP_RV370_5B65 0x5B65
-#define PCI_CHIP_RV370_5657 0x5657
#define PCI_CHIP_RV280_5C61 0x5C61
#define PCI_CHIP_RV280_5C63 0x5C63
#define PCI_CHIP_R430_5D48 0x5D48
@@ -146,8 +146,112 @@
#define PCI_CHIP_RV410_5E4C 0x5E4C
#define PCI_CHIP_RV410_5E4D 0x5E4D
#define PCI_CHIP_RV410_5E4F 0x5E4F
+
+#define PCI_CHIP_R520_7100 0x7100
+#define PCI_CHIP_R520_7101 0x7101
+#define PCI_CHIP_R520_7102 0x7102
+#define PCI_CHIP_R520_7103 0x7103
+#define PCI_CHIP_R520_7104 0x7104
+#define PCI_CHIP_R520_7105 0x7105
+#define PCI_CHIP_R520_7106 0x7106
+#define PCI_CHIP_R520_7108 0x7108
+#define PCI_CHIP_R520_7109 0x7109
+#define PCI_CHIP_R520_710A 0x710A
+#define PCI_CHIP_R520_710B 0x710B
+#define PCI_CHIP_R520_710C 0x710C
+#define PCI_CHIP_R520_710E 0x710E
+#define PCI_CHIP_R520_710F 0x710F
+#define PCI_CHIP_RV515_7140 0x7140
+#define PCI_CHIP_RV515_7141 0x7141
+#define PCI_CHIP_RV515_7142 0x7142
+#define PCI_CHIP_RV515_7143 0x7143
+#define PCI_CHIP_RV515_7144 0x7144
+#define PCI_CHIP_RV515_7145 0x7145
+#define PCI_CHIP_RV515_7146 0x7146
+#define PCI_CHIP_RV515_7147 0x7147
+#define PCI_CHIP_RV515_7149 0x7149
+#define PCI_CHIP_RV515_714A 0x714A
+#define PCI_CHIP_RV515_714B 0x714B
+#define PCI_CHIP_RV515_714C 0x714C
+#define PCI_CHIP_RV515_714D 0x714D
+#define PCI_CHIP_RV515_714E 0x714E
+#define PCI_CHIP_RV515_714F 0x714F
+#define PCI_CHIP_RV515_7151 0x7151
+#define PCI_CHIP_RV515_7152 0x7152
+#define PCI_CHIP_RV515_7153 0x7153
+#define PCI_CHIP_RV515_715E 0x715E
+#define PCI_CHIP_RV515_715F 0x715F
+#define PCI_CHIP_RV515_7180 0x7180
+#define PCI_CHIP_RV515_7181 0x7181
+#define PCI_CHIP_RV515_7183 0x7183
+#define PCI_CHIP_RV515_7186 0x7186
+#define PCI_CHIP_RV515_7187 0x7187
+#define PCI_CHIP_RV515_7188 0x7188
+#define PCI_CHIP_RV515_718A 0x718A
+#define PCI_CHIP_RV515_718B 0x718B
+#define PCI_CHIP_RV515_718C 0x718C
+#define PCI_CHIP_RV515_718D 0x718D
+#define PCI_CHIP_RV515_718F 0x718F
+#define PCI_CHIP_RV515_7193 0x7193
+#define PCI_CHIP_RV515_7196 0x7196
+#define PCI_CHIP_RV515_719B 0x719B
+#define PCI_CHIP_RV515_719F 0x719F
+#define PCI_CHIP_RV530_71C0 0x71C0
+#define PCI_CHIP_RV530_71C1 0x71C1
+#define PCI_CHIP_RV530_71C2 0x71C2
+#define PCI_CHIP_RV530_71C3 0x71C3
+#define PCI_CHIP_RV530_71C4 0x71C4
+#define PCI_CHIP_RV530_71C5 0x71C5
+#define PCI_CHIP_RV530_71C6 0x71C6
+#define PCI_CHIP_RV530_71C7 0x71C7
+#define PCI_CHIP_RV530_71CD 0x71CD
+#define PCI_CHIP_RV530_71CE 0x71CE
+#define PCI_CHIP_RV530_71D2 0x71D2
+#define PCI_CHIP_RV530_71D4 0x71D4
+#define PCI_CHIP_RV530_71D5 0x71D5
+#define PCI_CHIP_RV530_71D6 0x71D6
+#define PCI_CHIP_RV530_71DA 0x71DA
+#define PCI_CHIP_RV530_71DE 0x71DE
+#define PCI_CHIP_RV515_7200 0x7200
+#define PCI_CHIP_RV515_7210 0x7210
+#define PCI_CHIP_RV515_7211 0x7211
+#define PCI_CHIP_R580_7240 0x7240
+#define PCI_CHIP_R580_7243 0x7243
+#define PCI_CHIP_R580_7244 0x7244
+#define PCI_CHIP_R580_7245 0x7245
+#define PCI_CHIP_R580_7246 0x7246
+#define PCI_CHIP_R580_7247 0x7247
+#define PCI_CHIP_R580_7248 0x7248
+#define PCI_CHIP_R580_7249 0x7249
+#define PCI_CHIP_R580_724A 0x724A
+#define PCI_CHIP_R580_724B 0x724B
+#define PCI_CHIP_R580_724C 0x724C
+#define PCI_CHIP_R580_724D 0x724D
+#define PCI_CHIP_R580_724E 0x724E
+#define PCI_CHIP_R580_724F 0x724F
+#define PCI_CHIP_RV570_7280 0x7280
+#define PCI_CHIP_RV560_7281 0x7281
+#define PCI_CHIP_RV560_7283 0x7283
+#define PCI_CHIP_R580_7284 0x7284
+#define PCI_CHIP_RV560_7287 0x7287
+#define PCI_CHIP_RV570_7288 0x7288
+#define PCI_CHIP_RV570_7289 0x7289
+#define PCI_CHIP_RV570_728B 0x728B
+#define PCI_CHIP_RV570_728C 0x728C
+#define PCI_CHIP_RV560_7290 0x7290
+#define PCI_CHIP_RV560_7291 0x7291
+#define PCI_CHIP_RV560_7293 0x7293
+#define PCI_CHIP_RV560_7297 0x7297
+
#define PCI_CHIP_RS350_7834 0x7834
#define PCI_CHIP_RS350_7835 0x7835
+#define PCI_CHIP_RS690_791E 0x791E
+#define PCI_CHIP_RS690_791F 0x791F
+#define PCI_CHIP_RS740_796C 0x796C
+#define PCI_CHIP_RS740_796D 0x796D
+#define PCI_CHIP_RS740_796E 0x796E
+#define PCI_CHIP_RS740_796F 0x796F
+
enum {
CHIP_FAMILY_R100,
@@ -166,6 +270,14 @@ enum {
CHIP_FAMILY_R420,
CHIP_FAMILY_RV410,
CHIP_FAMILY_RS400,
+ CHIP_FAMILY_RS690,
+ CHIP_FAMILY_RS740,
+ CHIP_FAMILY_RV515,
+ CHIP_FAMILY_R520,
+ CHIP_FAMILY_RV530,
+ CHIP_FAMILY_R580,
+ CHIP_FAMILY_RV560,
+ CHIP_FAMILY_RV570,
CHIP_FAMILY_LAST
};
diff --git a/radeon/radeon_context.c b/radeon/radeon_context.c
index 9451ec4..7aa0e3e 100644
--- a/radeon/radeon_context.c
+++ b/radeon/radeon_context.c
@@ -35,14 +35,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "glheader.h"
-#include "api_arrayelt.h"
-#include "context.h"
-#include "simple_list.h"
-#include "imports.h"
-#include "matrix.h"
-#include "extensions.h"
-#include "framebuffer.h"
+#include "main/glheader.h"
+#include "main/api_arrayelt.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/matrix.h"
+#include "main/extensions.h"
+#include "main/framebuffer.h"
+#include "main/state.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
@@ -232,14 +233,14 @@ radeonCreateContext( const __GLcontextModes *glVisual,
"def_max_anisotropy");
if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
- if ( sPriv->drmMinor < 13 )
+ if ( sPriv->drm_version.minor < 13 )
fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
- "disabling.\n",sPriv->drmMinor );
+ "disabling.\n", sPriv->drm_version.minor );
else
rmesa->using_hyperz = GL_TRUE;
}
- if ( sPriv->drmMinor >= 15 )
+ if ( sPriv->drm_version.minor >= 15 )
rmesa->texmicrotile = GL_TRUE;
/* Init default driver functions then plug in our Radeon-specific functions
@@ -270,7 +271,7 @@ radeonCreateContext( const __GLcontextModes *glVisual,
rmesa->dri.hwContext = driContextPriv->hHWContext;
rmesa->dri.hwLock = &sPriv->pSAREA->lock;
rmesa->dri.fd = sPriv->fd;
- rmesa->dri.drmMinor = sPriv->drmMinor;
+ rmesa->dri.drmMinor = sPriv->drm_version.minor;
rmesa->radeonScreen = screen;
rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
@@ -423,10 +424,7 @@ radeonCreateContext( const __GLcontextModes *glVisual,
rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
- rmesa->vblank_flags = (rmesa->radeonScreen->irq != 0)
- ? driGetDefaultVBlankFlags(&rmesa->optionCache) : VBLANK_FLAG_NO_IRQ;
-
- (*dri_interface->getUST)( & rmesa->swap_ust );
+ (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
#if DO_DEBUG
@@ -591,16 +589,18 @@ radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
if (RADEON_DEBUG & DEBUG_DRI)
fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx);
- if ( newCtx->dri.drawable != driDrawPriv ) {
- /* XXX we may need to validate the drawable here!!! */
- driDrawableInitVBlank( driDrawPriv, newCtx->vblank_flags,
- &newCtx->vbl_seq );
- }
-
newCtx->dri.readable = driReadPriv;
if ( (newCtx->dri.drawable != driDrawPriv) ||
newCtx->lastStamp != driDrawPriv->lastStamp ) {
+ if (driDrawPriv->swap_interval == (unsigned)-1) {
+ driDrawPriv->vblFlags = (newCtx->radeonScreen->irq != 0)
+ ? driGetDefaultVBlankFlags(&newCtx->optionCache)
+ : VBLANK_FLAG_NO_IRQ;
+
+ driDrawableInitVBlank( driDrawPriv );
+ }
+
newCtx->dri.drawable = driDrawPriv;
radeonSetCliprects(newCtx);
diff --git a/radeon/radeon_context.h b/radeon/radeon_context.h
index 8dedd66..bc43fc5 100644
--- a/radeon/radeon_context.h
+++ b/radeon/radeon_context.h
@@ -161,6 +161,8 @@ struct radeon_tex_obj {
drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
/* Six, for the cube faces */
+ GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
+
GLuint pp_txfilter; /* hardware register values */
GLuint pp_txformat;
GLuint pp_txoffset; /* Image location in texmem.
@@ -667,9 +669,6 @@ struct radeon_context {
/* VBI
*/
- GLuint vbl_seq;
- GLuint vblank_flags;
-
int64_t swap_ust;
int64_t swap_missed_ust;
@@ -708,9 +707,9 @@ struct radeon_context {
#define RADEON_CONTEXT(ctx) ((radeonContextPtr)(ctx->DriverCtx))
-static __inline GLuint radeonPackColor(GLuint cpp,
- GLubyte r, GLubyte g,
- GLubyte b, GLubyte a)
+static INLINE GLuint radeonPackColor(GLuint cpp,
+ GLubyte r, GLubyte g,
+ GLubyte b, GLubyte a)
{
switch (cpp) {
case 2:
diff --git a/radeon/radeon_ioctl.c b/radeon/radeon_ioctl.c
index 4c64bc2..446025b 100644
--- a/radeon/radeon_ioctl.c
+++ b/radeon/radeon_ioctl.c
@@ -863,13 +863,14 @@ static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
/* Copy the back color buffer to the front color buffer.
*/
-void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv,
+void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
const drm_clip_rect_t *rect)
{
radeonContextPtr rmesa;
GLint nbox, i, ret;
GLboolean missed_target;
int64_t ust;
+ __DRIscreenPrivate *psp;
assert(dPriv);
assert(dPriv->driContextPriv);
@@ -891,7 +892,7 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv,
if (!rect)
{
UNLOCK_HARDWARE( rmesa );
- driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
+ driWaitForVBlank( dPriv, & missed_target );
LOCK_HARDWARE( rmesa );
}
@@ -918,16 +919,18 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv,
if (rect->y2 < b->y2)
b->y2 = rect->y2;
- if (b->x1 < b->x2 && b->y1 < b->y2)
- b++;
+ if (b->x1 >= b->x2 || b->y1 >= b->y2)
+ continue;
}
- else
- b++;
+ b++;
n++;
}
rmesa->sarea->nbox = n;
+ if (!n)
+ continue;
+
ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
if ( ret ) {
@@ -940,8 +943,9 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv,
UNLOCK_HARDWARE( rmesa );
if (!rect)
{
+ psp = dPriv->driScreenPriv;
rmesa->swap_count++;
- (*dri_interface->getUST)( & ust );
+ (*psp->systemTime->getUST)( & ust );
if ( missed_target ) {
rmesa->swap_missed_count++;
rmesa->swap_missed_ust = ust - rmesa->swap_ust;
@@ -952,17 +956,19 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv,
}
}
-void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
+void radeonPageFlip( __DRIdrawablePrivate *dPriv )
{
radeonContextPtr rmesa;
GLint ret;
GLboolean missed_target;
+ __DRIscreenPrivate *psp;
assert(dPriv);
assert(dPriv->driContextPriv);
assert(dPriv->driContextPriv->driverPrivate);
rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+ psp = dPriv->driScreenPriv;
if ( RADEON_DEBUG & DEBUG_IOCTL ) {
fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
@@ -987,10 +993,10 @@ void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
*/
radeonWaitForFrameCompletion( rmesa );
UNLOCK_HARDWARE( rmesa );
- driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
+ driWaitForVBlank( dPriv, & missed_target );
if ( missed_target ) {
rmesa->swap_missed_count++;
- (void) (*dri_interface->getUST)( & rmesa->swap_missed_ust );
+ (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
}
LOCK_HARDWARE( rmesa );
@@ -1004,7 +1010,7 @@ void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
}
rmesa->swap_count++;
- (void) (*dri_interface->getUST)( & rmesa->swap_ust );
+ (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
/* Get ready for drawing next frame. Update the renderbuffers'
* flippedOffset/Pitch fields so we draw into the right place.
diff --git a/radeon/radeon_ioctl.h b/radeon/radeon_ioctl.h
index 11a7d02..b3c287f 100644
--- a/radeon/radeon_ioctl.h
+++ b/radeon/radeon_ioctl.h
@@ -87,9 +87,9 @@ extern void radeonReleaseDmaRegion( radeonContextPtr rmesa,
struct radeon_dma_region *region,
const char *caller );
-extern void radeonCopyBuffer( const __DRIdrawablePrivate *drawable,
+extern void radeonCopyBuffer( __DRIdrawablePrivate *drawable,
const drm_clip_rect_t *rect);
-extern void radeonPageFlip( const __DRIdrawablePrivate *drawable );
+extern void radeonPageFlip( __DRIdrawablePrivate *drawable );
extern void radeonFlush( GLcontext *ctx );
extern void radeonFinish( GLcontext *ctx );
extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
@@ -124,7 +124,7 @@ do { \
memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \
rmesa->hw.ATOM.cmd_size * 4)
-static __inline int RADEON_DB_STATECHANGE(
+static INLINE int RADEON_DB_STATECHANGE(
radeonContextPtr rmesa,
struct radeon_state_atom *atom )
{
@@ -177,7 +177,7 @@ do { \
* and hang on to the lock until the critical section is finished and we flush
* the buffer again and unlock.
*/
-static __inline void radeonEnsureCmdBufSpace( radeonContextPtr rmesa,
+static INLINE void radeonEnsureCmdBufSpace( radeonContextPtr rmesa,
int bytes )
{
if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
@@ -187,7 +187,7 @@ static __inline void radeonEnsureCmdBufSpace( radeonContextPtr rmesa,
/* Alloc space in the command buffer
*/
-static __inline char *radeonAllocCmdBuf( radeonContextPtr rmesa,
+static INLINE char *radeonAllocCmdBuf( radeonContextPtr rmesa,
int bytes, const char *where )
{
if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
diff --git a/radeon/radeon_screen.c b/radeon/radeon_screen.c
index 907a987..84b5c46 100644
--- a/radeon/radeon_screen.c
+++ b/radeon/radeon_screen.c
@@ -49,6 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#if !RADEON_COMMON
#include "radeon_context.h"
#include "radeon_span.h"
+#include "radeon_tex.h"
#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
#include "r200_context.h"
#include "r200_ioctl.h"
@@ -90,7 +91,7 @@ DRI_CONF_BEGIN
DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC)
DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF)
- DRI_CONF_ALLOW_LARGE_TEXTURES(1)
+ DRI_CONF_ALLOW_LARGE_TEXTURES(2)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
DRI_CONF_NO_RAST(false)
@@ -117,7 +118,7 @@ DRI_CONF_BEGIN
DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC)
DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF)
- DRI_CONF_ALLOW_LARGE_TEXTURES(1)
+ DRI_CONF_ALLOW_LARGE_TEXTURES(2)
DRI_CONF_TEXTURE_BLEND_QUALITY(1.0,"0.0:1.0")
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
@@ -179,7 +180,7 @@ DRI_CONF_OPT_BEGIN_V(fp_optimization,enum,def,"0:1") \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
-const char __driConfigOptions[] =
+PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN)
@@ -188,14 +189,13 @@ DRI_CONF_BEGIN
DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(8, 2, 8)
DRI_CONF_MAX_TEXTURE_COORD_UNITS(8, 2, 8)
DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
- DRI_CONF_DISABLE_FALLBACK(false)
+ DRI_CONF_DISABLE_FALLBACK(true)
DRI_CONF_DISABLE_DOUBLE_SIDE_STENCIL(false)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
DRI_CONF_DEF_MAX_ANISOTROPY(1.0, "1.0,2.0,4.0,8.0,16.0")
- DRI_CONF_NO_NEG_LOD_BIAS(false)
- DRI_CONF_FORCE_S3TC_ENABLE(false)
+ DRI_CONF_FORCE_S3TC_ENABLE(false)
DRI_CONF_DISABLE_S3TC(false)
DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC)
@@ -206,7 +206,7 @@ DRI_CONF_BEGIN
DRI_CONF_NO_RAST(false)
DRI_CONF_SECTION_END
DRI_CONF_END;
-static const GLuint __driNConfigOptions = 18;
+static const GLuint __driNConfigOptions = 17;
#ifndef RADEON_DEBUG
int RADEON_DEBUG = 0;
@@ -244,25 +244,26 @@ radeonGetParam(int fd, int param, void *value)
{
int ret;
drm_radeon_getparam_t gp;
-
+
gp.param = param;
gp.value = value;
-
+
ret = drmCommandWriteRead( fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
return ret;
}
-static __GLcontextModes *
-radeonFillInModes( unsigned pixel_bits, unsigned depth_bits,
- unsigned stencil_bits, GLboolean have_back_buffer )
+static const __DRIconfig **
+radeonFillInModes( __DRIscreenPrivate *psp,
+ unsigned pixel_bits, unsigned depth_bits,
+ unsigned stencil_bits, GLboolean have_back_buffer )
{
- __GLcontextModes * modes;
- __GLcontextModes * m;
- unsigned num_modes;
+ __DRIconfig **configs;
+ __GLcontextModes *m;
unsigned depth_buffer_factor;
unsigned back_buffer_factor;
GLenum fb_format;
GLenum fb_type;
+ int i;
/* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy
* enough to add support. Basically, if a context is created with an
@@ -279,7 +280,7 @@ radeonFillInModes( unsigned pixel_bits, unsigned depth_bits,
depth_bits_array[0] = depth_bits;
depth_bits_array[1] = depth_bits;
-
+
/* Just like with the accumulation buffer, always provide some modes
* with a stencil buffer. It will be a sw fallback, but some apps won't
* care about that.
@@ -290,8 +291,6 @@ radeonFillInModes( unsigned pixel_bits, unsigned depth_bits,
depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1;
back_buffer_factor = (have_back_buffer) ? 2 : 1;
- num_modes = depth_buffer_factor * back_buffer_factor * 4;
-
if ( pixel_bits == 16 ) {
fb_format = GL_RGB;
fb_type = GL_UNSIGNED_SHORT_5_6_5;
@@ -301,21 +300,11 @@ radeonFillInModes( unsigned pixel_bits, unsigned depth_bits,
fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
}
- modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) );
- m = modes;
- if ( ! driFillInModes( & m, fb_format, fb_type,
- depth_bits_array, stencil_bits_array, depth_buffer_factor,
- back_buffer_modes, back_buffer_factor,
- GLX_TRUE_COLOR ) ) {
- fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
- __func__, __LINE__ );
- return NULL;
- }
-
- if ( ! driFillInModes( & m, fb_format, fb_type,
- depth_bits_array, stencil_bits_array, depth_buffer_factor,
- back_buffer_modes, back_buffer_factor,
- GLX_DIRECT_COLOR ) ) {
+ configs = driCreateConfigs(fb_format, fb_type,
+ depth_bits_array, stencil_bits_array,
+ depth_buffer_factor,
+ back_buffer_modes, back_buffer_factor);
+ if (configs == NULL) {
fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
__func__, __LINE__ );
return NULL;
@@ -323,15 +312,43 @@ radeonFillInModes( unsigned pixel_bits, unsigned depth_bits,
/* Mark the visual as slow if there are "fake" stencil bits.
*/
- for ( m = modes ; m != NULL ; m = m->next ) {
- if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) {
+ for (i = 0; configs[i]; i++) {
+ m = &configs[i]->modes;
+ if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) {
m->visualRating = GLX_SLOW_CONFIG;
}
}
- return modes;
+ return (const __DRIconfig **) configs;
}
+#if !RADEON_COMMON
+static const __DRItexOffsetExtension radeonTexOffsetExtension = {
+ { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+ radeonSetTexOffset,
+};
+#endif
+
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+static const __DRIallocateExtension r200AllocateExtension = {
+ { __DRI_ALLOCATE, __DRI_ALLOCATE_VERSION },
+ r200AllocateMemoryMESA,
+ r200FreeMemoryMESA,
+ r200GetMemoryOffsetMESA
+};
+
+static const __DRItexOffsetExtension r200texOffsetExtension = {
+ { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+ r200SetTexOffset,
+};
+#endif
+
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+static const __DRItexOffsetExtension r300texOffsetExtension = {
+ { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+ r300SetTexOffset,
+};
+#endif
/* Create the device specific screen private data struct.
*/
@@ -341,9 +358,9 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
radeonScreenPtr screen;
RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
unsigned char *RADEONMMIO;
- PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension =
- (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension"));
- void * const psc = sPriv->psc->screenConfigs;
+ int i;
+ int ret;
+ uint32_t temp;
if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n");
@@ -374,7 +391,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
int ret;
ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
&screen->gart_buffer_offset);
-
+
if (ret) {
FREE( screen );
fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
@@ -396,13 +413,13 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
return NULL;
}
- screen->drmSupportsCubeMapsR200 = (sPriv->drmMinor >= 7);
- screen->drmSupportsBlendColor = (sPriv->drmMinor >= 11);
- screen->drmSupportsTriPerf = (sPriv->drmMinor >= 16);
- screen->drmSupportsFragShader = (sPriv->drmMinor >= 18);
- screen->drmSupportsPointSprites = (sPriv->drmMinor >= 13);
- screen->drmSupportsCubeMapsR100 = (sPriv->drmMinor >= 15);
- screen->drmSupportsVertexProgram = (sPriv->drmMinor >= 25);
+ screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
+ screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
+ screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
+ screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
+ screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
+ screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
+ screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
}
screen->mmio.handle = dri_priv->registerHandle;
@@ -536,7 +553,11 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->chip_family = CHIP_FAMILY_RS300;
break;
+ /* 9500 with 1 pipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */
case PCI_CHIP_R300_AD:
+ screen->chip_family = CHIP_FAMILY_RV350;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
case PCI_CHIP_R300_AE:
case PCI_CHIP_R300_AF:
case PCI_CHIP_R300_AG:
@@ -585,7 +606,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
case PCI_CHIP_RV370_5B63:
case PCI_CHIP_RV370_5B64:
case PCI_CHIP_RV370_5B65:
- case PCI_CHIP_RV370_5657:
case PCI_CHIP_RV380_3150:
case PCI_CHIP_RV380_3152:
case PCI_CHIP_RV380_3154:
@@ -635,18 +655,14 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
- /* RV410 SE chips have half the pipes of regular RV410 */
case PCI_CHIP_RV410_5E4C:
case PCI_CHIP_RV410_5E4F:
- screen->chip_family = CHIP_FAMILY_RV380;
- screen->chip_flags = RADEON_CHIPSET_TCL;
- break;
-
case PCI_CHIP_RV410_564A:
case PCI_CHIP_RV410_564B:
case PCI_CHIP_RV410_564F:
case PCI_CHIP_RV410_5652:
case PCI_CHIP_RV410_5653:
+ case PCI_CHIP_RV410_5657:
case PCI_CHIP_RV410_5E48:
case PCI_CHIP_RV410_5E4A:
case PCI_CHIP_RV410_5E4B:
@@ -664,7 +680,132 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
case PCI_CHIP_RC410_5A61:
case PCI_CHIP_RC410_5A62:
screen->chip_family = CHIP_FAMILY_RS400;
- fprintf(stderr, "Warning, xpress200 detected.\n");
+ break;
+
+ case PCI_CHIP_RS690_791E:
+ case PCI_CHIP_RS690_791F:
+ screen->chip_family = CHIP_FAMILY_RS690;
+ break;
+ case PCI_CHIP_RS740_796C:
+ case PCI_CHIP_RS740_796D:
+ case PCI_CHIP_RS740_796E:
+ case PCI_CHIP_RS740_796F:
+ screen->chip_family = CHIP_FAMILY_RS740;
+ break;
+
+ case PCI_CHIP_R520_7100:
+ case PCI_CHIP_R520_7101:
+ case PCI_CHIP_R520_7102:
+ case PCI_CHIP_R520_7103:
+ case PCI_CHIP_R520_7104:
+ case PCI_CHIP_R520_7105:
+ case PCI_CHIP_R520_7106:
+ case PCI_CHIP_R520_7108:
+ case PCI_CHIP_R520_7109:
+ case PCI_CHIP_R520_710A:
+ case PCI_CHIP_R520_710B:
+ case PCI_CHIP_R520_710C:
+ case PCI_CHIP_R520_710E:
+ case PCI_CHIP_R520_710F:
+ screen->chip_family = CHIP_FAMILY_R520;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV515_7140:
+ case PCI_CHIP_RV515_7141:
+ case PCI_CHIP_RV515_7142:
+ case PCI_CHIP_RV515_7143:
+ case PCI_CHIP_RV515_7144:
+ case PCI_CHIP_RV515_7145:
+ case PCI_CHIP_RV515_7146:
+ case PCI_CHIP_RV515_7147:
+ case PCI_CHIP_RV515_7149:
+ case PCI_CHIP_RV515_714A:
+ case PCI_CHIP_RV515_714B:
+ case PCI_CHIP_RV515_714C:
+ case PCI_CHIP_RV515_714D:
+ case PCI_CHIP_RV515_714E:
+ case PCI_CHIP_RV515_714F:
+ case PCI_CHIP_RV515_7151:
+ case PCI_CHIP_RV515_7152:
+ case PCI_CHIP_RV515_7153:
+ case PCI_CHIP_RV515_715E:
+ case PCI_CHIP_RV515_715F:
+ case PCI_CHIP_RV515_7180:
+ case PCI_CHIP_RV515_7181:
+ case PCI_CHIP_RV515_7183:
+ case PCI_CHIP_RV515_7186:
+ case PCI_CHIP_RV515_7187:
+ case PCI_CHIP_RV515_7188:
+ case PCI_CHIP_RV515_718A:
+ case PCI_CHIP_RV515_718B:
+ case PCI_CHIP_RV515_718C:
+ case PCI_CHIP_RV515_718D:
+ case PCI_CHIP_RV515_718F:
+ case PCI_CHIP_RV515_7193:
+ case PCI_CHIP_RV515_7196:
+ case PCI_CHIP_RV515_719B:
+ case PCI_CHIP_RV515_719F:
+ case PCI_CHIP_RV515_7200:
+ case PCI_CHIP_RV515_7210:
+ case PCI_CHIP_RV515_7211:
+ screen->chip_family = CHIP_FAMILY_RV515;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV530_71C0:
+ case PCI_CHIP_RV530_71C1:
+ case PCI_CHIP_RV530_71C2:
+ case PCI_CHIP_RV530_71C3:
+ case PCI_CHIP_RV530_71C4:
+ case PCI_CHIP_RV530_71C5:
+ case PCI_CHIP_RV530_71C6:
+ case PCI_CHIP_RV530_71C7:
+ case PCI_CHIP_RV530_71CD:
+ case PCI_CHIP_RV530_71CE:
+ case PCI_CHIP_RV530_71D2:
+ case PCI_CHIP_RV530_71D4:
+ case PCI_CHIP_RV530_71D5:
+ case PCI_CHIP_RV530_71D6:
+ case PCI_CHIP_RV530_71DA:
+ case PCI_CHIP_RV530_71DE:
+ screen->chip_family = CHIP_FAMILY_RV530;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_R580_7240:
+ case PCI_CHIP_R580_7243:
+ case PCI_CHIP_R580_7244:
+ case PCI_CHIP_R580_7245:
+ case PCI_CHIP_R580_7246:
+ case PCI_CHIP_R580_7247:
+ case PCI_CHIP_R580_7248:
+ case PCI_CHIP_R580_7249:
+ case PCI_CHIP_R580_724A:
+ case PCI_CHIP_R580_724B:
+ case PCI_CHIP_R580_724C:
+ case PCI_CHIP_R580_724D:
+ case PCI_CHIP_R580_724E:
+ case PCI_CHIP_R580_724F:
+ case PCI_CHIP_R580_7284:
+ screen->chip_family = CHIP_FAMILY_R580;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_RV570_7280:
+ case PCI_CHIP_RV560_7281:
+ case PCI_CHIP_RV560_7283:
+ case PCI_CHIP_RV560_7287:
+ case PCI_CHIP_RV570_7288:
+ case PCI_CHIP_RV570_7289:
+ case PCI_CHIP_RV570_728B:
+ case PCI_CHIP_RV570_728C:
+ case PCI_CHIP_RV560_7290:
+ case PCI_CHIP_RV560_7291:
+ case PCI_CHIP_RV560_7293:
+ case PCI_CHIP_RV560_7297:
+ screen->chip_family = CHIP_FAMILY_RV560;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
break;
default:
@@ -673,11 +814,19 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
return NULL;
}
if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) &&
- sPriv->ddxMinor < 2) {
+ sPriv->ddx_version.minor < 2) {
fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n");
return NULL;
}
+ if ((sPriv->drm_version.minor < 29) && (screen->chip_family >= CHIP_FAMILY_RV515)) {
+ fprintf(stderr, "R500 support requires a newer drm.\n");
+ return NULL;
+ }
+
+ if (getenv("R300_NO_TCL"))
+ screen->chip_flags &= ~RADEON_CHIPSET_TCL;
+
if (screen->chip_family <= CHIP_FAMILY_RS200)
screen->chip_flags |= RADEON_CLASS_R100;
else if (screen->chip_family <= CHIP_FAMILY_RV280)
@@ -688,9 +837,51 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->cpp = dri_priv->bpp / 8;
screen->AGPMode = dri_priv->AGPMode;
- screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff ) << 16;
+ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
+ &temp);
+ if (ret) {
+ if (screen->chip_family < CHIP_FAMILY_RS690)
+ screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
+ else {
+ FREE( screen );
+ fprintf(stderr, "Unable to get fb location need newer drm\n");
+ return NULL;
+ }
+ } else {
+ screen->fbLocation = (temp & 0xffff) << 16;
+ }
+
+ if (screen->chip_family >= CHIP_FAMILY_RV515) {
+ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES,
+ &temp);
+ if (ret) {
+ fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
+ switch (screen->chip_family) {
+ case CHIP_FAMILY_R300:
+ case CHIP_FAMILY_R350:
+ screen->num_gb_pipes = 2;
+ break;
+ case CHIP_FAMILY_R420:
+ case CHIP_FAMILY_R520:
+ case CHIP_FAMILY_R580:
+ case CHIP_FAMILY_RV560:
+ case CHIP_FAMILY_RV570:
+ screen->num_gb_pipes = 4;
+ break;
+ case CHIP_FAMILY_RV350:
+ case CHIP_FAMILY_RV515:
+ case CHIP_FAMILY_RV530:
+ case CHIP_FAMILY_RV410:
+ default:
+ screen->num_gb_pipes = 1;
+ break;
+ }
+ } else {
+ screen->num_gb_pipes = temp;
+ }
+ }
- if ( sPriv->drmMinor >= 10 ) {
+ if ( sPriv->drm_version.minor >= 10 ) {
drm_radeon_setparam_t sp;
sp.param = RADEON_SETPARAM_FB_LOCATION;
@@ -708,7 +899,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->depthPitch = dri_priv->depthPitch;
/* Check if ddx has set up a surface reg to cover depth buffer */
- screen->depthHasSurface = (sPriv->ddxMajor > 4) ||
+ screen->depthHasSurface = (sPriv->ddx_version.major > 4) ||
/* these chips don't use tiled z without hyperz. So always pretend
we have set up a surface which will cause linear reads/writes */
((screen->chip_family & RADEON_CLASS_R100) &&
@@ -741,29 +932,34 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
dri_priv->log2GARTTexGran;
}
- if ( glx_enable_extension != NULL ) {
- if ( screen->irq != 0 ) {
- (*glx_enable_extension)( psc, "GLX_SGI_swap_control" );
- (*glx_enable_extension)( psc, "GLX_SGI_video_sync" );
- (*glx_enable_extension)( psc, "GLX_MESA_swap_control" );
- }
+ i = 0;
+ screen->extensions[i++] = &driCopySubBufferExtension.base;
+ screen->extensions[i++] = &driFrameTrackingExtension.base;
+ screen->extensions[i++] = &driReadDrawableExtension;
- (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" );
- if (IS_R200_CLASS(screen))
- (*glx_enable_extension)( psc, "GLX_MESA_allocate_memory" );
-
- (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" );
- (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" );
+ if ( screen->irq != 0 ) {
+ screen->extensions[i++] = &driSwapControlExtension.base;
+ screen->extensions[i++] = &driMediaStreamCounterExtension.base;
}
+#if !RADEON_COMMON
+ screen->extensions[i++] = &radeonTexOffsetExtension.base;
+#endif
+
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
- if (IS_R200_CLASS(screen)) {
- sPriv->psc->allocateMemory = (void *) r200AllocateMemoryMESA;
- sPriv->psc->freeMemory = (void *) r200FreeMemoryMESA;
- sPriv->psc->memoryOffset = (void *) r200GetMemoryOffsetMESA;
- }
+ if (IS_R200_CLASS(screen))
+ screen->extensions[i++] = &r200AllocateExtension.base;
+
+ screen->extensions[i++] = &r200texOffsetExtension.base;
#endif
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+ screen->extensions[i++] = &r300texOffsetExtension.base;
+#endif
+
+ screen->extensions[i++] = NULL;
+ sPriv->extensions = screen->extensions;
+
screen->driScreen = sPriv;
screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
return screen;
@@ -946,72 +1142,16 @@ static void radeonDestroyContext(__DRIcontextPrivate * driContextPriv)
#endif
-#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
-static struct __DriverAPIRec radeonAPI = {
- .InitDriver = radeonInitDriver,
- .DestroyScreen = radeonDestroyScreen,
- .CreateContext = radeonCreateContext,
- .DestroyContext = radeonDestroyContext,
- .CreateBuffer = radeonCreateBuffer,
- .DestroyBuffer = radeonDestroyBuffer,
- .SwapBuffers = radeonSwapBuffers,
- .MakeCurrent = radeonMakeCurrent,
- .UnbindContext = radeonUnbindContext,
- .GetSwapInfo = getSwapInfo,
- .GetMSC = driGetMSC32,
- .WaitForMSC = driWaitForMSC32,
- .WaitForSBC = NULL,
- .SwapBuffersMSC = NULL,
- .CopySubBuffer = radeonCopySubBuffer,
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
- .setTexOffset = r300SetTexOffset,
-#endif
-};
-#else
-static const struct __DriverAPIRec r200API = {
- .InitDriver = radeonInitDriver,
- .DestroyScreen = radeonDestroyScreen,
- .CreateContext = r200CreateContext,
- .DestroyContext = r200DestroyContext,
- .CreateBuffer = radeonCreateBuffer,
- .DestroyBuffer = radeonDestroyBuffer,
- .SwapBuffers = r200SwapBuffers,
- .MakeCurrent = r200MakeCurrent,
- .UnbindContext = r200UnbindContext,
- .GetSwapInfo = getSwapInfo,
- .GetMSC = driGetMSC32,
- .WaitForMSC = driWaitForMSC32,
- .WaitForSBC = NULL,
- .SwapBuffersMSC = NULL,
- .CopySubBuffer = r200CopySubBuffer,
- .setTexOffset = r200SetTexOffset
-};
-#endif
-
/**
- * This is the bootstrap function for the driver. libGL supplies all of the
- * requisite information about the system, and the driver initializes itself.
- * This routine also fills in the linked list pointed to by \c driver_modes
- * with the \c __GLcontextModes that the driver can support for windows or
- * pbuffers.
+ * This is the driver specific part of the createNewScreen entry point.
*
- * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on
- * failure.
+ * \todo maybe fold this into intelInitDriver
+ *
+ * \return the __GLcontextModes supported by this driver
*/
-PUBLIC void *
-__driCreateNewScreen_20050727( __DRInativeDisplay *dpy,
- int scrn, __DRIscreen *psc,
- const __GLcontextModes * modes,
- const __DRIversion * ddx_version,
- const __DRIversion * dri_version,
- const __DRIversion * drm_version,
- const __DRIframebuffer * frame_buffer,
- drmAddress pSAREA, int fd,
- int internal_api_version,
- const __DRIinterfaceMethods * interface,
- __GLcontextModes ** driver_modes )
+static const __DRIconfig **
+radeonInitScreen(__DRIscreenPrivate *psp)
{
- __DRIscreenPrivate *psp;
#if !RADEON_COMMON
static const char *driver_name = "Radeon";
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
@@ -1028,57 +1168,42 @@ __driCreateNewScreen_20050727( __DRInativeDisplay *dpy,
static const __DRIversion dri_expected = { 4, 0, 0 };
static const __DRIversion drm_expected = { 1, 24, 0 };
#endif
-
- dri_interface = interface;
+ RADEONDRIPtr dri_priv = (RADEONDRIPtr) psp->pDevPriv;
if ( ! driCheckDriDdxDrmVersions3( driver_name,
- dri_version, & dri_expected,
- ddx_version, & ddx_expected,
- drm_version, & drm_expected ) ) {
+ &psp->dri_version, & dri_expected,
+ &psp->ddx_version, & ddx_expected,
+ &psp->drm_version, & drm_expected ) ) {
return NULL;
}
-#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
- psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL,
- ddx_version, dri_version, drm_version,
- frame_buffer, pSAREA, fd,
- internal_api_version, &radeonAPI);
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
- psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL,
- ddx_version, dri_version, drm_version,
- frame_buffer, pSAREA, fd,
- internal_api_version, &r200API);
-#endif
-
- if ( psp != NULL ) {
- RADEONDRIPtr dri_priv = (RADEONDRIPtr) psp->pDevPriv;
- if (driver_modes) {
- *driver_modes = radeonFillInModes( dri_priv->bpp,
- (dri_priv->bpp == 16) ? 16 : 24,
- (dri_priv->bpp == 16) ? 0 : 8,
- (dri_priv->backOffset != dri_priv->depthOffset) );
- }
- /* Calling driInitExtensions here, with a NULL context pointer,
- * does not actually enable the extensions. It just makes sure
- * that all the dispatch offsets for all the extensions that
- * *might* be enables are known. This is needed because the
- * dispatch offsets need to be known when _mesa_context_create
- * is called, but we can't enable the extensions until we have a
- * context pointer.
- *
- * Hello chicken. Hello egg. How are you two today?
- */
- driInitExtensions( NULL, card_extensions, GL_FALSE );
+ /* Calling driInitExtensions here, with a NULL context pointer,
+ * does not actually enable the extensions. It just makes sure
+ * that all the dispatch offsets for all the extensions that
+ * *might* be enables are known. This is needed because the
+ * dispatch offsets need to be known when _mesa_context_create
+ * is called, but we can't enable the extensions until we have a
+ * context pointer.
+ *
+ * Hello chicken. Hello egg. How are you two today?
+ */
+ driInitExtensions( NULL, card_extensions, GL_FALSE );
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
- driInitExtensions( NULL, blend_extensions, GL_FALSE );
- driInitSingleExtension( NULL, ARB_vp_extension );
- driInitSingleExtension( NULL, NV_vp_extension );
- driInitSingleExtension( NULL, ATI_fs_extension );
- driInitExtensions( NULL, point_extensions, GL_FALSE );
+ driInitExtensions( NULL, blend_extensions, GL_FALSE );
+ driInitSingleExtension( NULL, ARB_vp_extension );
+ driInitSingleExtension( NULL, NV_vp_extension );
+ driInitSingleExtension( NULL, ATI_fs_extension );
+ driInitExtensions( NULL, point_extensions, GL_FALSE );
#endif
- }
- return (void *) psp;
+ if (!radeonInitDriver(psp))
+ return NULL;
+
+ return radeonFillInModes( psp,
+ dri_priv->bpp,
+ (dri_priv->bpp == 16) ? 16 : 24,
+ (dri_priv->bpp == 16) ? 0 : 8,
+ (dri_priv->backOffset != dri_priv->depthOffset) );
}
@@ -1111,3 +1236,41 @@ getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
return 0;
}
+
+#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
+const struct __DriverAPIRec driDriverAPI = {
+ .InitScreen = radeonInitScreen,
+ .DestroyScreen = radeonDestroyScreen,
+ .CreateContext = radeonCreateContext,
+ .DestroyContext = radeonDestroyContext,
+ .CreateBuffer = radeonCreateBuffer,
+ .DestroyBuffer = radeonDestroyBuffer,
+ .SwapBuffers = radeonSwapBuffers,
+ .MakeCurrent = radeonMakeCurrent,
+ .UnbindContext = radeonUnbindContext,
+ .GetSwapInfo = getSwapInfo,
+ .GetDrawableMSC = driDrawableGetMSC32,
+ .WaitForMSC = driWaitForMSC32,
+ .WaitForSBC = NULL,
+ .SwapBuffersMSC = NULL,
+ .CopySubBuffer = radeonCopySubBuffer,
+};
+#else
+const struct __DriverAPIRec driDriverAPI = {
+ .InitScreen = radeonInitScreen,
+ .DestroyScreen = radeonDestroyScreen,
+ .CreateContext = r200CreateContext,
+ .DestroyContext = r200DestroyContext,
+ .CreateBuffer = radeonCreateBuffer,
+ .DestroyBuffer = radeonDestroyBuffer,
+ .SwapBuffers = r200SwapBuffers,
+ .MakeCurrent = r200MakeCurrent,
+ .UnbindContext = r200UnbindContext,
+ .GetSwapInfo = getSwapInfo,
+ .GetDrawableMSC = driDrawableGetMSC32,
+ .WaitForMSC = driWaitForMSC32,
+ .WaitForSBC = NULL,
+ .SwapBuffersMSC = NULL,
+ .CopySubBuffer = r200CopySubBuffer,
+};
+#endif
diff --git a/radeon/radeon_screen.h b/radeon/radeon_screen.h
index 25e6fcf..ab859d5 100644
--- a/radeon/radeon_screen.h
+++ b/radeon/radeon_screen.h
@@ -103,6 +103,10 @@ typedef struct {
/* Configuration cache with default values for all contexts */
driOptionCache optionCache;
+
+ const __DRIextension *extensions[8];
+
+ int num_gb_pipes;
} radeonScreenRec, *radeonScreenPtr;
#define IS_R100_CLASS(screen) \
diff --git a/radeon/radeon_span.c b/radeon/radeon_span.c
index 732a85e..e7ab367 100644
--- a/radeon/radeon_span.c
+++ b/radeon/radeon_span.c
@@ -173,6 +173,8 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
/* 16-bit depth buffer functions
*/
+#define VALUE_TYPE GLushort
+
#define WRITE_DEPTH( _x, _y, d ) \
*(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
@@ -187,6 +189,8 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
* Careful: It looks like the R300 uses ZZZS byte order while the R200
* uses SZZZ for 24 bit depth, 8 bit stencil mode.
*/
+#define VALUE_TYPE GLuint
+
#ifdef COMPILE_R300
#define WRITE_DEPTH( _x, _y, d ) \
do { \
diff --git a/radeon/radeon_state.c b/radeon/radeon_state.c
index 4de05c7..ae83c91 100644
--- a/radeon/radeon_state.c
+++ b/radeon/radeon_state.c
@@ -1640,8 +1640,7 @@ void radeonSetCliprects( radeonContextPtr rmesa )
GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
- if (draw_fb->_ColorDrawBufferMask[0]
- == BUFFER_BIT_BACK_LEFT) {
+ if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
/* Can't ignore 2d windows if we are page flipping.
*/
if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
@@ -1693,17 +1692,18 @@ static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
RADEON_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */
- /*
- * _ColorDrawBufferMask is easier to cope with than <mode>.
- * Check for software fallback, update cliprects.
- */
- switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
- case BUFFER_BIT_FRONT_LEFT:
- case BUFFER_BIT_BACK_LEFT:
+ if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+ /* 0 (GL_NONE) buffers or multiple color drawing buffers */
+ FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
+ return;
+ }
+
+ switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
+ case BUFFER_FRONT_LEFT:
+ case BUFFER_BACK_LEFT:
FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE );
break;
default:
- /* 0 (GL_NONE) buffers or multiple color drawing buffers */
FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
return;
}
@@ -2222,11 +2222,11 @@ radeonUpdateDrawBuffer(GLcontext *ctx)
struct gl_framebuffer *fb = ctx->DrawBuffer;
driRenderbuffer *drb;
- if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
+ if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
/* draw to front */
drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
}
- else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) {
+ else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
/* draw to back */
drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
}
diff --git a/radeon/radeon_tex.h b/radeon/radeon_tex.h
index a806981..0b955ed 100644
--- a/radeon/radeon_tex.h
+++ b/radeon/radeon_tex.h
@@ -38,6 +38,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#ifndef __RADEON_TEX_H__
#define __RADEON_TEX_H__
+extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth,
+ GLuint pitch);
+
extern void radeonUpdateTextureState( GLcontext *ctx );
extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t,
diff --git a/radeon/radeon_texmem.c b/radeon/radeon_texmem.c
index 20f25dd..0a3d745 100644
--- a/radeon/radeon_texmem.c
+++ b/radeon/radeon_texmem.c
@@ -334,7 +334,7 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint fac
{
int numLevels;
- if ( !t || t->base.totalSize == 0 )
+ if ( !t || t->base.totalSize == 0 || t->image_override )
return 0;
if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
diff --git a/radeon/radeon_texstate.c b/radeon/radeon_texstate.c
index 37bb749..ecd3754 100644
--- a/radeon/radeon_texstate.c
+++ b/radeon/radeon_texstate.c
@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "context.h"
#include "macros.h"
#include "texformat.h"
+#include "texobj.h"
#include "enums.h"
#include "radeon_context.h"
@@ -84,7 +85,7 @@ tx_table[] =
_ALPHA_REV(RGBA8888),
_ALPHA(ARGB8888),
_ALPHA_REV(ARGB8888),
- _INVALID(RGB888),
+ [ MESA_FORMAT_RGB888 ] = { RADEON_TXFORMAT_ARGB8888, 0 },
_COLOR(RGB565),
_COLOR_REV(RGB565),
_ALPHA(ARGB4444),
@@ -134,18 +135,19 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
/* Set the hardware texture format
*/
-
- t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
- RADEON_TXFORMAT_ALPHA_IN_MAP);
- t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
-
- if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
- t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
- t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
- }
- else {
- _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
- return;
+ if ( !t->image_override ) {
+ t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
+ RADEON_TXFORMAT_ALPHA_IN_MAP);
+ t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
+
+ if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
+ t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
+ t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
+ }
+ else {
+ _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
+ return;
+ }
}
texelBytes = baseImage->TexFormat->TexelBytes;
@@ -341,11 +343,13 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
* requires 64-byte aligned pitches, and we may/may not need the
* blitter. NPOT only!
*/
- if (baseImage->IsCompressed)
- t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
- else
- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
- t->pp_txpitch -= 32;
+ if ( !t->image_override ) {
+ if (baseImage->IsCompressed)
+ t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
+ else
+ t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
+ t->pp_txpitch -= 32;
+ }
t->dirty_state = TEX_ALL;
@@ -840,6 +844,44 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
return GL_TRUE;
}
+void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth, GLuint pitch)
+{
+ radeonContextPtr rmesa = pDRICtx->driverPrivate;
+ struct gl_texture_object *tObj =
+ _mesa_lookup_texture(rmesa->glCtx, texname);
+ radeonTexObjPtr t;
+
+ if (tObj == NULL)
+ return;
+
+ t = (radeonTexObjPtr) tObj->DriverData;
+
+ t->image_override = GL_TRUE;
+
+ if (!offset)
+ return;
+
+ t->pp_txoffset = offset;
+ t->pp_txpitch = pitch - 32;
+
+ switch (depth) {
+ case 32:
+ t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format;
+ t->pp_txfilter |= tx_table[MESA_FORMAT_ARGB8888].filter;
+ break;
+ case 24:
+ default:
+ t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
+ t->pp_txfilter |= tx_table[MESA_FORMAT_RGB888].filter;
+ break;
+ case 16:
+ t->pp_txformat = tx_table[MESA_FORMAT_RGB565].format;
+ t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter;
+ break;
+ }
+}
+
#define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK | \
RADEON_MIN_FILTER_MASK | \
RADEON_MAG_FILTER_MASK | \
@@ -1136,7 +1178,7 @@ static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
RADEON_FIREVERTICES( rmesa );
radeonSetTexImages( rmesa, tObj );
radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
- if ( !t->base.memBlock )
+ if ( !t->base.memBlock && !t->image_override )
return GL_FALSE;
}
@@ -1203,7 +1245,8 @@ static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
RADEON_FIREVERTICES( rmesa );
radeonSetTexImages( rmesa, tObj );
radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
- if ( !t->base.memBlock /* && !rmesa->prefer_gart_client_texturing FIXME */ ) {
+ if ( !t->base.memBlock &&
+ !t->image_override /* && !rmesa->prefer_gart_client_texturing FIXME */ ) {
fprintf(stderr, "%s: upload failed\n", __FUNCTION__);
return GL_FALSE;
}