diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-03-14 07:16:11 +0100 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-03-14 07:16:11 +0100 |
commit | 7cf4bebfd2c646b649d14e31c40098fec0d2278b (patch) | |
tree | 083117c8b3567cd09471d486d53c972fb9d203e9 | |
parent | 50d4922305e925896a71e705c438ededbaedb80f (diff) |
Import radeon, r200 and r300 dri drivers from Mesa 7.1.0.7.1.0
89 files changed, 12023 insertions, 6382 deletions
diff --git a/configure.ac b/configure.ac index c6e6dfe..46dce4c 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-radeon], 7.0.3, [], mesa-dri-radeon) +AC_INIT([mesa-dri-radeon], 7.1.0, [], mesa-dri-radeon) AM_INIT_AUTOMAKE([dist-bzip2]) @@ -16,8 +16,8 @@ AC_PROG_CC AC_HEADER_STDC PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0]) -PKG_CHECK_MODULES([DRI], [libmesadri >= 7.0.3 libmesadri < 7.1.0 - libmesadricommon >= 7.0.3 libmesadricommon < 7.1.0]) +PKG_CHECK_MODULES([DRI], [libmesadri >= 7.1.0 libmesadri < 7.3.0 + libmesadricommon >= 7.1.0 libmesadricommon < 7.3.0]) AC_OUTPUT([ Makefile diff --git a/r200/r200_cmdbuf.c b/r200/r200_cmdbuf.c index 2920cea..c1d51e8 100644 --- a/r200/r200_cmdbuf.c +++ b/r200/r200_cmdbuf.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_cmdbuf.c,v 1.1 2002/10/30 12:51:51 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_context.c b/r200/r200_context.c index 786a298..c567349 100644 --- a/r200/r200_context.c +++ b/r200/r200_context.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_context.c,v 1.3 2003/05/06 23:52:08 daenzer Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. @@ -70,6 +69,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_ATI_fragment_shader #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord +#define need_GL_EXT_multi_draw_arrays #define need_GL_EXT_secondary_color #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate @@ -133,6 +133,7 @@ const struct dri_extension card_extensions[] = { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, { "GL_EXT_blend_subtract", NULL }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, @@ -278,14 +279,14 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, "def_max_anisotropy"); if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) { - if ( sPriv->drmMinor < 13 ) + if ( sPriv->drm_version.minor < 13 ) fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " - "disabling.\n",sPriv->drmMinor ); + "disabling.\n", sPriv->drm_version.minor ); else rmesa->using_hyperz = GL_TRUE; } - if ( sPriv->drmMinor >= 15 ) + if ( sPriv->drm_version.minor >= 15 ) rmesa->texmicrotile = GL_TRUE; /* Init default driver functions then plug in our R200-specific functions @@ -318,7 +319,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, rmesa->dri.hwContext = driContextPriv->hHWContext; rmesa->dri.hwLock = &sPriv->pSAREA->lock; rmesa->dri.fd = sPriv->fd; - rmesa->dri.drmMinor = sPriv->drmMinor; + rmesa->dri.drmMinor = sPriv->drm_version.minor; rmesa->r200Screen = screen; rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA + @@ -500,13 +501,10 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, fthrottle_mode, rmesa->r200Screen->irq); - rmesa->vblank_flags = (rmesa->r200Screen->irq != 0) - ? driGetDefaultVBlankFlags(&rmesa->optionCache) : VBLANK_FLAG_NO_IRQ; - rmesa->prefer_gart_client_texturing = (getenv("R200_GART_CLIENT_TEXTURES") != 0); - (*dri_interface->getUST)( & rmesa->swap_ust ); + (*sPriv->systemTime->getUST)( & rmesa->swap_ust ); #if DO_DEBUG @@ -667,15 +665,18 @@ r200MakeCurrent( __DRIcontextPrivate *driContextPriv, if (R200_DEBUG & DEBUG_DRI) fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)newCtx->glCtx); - if ( newCtx->dri.drawable != driDrawPriv ) { - driDrawableInitVBlank( driDrawPriv, newCtx->vblank_flags, - &newCtx->vbl_seq ); - } - newCtx->dri.readable = driReadPriv; if ( newCtx->dri.drawable != driDrawPriv || newCtx->lastStamp != driDrawPriv->lastStamp ) { + if (driDrawPriv->swap_interval == (unsigned)-1) { + driDrawPriv->vblFlags = (newCtx->r200Screen->irq != 0) + ? driGetDefaultVBlankFlags(&newCtx->optionCache) + : VBLANK_FLAG_NO_IRQ; + + driDrawableInitVBlank( driDrawPriv ); + } + newCtx->dri.drawable = driDrawPriv; r200SetCliprects(newCtx); diff --git a/r200/r200_context.h b/r200/r200_context.h index a06a2f5..ee478e7 100644 --- a/r200/r200_context.h +++ b/r200/r200_context.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_context.h,v 1.2 2002/12/16 16:18:54 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. @@ -894,11 +893,8 @@ struct r200_context { GLuint TexGenCompSel; GLmatrix tmpmat; - /* VBI / buffer swap + /* buffer swap */ - GLuint vbl_seq; - GLuint vblank_flags; - int64_t swap_ust; int64_t swap_missed_ust; @@ -931,7 +927,7 @@ struct r200_context { #define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx)) -static __inline GLuint r200PackColor( GLuint cpp, +static INLINE GLuint r200PackColor( GLuint cpp, GLubyte r, GLubyte g, GLubyte b, GLubyte a ) { diff --git a/r200/r200_fragshader.c b/r200/r200_fragshader.c index 5dd3ada..d514b28 100644 --- a/r200/r200_fragshader.c +++ b/r200/r200_fragshader.c @@ -24,13 +24,13 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ -#include "glheader.h" -#include "macros.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "tnl/t_context.h" -#include "atifragshader.h" -#include "program.h" +#include "shader/atifragshader.h" +#include "shader/program.h" #include "r200_context.h" #include "r200_ioctl.h" #include "r200_tex.h" diff --git a/r200/r200_ioctl.c b/r200/r200_ioctl.c index 463bd64..20482a4 100644 --- a/r200/r200_ioctl.c +++ b/r200/r200_ioctl.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_ioctl.c,v 1.4 2002/12/17 00:32:56 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. @@ -420,13 +419,14 @@ static void r200WaitForFrameCompletion( r200ContextPtr rmesa ) /* Copy the back color buffer to the front color buffer. */ -void r200CopyBuffer( const __DRIdrawablePrivate *dPriv, +void r200CopyBuffer( __DRIdrawablePrivate *dPriv, const drm_clip_rect_t *rect) { r200ContextPtr rmesa; GLint nbox, i, ret; GLboolean missed_target; int64_t ust; + __DRIscreenPrivate *psp = dPriv->driScreenPriv; assert(dPriv); assert(dPriv->driContextPriv); @@ -450,7 +450,7 @@ void r200CopyBuffer( const __DRIdrawablePrivate *dPriv, if (!rect) { UNLOCK_HARDWARE( rmesa ); - driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target ); + driWaitForVBlank( dPriv, & missed_target ); LOCK_HARDWARE( rmesa ); } @@ -477,16 +477,18 @@ void r200CopyBuffer( const __DRIdrawablePrivate *dPriv, if (rect->y2 < b->y2) b->y2 = rect->y2; - if (b->x1 < b->x2 && b->y1 < b->y2) - b++; + if (b->x1 >= b->x2 || b->y1 >= b->y2) + continue; } - else - b++; + b++; n++; } rmesa->sarea->nbox = n; + if (!n) + continue; + ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); if ( ret ) { @@ -502,7 +504,7 @@ void r200CopyBuffer( const __DRIdrawablePrivate *dPriv, rmesa->hw.all_dirty = GL_TRUE; rmesa->swap_count++; - (*dri_interface->getUST)( & ust ); + (*psp->systemTime->getUST)( & ust ); if ( missed_target ) { rmesa->swap_missed_count++; rmesa->swap_missed_ust = ust - rmesa->swap_ust; @@ -514,11 +516,12 @@ void r200CopyBuffer( const __DRIdrawablePrivate *dPriv, } } -void r200PageFlip( const __DRIdrawablePrivate *dPriv ) +void r200PageFlip( __DRIdrawablePrivate *dPriv ) { r200ContextPtr rmesa; GLint ret; GLboolean missed_target; + __DRIscreenPrivate *psp = dPriv->driScreenPriv; assert(dPriv); assert(dPriv->driContextPriv); @@ -554,10 +557,10 @@ void r200PageFlip( const __DRIdrawablePrivate *dPriv ) */ r200WaitForFrameCompletion( rmesa ); UNLOCK_HARDWARE( rmesa ); - driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target ); + driWaitForVBlank( dPriv, & missed_target ); if ( missed_target ) { rmesa->swap_missed_count++; - (void) (*dri_interface->getUST)( & rmesa->swap_missed_ust ); + (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust ); } LOCK_HARDWARE( rmesa ); @@ -571,7 +574,7 @@ void r200PageFlip( const __DRIdrawablePrivate *dPriv ) } rmesa->swap_count++; - (void) (*dri_interface->getUST)( & rmesa->swap_ust ); + (void) (*psp->systemTime->getUST)( & rmesa->swap_ust ); #if 000 if ( rmesa->sarea->pfCurrentPage == 1 ) { @@ -858,7 +861,7 @@ void r200Finish( GLcontext *ctx ) * the kernel data structures, and the current context to get the * device fd. */ -void *r200AllocateMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLsizei size, +void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, GLfloat readfreq, GLfloat writefreq, GLfloat priority) { @@ -900,7 +903,7 @@ void *r200AllocateMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLsizei size, /* Called via glXFreeMemoryMESA() */ -void r200FreeMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLvoid *pointer) +void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer) { GET_CURRENT_CONTEXT(ctx); r200ContextPtr rmesa; @@ -937,7 +940,7 @@ void r200FreeMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLvoid *pointer) } /* Called via glXGetMemoryOffsetMESA() */ -GLuint r200GetMemoryOffsetMESA(__DRInativeDisplay *dpy, int scrn, const GLvoid *pointer) +GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer) { GET_CURRENT_CONTEXT(ctx); r200ContextPtr rmesa; diff --git a/r200/r200_ioctl.h b/r200/r200_ioctl.h index f537527..31431b7 100644 --- a/r200/r200_ioctl.h +++ b/r200/r200_ioctl.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_ioctl.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. @@ -90,19 +89,19 @@ extern void r200ReleaseDmaRegion( r200ContextPtr rmesa, struct r200_dma_region *region, const char *caller ); -extern void r200CopyBuffer( const __DRIdrawablePrivate *drawable, +extern void r200CopyBuffer( __DRIdrawablePrivate *drawable, const drm_clip_rect_t *rect); -extern void r200PageFlip( const __DRIdrawablePrivate *drawable ); +extern void r200PageFlip( __DRIdrawablePrivate *drawable ); extern void r200Flush( GLcontext *ctx ); extern void r200Finish( GLcontext *ctx ); extern void r200WaitForIdleLocked( r200ContextPtr rmesa ); extern void r200WaitForVBlank( r200ContextPtr rmesa ); extern void r200InitIoctlFuncs( struct dd_function_table *functions ); -extern void *r200AllocateMemoryMESA( __DRInativeDisplay *dpy, int scrn, GLsizei size, GLfloat readfreq, +extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq, GLfloat writefreq, GLfloat priority ); -extern void r200FreeMemoryMESA( __DRInativeDisplay *dpy, int scrn, GLvoid *pointer ); -extern GLuint r200GetMemoryOffsetMESA( __DRInativeDisplay *dpy, int scrn, const GLvoid *pointer ); +extern void r200FreeMemoryMESA( __DRIscreen *screen, GLvoid *pointer ); +extern GLuint r200GetMemoryOffsetMESA( __DRIscreen *screen, const GLvoid *pointer ); extern GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer, GLint size ); @@ -138,7 +137,7 @@ do { \ memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \ rmesa->hw.ATOM.cmd_size * 4) -static __inline int R200_DB_STATECHANGE( +static INLINE int R200_DB_STATECHANGE( r200ContextPtr rmesa, struct r200_state_atom *atom ) { @@ -184,7 +183,7 @@ do { \ * and hang on to the lock until the critical section is finished and we flush * the buffer again and unlock. */ -static __inline void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes ) +static INLINE void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes ) { if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ) r200FlushCmdBuf( rmesa, __FUNCTION__ ); @@ -193,7 +192,7 @@ static __inline void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes ) /* Alloc space in the command buffer */ -static __inline char *r200AllocCmdBuf( r200ContextPtr rmesa, +static INLINE char *r200AllocCmdBuf( r200ContextPtr rmesa, int bytes, const char *where ) { char * head; diff --git a/r200/r200_lock.c b/r200/r200_lock.c index b050dd7..f89b526 100644 --- a/r200/r200_lock.c +++ b/r200/r200_lock.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_lock.c,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_lock.h b/r200/r200_lock.h index e4c3a7e..4ff9890 100644 --- a/r200/r200_lock.h +++ b/r200/r200_lock.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_lock.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_maos.h b/r200/r200_maos.h index 4998f67..d3ed06d 100644 --- a/r200/r200_maos.h +++ b/r200/r200_maos.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_maos.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_maos_arrays.c b/r200/r200_maos_arrays.c index 3162b50..7bc05e2 100644 --- a/r200/r200_maos_arrays.c +++ b/r200/r200_maos_arrays.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_maos_arrays.c,v 1.3 2003/02/23 23:59:01 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_pixel.c b/r200/r200_pixel.c index 7b060f9..46d2307 100644 --- a/r200/r200_pixel.c +++ b/r200/r200_pixel.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_pixel.c,v 1.2 2002/12/16 16:18:54 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. @@ -295,7 +294,7 @@ static void do_draw_pix( GLcontext *ctx, r200ContextPtr rmesa = R200_CONTEXT(ctx); __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; drm_clip_rect_t *box = dPriv->pClipRects; - struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0][0]; + struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0]; driRenderbuffer *drb = (driRenderbuffer *) rb; int nbox = dPriv->numClipRects; int i; @@ -383,13 +382,13 @@ r200TryDrawPixels( GLcontext *ctx, GLint pitch = unpack->RowLength ? unpack->RowLength : width; GLuint planemask; GLuint cpp = rmesa->r200Screen->cpp; - GLint size = width * pitch * cpp; + GLint size = height * pitch * cpp; if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); /* check that we're drawing to exactly one color buffer */ - if (ctx->DrawBuffer->_NumColorDrawBuffers[0] != 1) + if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) return GL_FALSE; switch (format) { diff --git a/r200/r200_pixel.h b/r200/r200_pixel.h index 8f3923b..e62aa05 100644 --- a/r200/r200_pixel.h +++ b/r200/r200_pixel.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_pixel.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_reg.h b/r200/r200_reg.h index a88ea4c..5ce287f 100644 --- a/r200/r200_reg.h +++ b/r200/r200_reg.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_reg.h,v 1.2 2002/12/16 16:18:54 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_sanity.c b/r200/r200_sanity.c index 3f2a866..00d2f65 100644 --- a/r200/r200_sanity.c +++ b/r200/r200_sanity.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_sanity.c,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ /************************************************************************** Copyright 2002 ATI Technologies Inc., Ontario, Canada, and diff --git a/r200/r200_span.c b/r200/r200_span.c index 6e99dfe..3d7a000 100644 --- a/r200/r200_span.c +++ b/r200/r200_span.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_span.c,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. @@ -173,6 +172,7 @@ r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y ) /* 16-bit depth buffer functions */ +#define VALUE_TYPE GLushort #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d; @@ -186,6 +186,7 @@ r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y ) /* 24 bit depth, 8 bit stencil depthbuffer functions */ +#define VALUE_TYPE GLuint #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -256,7 +257,7 @@ static void r200SpanRenderStart( GLcontext *ctx ) { int p; driRenderbuffer *drb = - (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0][0]; + (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; volatile int *buf = (volatile int *)(rmesa->dri.screen->pFB + drb->offset); p = *buf; diff --git a/r200/r200_span.h b/r200/r200_span.h index 5e7d3e4..bae5644 100644 --- a/r200/r200_span.h +++ b/r200/r200_span.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_span.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_state.c b/r200/r200_state.c index 16726d7..cf37517 100644 --- a/r200/r200_state.c +++ b/r200/r200_state.c @@ -1,4 +1,3 @@ -/* $XFree86$ */ /************************************************************************** Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. @@ -773,9 +772,11 @@ static void r200LineWidth( GLcontext *ctx, GLfloat widthf ) R200_STATECHANGE( rmesa, set ); /* Line width is stored in U6.4 format. + * Same min/max limits for AA, non-AA lines. */ rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= ~0xffff; - rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)(ctx->Line._Width * 16.0); + rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint) + (CLAMP(widthf, ctx->Const.MinLineWidth, ctx->Const.MaxLineWidth) * 16.0); if ( widthf > 1.0 ) { rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_WIDELINE_ENABLE; @@ -1858,8 +1859,7 @@ void r200SetCliprects( r200ContextPtr rmesa ) GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate; GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate; - if (draw_fb->_ColorDrawBufferMask[0] - == BUFFER_BIT_BACK_LEFT) { + if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BIT_BACK_LEFT) { /* Can't ignore 2d windows if we are page flipping. */ if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) { @@ -1909,17 +1909,18 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode ) R200_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */ - /* - * _ColorDrawBufferMask is easier to cope with than <mode>. - * Check for software fallback, update cliprects. - */ - switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) { - case BUFFER_BIT_FRONT_LEFT: - case BUFFER_BIT_BACK_LEFT: + if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) { + /* 0 (GL_NONE) buffers or multiple color drawing buffers */ + FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE ); + return; + } + + switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) { + case BUFFER_FRONT_LEFT: + case BUFFER_BACK_LEFT: FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE ); break; default: - /* 0 (GL_NONE) buffers or multiple color drawing buffers */ FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE ); return; } @@ -2444,11 +2445,11 @@ r200UpdateDrawBuffer(GLcontext *ctx) struct gl_framebuffer *fb = ctx->DrawBuffer; driRenderbuffer *drb; - if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) { + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { /* draw to front */ drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; } - else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) { + else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { /* draw to back */ drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; } diff --git a/r200/r200_state.h b/r200/r200_state.h index f34090b..a917163 100644 --- a/r200/r200_state.h +++ b/r200/r200_state.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_state.h,v 1.2 2002/11/05 17:46:08 tsi Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_state_init.c b/r200/r200_state_init.c index b40d0bd..0c36cef 100644 --- a/r200/r200_state_init.c +++ b/r200/r200_state_init.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_state_init.c,v 1.4 2003/02/22 06:21:11 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_swtcl.c b/r200/r200_swtcl.c index 25d229d..a1ea019 100644 --- a/r200/r200_swtcl.c +++ b/r200/r200_swtcl.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_swtcl.c,v 1.5 2003/05/06 23:52:08 daenzer Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_swtcl.h b/r200/r200_swtcl.h index ccf8179..7458c54 100644 --- a/r200/r200_swtcl.h +++ b/r200/r200_swtcl.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_swtcl.h,v 1.3 2003/05/06 23:52:08 daenzer Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_tcl.c b/r200/r200_tcl.c index 1ff0cf9..78347d3 100644 --- a/r200/r200_tcl.c +++ b/r200/r200_tcl.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tcl.c,v 1.2 2002/12/16 16:18:55 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_tcl.h b/r200/r200_tcl.h index ac5bc11..f191ddc 100644 --- a/r200/r200_tcl.h +++ b/r200/r200_tcl.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tcl.h,v 1.2 2002/12/16 16:18:55 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_tex.c b/r200/r200_tex.c index 6c6450c..24b9b3b 100644 --- a/r200/r200_tex.c +++ b/r200/r200_tex.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tex.c,v 1.2 2002/11/05 17:46:08 tsi Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. @@ -103,37 +102,39 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__); } - switch ( twrap ) { - case GL_REPEAT: - t->pp_txfilter |= R200_CLAMP_T_WRAP; - break; - case GL_CLAMP: - t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL; - is_clamp = GL_TRUE; - break; - case GL_CLAMP_TO_EDGE: - t->pp_txfilter |= R200_CLAMP_T_CLAMP_LAST; - break; - case GL_CLAMP_TO_BORDER: - t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL; - is_clamp_to_border = GL_TRUE; - break; - case GL_MIRRORED_REPEAT: - t->pp_txfilter |= R200_CLAMP_T_MIRROR; - break; - case GL_MIRROR_CLAMP_EXT: - t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL; - is_clamp = GL_TRUE; - break; - case GL_MIRROR_CLAMP_TO_EDGE_EXT: - t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_LAST; - break; - case GL_MIRROR_CLAMP_TO_BORDER_EXT: - t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL; - is_clamp_to_border = GL_TRUE; - break; - default: - _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__); + if (t->base.tObj->Target != GL_TEXTURE_1D) { + switch ( twrap ) { + case GL_REPEAT: + t->pp_txfilter |= R200_CLAMP_T_WRAP; + break; + case GL_CLAMP: + t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_CLAMP_TO_EDGE: + t->pp_txfilter |= R200_CLAMP_T_CLAMP_LAST; + break; + case GL_CLAMP_TO_BORDER: + t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + case GL_MIRRORED_REPEAT: + t->pp_txfilter |= R200_CLAMP_T_MIRROR; + break; + case GL_MIRROR_CLAMP_EXT: + t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: + t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_LAST; + break; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: + t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + default: + _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__); + } } t->pp_txformat_x &= ~R200_CLAMP_Q_MASK; @@ -182,7 +183,7 @@ static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max ) { t->pp_txfilter &= ~R200_MAX_ANISO_MASK; - if ( max == 1.0 ) { + if ( max <= 1.0 ) { t->pp_txfilter |= R200_MAX_ANISO_1_TO_1; } else if ( max <= 2.0 ) { t->pp_txfilter |= R200_MAX_ANISO_2_TO_1; @@ -483,7 +484,7 @@ r200ValidateClientStorage( GLcontext *ctx, GLenum target, { r200ContextPtr rmesa = R200_CONTEXT(ctx); - if (0) + if ( R200_DEBUG & DEBUG_TEXTURE ) fprintf(stderr, "intformat %s format %s type %s\n", _mesa_lookup_enum_by_nr( internalFormat ), _mesa_lookup_enum_by_nr( format ), @@ -549,7 +550,7 @@ r200ValidateClientStorage( GLcontext *ctx, GLenum target, format, type); - if (0) + if ( R200_DEBUG & DEBUG_TEXTURE ) fprintf(stderr, "%s: srcRowStride %d/%x\n", __FUNCTION__, srcRowStride, srcRowStride); diff --git a/r200/r200_tex.h b/r200/r200_tex.h index 68e9a0e..10ff8e8 100644 --- a/r200/r200_tex.h +++ b/r200/r200_tex.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tex.h,v 1.1 2002/10/30 12:51:53 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/r200/r200_texmem.c b/r200/r200_texmem.c index 9daafcf..183c4ca 100644 --- a/r200/r200_texmem.c +++ b/r200/r200_texmem.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_texmem.c,v 1.5 2002/12/17 00:32:56 dawes Exp $ */ /************************************************************************** Copyright (C) Tungsten Graphics 2002. All Rights Reserved. diff --git a/r200/r200_texstate.c b/r200/r200_texstate.c index d12c3bc..4edf304 100644 --- a/r200/r200_texstate.c +++ b/r200/r200_texstate.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_texstate.c,v 1.3 2003/02/15 22:18:47 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. @@ -980,9 +979,7 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch) { - r200ContextPtr rmesa = - (r200ContextPtr) ((__DRIcontextPrivate *) pDRICtx->private)-> - driverPrivate; + r200ContextPtr rmesa = pDRICtx->driverPrivate; struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->glCtx, texname); r200TexObjPtr t; @@ -1818,6 +1815,12 @@ void r200UpdateTextureState( GLcontext *ctx ) GLboolean ok; GLuint dbg; + /* NOTE: must not manipulate rmesa->state.texture.unit[].unitneeded or + rmesa->state.envneeded before a R200_STATECHANGE (or R200_NEWPRIM) since + we use these to determine if we want to emit the corresponding state + atoms. */ + R200_NEWPRIM( rmesa ); + if (ctx->ATIFragmentShader._Enabled) { GLuint i; for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) { diff --git a/r200/r200_vertprog.c b/r200/r200_vertprog.c index 6089d61..562992f 100644 --- a/r200/r200_vertprog.c +++ b/r200/r200_vertprog.c @@ -30,10 +30,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Aapo Tahkola <aet@rasterburn.org> * Roland Scheidegger <rscheidegger_lists@hispeed.ch> */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" -#include "program.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" @@ -154,7 +154,7 @@ static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_p return GL_TRUE; } -static __inline unsigned long t_dst_mask(GLuint mask) +static INLINE unsigned long t_dst_mask(GLuint mask) { /* WRITEMASK_* is equivalent to VSF_FLAG_* */ return mask & VSF_FLAG_ALL; @@ -215,6 +215,7 @@ static unsigned long t_src_class(enum register_file file) case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: case PROGRAM_STATE_VAR: return VSF_IN_CLASS_PARAM; /* @@ -228,7 +229,7 @@ static unsigned long t_src_class(enum register_file file) } } -static __inline unsigned long t_swizzle(GLubyte swizzle) +static INLINE unsigned long t_swizzle(GLubyte swizzle) { /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ return swizzle; @@ -408,6 +409,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte int fog_temp_i = 0; int free_inputs; int array_count = 0; + int u_temp_used; vp->native = GL_FALSE; vp->translated = GL_TRUE; @@ -744,9 +746,16 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte goto next; case OPCODE_MAD: + /* only 2 read ports into temp memory thus may need the macro op MAD_2 + instead (requiring 2 clocks) if all inputs are in temp memory + (and, only if they actually reference 3 distinct temps) */ hw_op=(src[0].File == PROGRAM_TEMPORARY && src[1].File == PROGRAM_TEMPORARY && - src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; + src[2].File == PROGRAM_TEMPORARY && + (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) && + (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) && + (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ? + R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst), t_dst_mask(dst.WriteMask)); @@ -874,8 +883,11 @@ else { case OPCODE_XPD: /* mul r0, r1.yzxw, r2.zxyw mad r0, -r2.yzxw, r1.zxyw, r0 - NOTE: might need MAD_2 */ + hw_op=(src[0].File == PROGRAM_TEMPORARY && + src[1].File == PROGRAM_TEMPORARY && + (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ? + R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, @@ -901,7 +913,7 @@ else { o_inst++; u_temp_i--; - o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst), + o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), @@ -1051,14 +1063,15 @@ else { dofogfix = 0; } + u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i; if (mesa_vp->Base.NumNativeTemporaries < - (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) { + (mesa_vp->Base.NumTemporaries + u_temp_used)) { mesa_vp->Base.NumNativeTemporaries = - mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i); + mesa_vp->Base.NumTemporaries + u_temp_used; } - if (u_temp_i < mesa_vp->Base.NumTemporaries) { + if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) { if (R200_DEBUG & DEBUG_FALLBACKS) { - fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i); + fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used); } return GL_FALSE; } diff --git a/r300/Makefile.am b/r300/Makefile.am index 0992115..a1fa2ef 100644 --- a/r300/Makefile.am +++ b/r300/Makefile.am @@ -24,7 +24,16 @@ r300_dri_la_SOURCES = \ r300_texmem.c \ r300_tex.c \ r300_texstate.c \ + radeon_program.c \ + radeon_program_alu.c \ + radeon_program_pair.c \ + radeon_nqssadce.c \ r300_vertprog.c \ r300_fragprog.c \ + r300_fragprog_swizzle.c \ + r300_fragprog_emit.c \ + r500_fragprog.c \ + r500_fragprog_emit.c \ r300_shader.c \ - r300_emit.c + r300_emit.c \ + r300_swtcl.c diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c index 3befa58..c069660 100644 --- a/r300/r300_cmdbuf.c +++ b/r300/r300_cmdbuf.c @@ -133,13 +133,15 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat int i; int dwords = (*state->check) (r300, state); - fprintf(stderr, " emit %s/%d/%d\n", state->name, dwords, + fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); - if (RADEON_DEBUG & DEBUG_VERBOSE) - for (i = 0; i < dwords; i++) - fprintf(stderr, " %s[%d]: %08X\n", + if (RADEON_DEBUG & DEBUG_VERBOSE) { + for (i = 0; i < dwords; i++) { + fprintf(stderr, " %s[%d]: %08x\n", state->name, i, state->cmd[i]); + } + } } /** @@ -148,35 +150,21 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat * The caller must have ensured that there is enough space in the command * buffer. */ -static __inline__ void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) +static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) { struct r300_state_atom *atom; uint32_t *dest; + int dwords; dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used; - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { - foreach(atom, &r300->hw.atomlist) { - if ((atom->dirty || r300->hw.all_dirty) == dirty) { - int dwords = (*atom->check) (r300, atom); - - if (dwords) - r300PrintStateAtom(r300, atom); - else - fprintf(stderr, - " skip state %s\n", - atom->name); - } - } - } - /* Emit WAIT */ *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN); dest++; r300->cmdbuf.count_used++; /* Emit cache flush */ - *dest = cmdpacket0(R300_TX_CNTL, 1); + *dest = cmdpacket0(R300_TX_INVALTAGS, 1); dest++; r300->cmdbuf.count_used++; @@ -193,13 +181,20 @@ static __inline__ void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) foreach(atom, &r300->hw.atomlist) { if ((atom->dirty || r300->hw.all_dirty) == dirty) { - int dwords = (*atom->check) (r300, atom); - + dwords = (*atom->check) (r300, atom); if (dwords) { + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { + r300PrintStateAtom(r300, atom); + } memcpy(dest, atom->cmd, dwords * 4); dest += dwords; r300->cmdbuf.count_used += dwords; atom->dirty = GL_FALSE; + } else { + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { + fprintf(stderr, " skip state %s\n", + atom->name); + } } } } @@ -245,22 +240,43 @@ void r300EmitState(r300ContextPtr r300) r300->hw.all_dirty = GL_FALSE; } -#define CHECK( NM, COUNT ) \ -static int check_##NM( r300ContextPtr r300, \ - struct r300_state_atom* atom ) \ -{ \ - (void) atom; (void) r300; \ - return (COUNT); \ -} - #define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) +#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) + +static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) +{ + return atom->cmd_size; +} + +static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = packet0_count(atom->cmd); + return cnt ? cnt + 1 : 0; +} + +static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = vpu_count(atom->cmd); + return cnt ? (cnt * 4) + 1 : 0; +} + +static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = r500fp_count(atom->cmd); + return cnt ? (cnt * 6) + 1 : 0; +} + +static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = r500fp_count(atom->cmd); + return cnt ? (cnt * 4) + 1 : 0; +} -CHECK(always, atom->cmd_size) - CHECK(variable, packet0_count(atom->cmd) ? (1 + packet0_count(atom->cmd)) : 0) - CHECK(vpu, vpu_count(atom->cmd) ? (1 + vpu_count(atom->cmd) * 4) : 0) -#undef packet0_count -#undef vpu_count #define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ do { \ r300->hw.ATOM.cmd_size = (SZ); \ @@ -280,10 +296,15 @@ void r300InitCmdBuf(r300ContextPtr r300) { int size, mtu; int has_tcl = 1; + int is_r500 = 0; + int i; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ mtu = r300->radeon.glCtx->Const.MaxTextureUnits; @@ -298,30 +319,41 @@ void r300InitCmdBuf(r300ContextPtr r300) /* Initialize state atoms */ ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0); r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6); - ALLOC_STATE(vap_cntl, always, 2, 0); - r300->hw.vap_cntl.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1); + ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0; + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1); + if (is_r500) { + ALLOC_STATE(vap_index_offset, always, 2, 0); + r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1); + r300->hw.vap_index_offset.cmd[1] = 0; + } ALLOC_STATE(vte, always, 3, 0); r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2); - ALLOC_STATE(unk2134, always, 3, 0); - r300->hw.unk2134.cmd[0] = cmdpacket0(0x2134, 2); + ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0); + r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(R300_VAP_VF_MAX_VTX_INDX, 2); ALLOC_STATE(vap_cntl_status, always, 2, 0); r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0); r300->hw.vir[0].cmd[R300_VIR_CMD_0] = - cmdpacket0(R300_VAP_INPUT_ROUTE_0_0, 1); + cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1); ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1); r300->hw.vir[1].cmd[R300_VIR_CMD_0] = - cmdpacket0(R300_VAP_INPUT_ROUTE_1_0, 1); + cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); - r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_INPUT_CNTL_0, 2); - ALLOC_STATE(unk21DC, always, 2, 0); - r300->hw.unk21DC.cmd[0] = cmdpacket0(0x21DC, 1); - ALLOC_STATE(vap_clip_cntl, always, 2, 0); - r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1); - ALLOC_STATE(unk2220, always, 5, 0); - r300->hw.unk2220.cmd[0] = cmdpacket0(0x2220, 4); - ALLOC_STATE(unk2288, always, 2, 0); - r300->hw.unk2288.cmd[0] = cmdpacket0(R300_VAP_UNKNOWN_2288, 1); + r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2); + ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0); + r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE); + + if (has_tcl) { + ALLOC_STATE(vap_clip_cntl, always, 2, 0); + r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1); + ALLOC_STATE(vap_clip, always, 5, 0); + r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4); + ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0); + r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1); + } + ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0); r300->hw.vof.cmd[R300_VOF_CMD_0] = cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2); @@ -329,7 +361,7 @@ void r300InitCmdBuf(r300ContextPtr r300) if (has_tcl) { ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0); r300->hw.pvs.cmd[R300_PVS_CMD_0] = - cmdpacket0(R300_VAP_PVS_CNTL_1, 3); + cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3); } ALLOC_STATE(gb_enable, always, 2, 0); @@ -338,130 +370,183 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5); ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1); - ALLOC_STATE(unk4200, always, 5, 0); - r300->hw.unk4200.cmd[0] = cmdpacket0(0x4200, 4); - ALLOC_STATE(unk4214, always, 2, 0); - r300->hw.unk4214.cmd[0] = cmdpacket0(0x4214, 1); + ALLOC_STATE(ga_point_s0, always, 5, 0); + r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4); + ALLOC_STATE(ga_triangle_stipple, always, 2, 0); + r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1); ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0); - r300->hw.ps.cmd[0] = cmdpacket0(R300_RE_POINTSIZE, 1); - ALLOC_STATE(unk4230, always, 4, 0); - r300->hw.unk4230.cmd[0] = cmdpacket0(0x4230, 3); + r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1); + ALLOC_STATE(ga_point_minmax, always, 4, 0); + r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3); ALLOC_STATE(lcntl, always, 2, 0); - r300->hw.lcntl.cmd[0] = cmdpacket0(R300_RE_LINE_CNT, 1); - ALLOC_STATE(unk4260, always, 4, 0); - r300->hw.unk4260.cmd[0] = cmdpacket0(0x4260, 3); + r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1); + ALLOC_STATE(ga_line_stipple, always, 4, 0); + r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3); ALLOC_STATE(shade, always, 5, 0); - r300->hw.shade.cmd[0] = cmdpacket0(R300_RE_SHADE, 4); + r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4); ALLOC_STATE(polygon_mode, always, 4, 0); - r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3); + r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3); ALLOC_STATE(fogp, always, 3, 0); - r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2); + r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2); ALLOC_STATE(zbias_cntl, always, 2, 0); - r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_RE_ZBIAS_CNTL, 1); + r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1); ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0); r300->hw.zbs.cmd[R300_ZBS_CMD_0] = - cmdpacket0(R300_RE_ZBIAS_T_FACTOR, 4); + cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); ALLOC_STATE(occlusion_cntl, always, 2, 0); - r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1); + r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1); ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0); - r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_RE_CULL_CNTL, 1); - ALLOC_STATE(unk42C0, always, 3, 0); - r300->hw.unk42C0.cmd[0] = cmdpacket0(0x42C0, 2); + r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1); + ALLOC_STATE(su_depth_scale, always, 3, 0); + r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2); ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); - r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_CNTL_0, 2); - ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); - r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_INTERP_0, 8); - ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, 1); - ALLOC_STATE(unk43A4, always, 3, 0); - r300->hw.unk43A4.cmd[0] = cmdpacket0(0x43A4, 2); - ALLOC_STATE(unk43E8, always, 2, 0); - r300->hw.unk43E8.cmd[0] = cmdpacket0(0x43E8, 1); - ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); - r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); - r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4); - ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); - r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); - ALLOC_STATE(unk46A4, always, 6, 0); - r300->hw.unk46A4.cmd[0] = cmdpacket0(0x46A4, 5); - ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); - r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); - ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); - r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1); - ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); - r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); - ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); - r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2); + if (is_r500) { + ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); + for (i = 0; i < 8; i++) { + r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] = + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); + } + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); + } else { + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1); + } + ALLOC_STATE(sc_hyperz, always, 3, 0); + r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); + ALLOC_STATE(sc_screendoor, always, 2, 0); + r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1); + ALLOC_STATE(us_out_fmt, always, 6, 0); + r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5); + + if (is_r500) { + ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2); + r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO; + r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3); + r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1); + r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */ + + ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); + ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); + r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); + } else { + ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3); + r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4); + ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); + r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0); + + ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); + r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1); + ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); + r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1); + ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); + r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1); + ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); + r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1); + ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); + r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); + } ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); - r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_RE_FOG_STATE, 1); + r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1); ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); - r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FOG_COLOR_R, 3); + r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3); ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0); - r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_PP_ALPHA_TEST, 2); - ALLOC_STATE(unk4BD8, always, 2, 0); - r300->hw.unk4BD8.cmd[0] = cmdpacket0(0x4BD8, 1); - ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); - r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); - ALLOC_STATE(unk4E00, always, 2, 0); - r300->hw.unk4E00.cmd[0] = cmdpacket0(0x4E00, 1); + r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2); + ALLOC_STATE(fg_depth_src, always, 2, 0); + r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1); + ALLOC_STATE(rb3d_cctl, always, 2, 0); + r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1); ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0); r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2); ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0); - r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(R300_RB3D_COLORMASK, 1); - ALLOC_STATE(blend_color, always, 4, 0); - r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 3); + r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(RB3D_COLOR_CHANNEL_MASK, 1); + if (is_r500) { + ALLOC_STATE(blend_color, always, 3, 0); + r300->hw.blend_color.cmd[0] = cmdpacket0(R500_RB3D_CONSTANT_COLOR_AR, 2); + } else { + ALLOC_STATE(blend_color, always, 2, 0); + r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 1); + } + ALLOC_STATE(rop, always, 2, 0); + r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1); ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1); r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1); - ALLOC_STATE(unk4E50, always, 10, 0); - r300->hw.unk4E50.cmd[0] = cmdpacket0(0x4E50, 9); - ALLOC_STATE(unk4E88, always, 2, 0); - r300->hw.unk4E88.cmd[0] = cmdpacket0(0x4E88, 1); - ALLOC_STATE(unk4EA0, always, 3, 0); - r300->hw.unk4EA0.cmd[0] = cmdpacket0(0x4EA0, 2); + ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); + r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9); + ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); + r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1); + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); r300->hw.zs.cmd[R300_ZS_CMD_0] = - cmdpacket0(R300_RB3D_ZSTENCIL_CNTL_0, 3); + cmdpacket0(R300_ZB_CNTL, 3); ALLOC_STATE(zstencil_format, always, 5, 0); r300->hw.zstencil_format.cmd[0] = - cmdpacket0(R300_RB3D_ZSTENCIL_FORMAT, 4); + cmdpacket0(R300_ZB_FORMAT, 4); ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); - r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_RB3D_DEPTHOFFSET, 2); - ALLOC_STATE(unk4F28, always, 2, 0); - r300->hw.unk4F28.cmd[0] = cmdpacket0(0x4F28, 1); + r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2); + ALLOC_STATE(zb_depthclearvalue, always, 2, 0); + r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1); ALLOC_STATE(unk4F30, always, 3, 0); r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2); - ALLOC_STATE(unk4F44, always, 2, 0); - r300->hw.unk4F44.cmd[0] = cmdpacket0(0x4F44, 1); - ALLOC_STATE(unk4F54, always, 2, 0); - r300->hw.unk4F54.cmd[0] = cmdpacket0(0x4F54, 1); + ALLOC_STATE(zb_hiz_offset, always, 2, 0); + r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1); + ALLOC_STATE(zb_hiz_pitch, always, 2, 0); + r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1); /* VPU only on TCL */ if (has_tcl) { int i; ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); r300->hw.vpi.cmd[R300_VPI_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_PROGRAM, 0); + cmdvpu(R300_PVS_CODE_START, 0); - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[R300_VPP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_PARAMETERS, 0); + if (is_r500) { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[R300_VPP_CMD_0] = + cmdvpu(R500_PVS_CONST_START, 0); - ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[R300_VPS_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1); + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[R300_VPS_CMD_0] = + cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1); - for (i = 0; i < 6; i++) { - ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); - r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R500_PVS_UCP_START + i, 1); + } + } else { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[R300_VPP_CMD_0] = + cmdvpu(R300_PVS_CONST_START, 0); + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[R300_VPS_CMD_0] = + cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1); + + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R300_PVS_UCP_START + i, 1); + } } } /* Textures */ ALLOC_STATE(tex.filter, variable, mtu + 1, 0); r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FILTER_0, 0); + cmdpacket0(R300_TX_FILTER0_0, 0); ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0); r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = @@ -475,7 +560,7 @@ void r300InitCmdBuf(r300ContextPtr r300) cmdpacket0(R300_TX_FORMAT_0, 0); ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); - r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_PITCH_0, 0); + r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0); ALLOC_STATE(tex.offset, variable, mtu + 1, 0); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = diff --git a/r300/r300_cmdbuf.h b/r300/r300_cmdbuf.h index bfb2eda..a8eaa58 100644 --- a/r300/r300_cmdbuf.h +++ b/r300/r300_cmdbuf.h @@ -52,7 +52,7 @@ extern void r300DestroyCmdBuf(r300ContextPtr r300); * * \param dwords The number of dwords we need to be free on the command buffer */ -static __inline__ void r300EnsureCmdBufSpace(r300ContextPtr r300, +static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller) { assert(dwords < r300->cmdbuf.size); @@ -68,7 +68,7 @@ static __inline__ void r300EnsureCmdBufSpace(r300ContextPtr r300, * causes state reemission after a flush. This is necessary to ensure * correct hardware state after an unlock. */ -static __inline__ uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, +static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, int dwords, const char *caller) { uint32_t *ptr; @@ -80,7 +80,7 @@ static __inline__ uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, return ptr; } -static __inline__ uint32_t *r300AllocCmdBuf(r300ContextPtr r300, +static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300, int dwords, const char *caller) { uint32_t *ptr; diff --git a/r300/r300_context.c b/r300/r300_context.c index 9ea14ab..fcf571d 100644 --- a/r300/r300_context.c +++ b/r300/r300_context.c @@ -63,6 +63,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" +#include "r300_swtcl.h" #ifdef USER_BUFFERS #include "r300_mem.h" @@ -78,11 +79,13 @@ int hw_tcl_on = 1; #define need_GL_EXT_stencil_two_side #define need_GL_ARB_multisample +#define need_GL_ARB_point_parameters #define need_GL_ARB_texture_compression #define need_GL_ARB_vertex_buffer_object #define need_GL_ARB_vertex_program #define need_GL_EXT_blend_minmax //#define need_GL_EXT_fog_coord +#define need_GL_EXT_multi_draw_arrays #define need_GL_EXT_secondary_color #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate @@ -92,8 +95,13 @@ int hw_tcl_on = 1; const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ + {"GL_ARB_depth_texture", NULL}, + {"GL_ARB_fragment_program", NULL}, {"GL_ARB_multisample", GL_ARB_multisample_functions}, {"GL_ARB_multitexture", NULL}, + {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, + {"GL_ARB_shadow", NULL}, + {"GL_ARB_shadow_ambient", NULL}, {"GL_ARB_texture_border_clamp", NULL}, {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, {"GL_ARB_texture_cube_map", NULL}, @@ -104,14 +112,15 @@ const struct dri_extension card_extensions[] = { {"GL_ARB_texture_mirrored_repeat", NULL}, {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, - {"GL_ARB_fragment_program", NULL}, {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, {"GL_EXT_blend_subtract", NULL}, // {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_shadow_funcs", NULL}, {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, {"GL_EXT_stencil_wrap", NULL}, {"GL_EXT_texture_edge_clamp", NULL}, @@ -129,6 +138,7 @@ const struct dri_extension card_extensions[] = { {"GL_NV_blend_square", NULL}, {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, {"GL_SGIS_generate_mipmap", NULL}, + {"GL_SGIX_depth_texture", NULL}, {NULL, NULL} /* *INDENT-ON* */ }; @@ -272,6 +282,12 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, MIN2(ctx->Const.MaxTextureImageUnits, ctx->Const.MaxTextureCoordUnits); ctx->Const.MaxTextureMaxAnisotropy = 16.0; + ctx->Const.MaxTextureLodBias = 16.0; + + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ctx->Const.MaxTextureLevels = 13; + ctx->Const.MaxTextureRectSize = 4096; + } ctx->Const.MinPointSize = 1.0; ctx->Const.MinPointSizeAA = 1.0; @@ -317,15 +333,17 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _tnl_allow_vertex_fog(ctx, GL_TRUE); /* currently bogus data */ - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + if (screen->chip_flags & RADEON_CHIPSET_TCL) { + ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeInstructions = + VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ + ctx->Const.VertexProgram.MaxTemps = 32; + ctx->Const.VertexProgram.MaxNativeTemps = + /*VSF_MAX_FRAGMENT_TEMPS */ 32; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + } ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ @@ -363,6 +381,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, radeonInitSpanFuncs(ctx); r300InitCmdBuf(r300); r300InitState(r300); + if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) + r300InitSwtcl(ctx); TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; diff --git a/r300/r300_context.h b/r300/r300_context.h index 6b0a588..d2017f8 100644 --- a/r300/r300_context.h +++ b/r300/r300_context.h @@ -49,8 +49,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define USER_BUFFERS -//#define OPTIMIZE_ELTS - struct r300_context; typedef struct r300_context r300ContextRec; typedef struct r300_context *r300ContextPtr; @@ -75,12 +73,12 @@ typedef struct r300_context *r300ContextPtr; } #include "r300_vertprog.h" -#include "r300_fragprog.h" +#include "r500_fragprog.h" /** * This function takes a float and packs it into a uint32_t */ -static __inline__ uint32_t r300PackFloat32(float fl) +static INLINE uint32_t r300PackFloat32(float fl) { union { float fl; @@ -97,7 +95,7 @@ static __inline__ uint32_t r300PackFloat32(float fl) * But it works for most things. I'll fix it later if someone * else with a better clue doesn't */ -static __inline__ uint32_t r300PackFloat24(float f) +static INLINE uint32_t r300PackFloat24(float f) { float mantissa; int exponent; @@ -149,7 +147,6 @@ struct r300_dma_region { int aos_offset; /* address in GART memory */ int aos_stride; /* distance between elements, in dwords */ int aos_size; /* number of components (1-4) */ - int aos_reg; /* VAP register assignment */ }; struct r300_dma { @@ -181,13 +178,6 @@ struct r300_tex_obj { GLuint bufAddr; /* Offset to start of locally shared texture block */ - GLuint dirty_state; /* Flags (1 per texunit) for - whether or not this texobj - has dirty hardware state - (pp_*) that needs to be - brought into the - texunit. */ - drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; /* Six, for the cube faces */ @@ -333,15 +323,17 @@ struct r300_state_atom { #define R300_RI_INTERP_7 8 #define R300_RI_CMDSIZE 9 +#define R500_RI_CMDSIZE 17 + #define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */ -#define R300_RR_ROUTE_0 1 -#define R300_RR_ROUTE_1 2 -#define R300_RR_ROUTE_2 3 -#define R300_RR_ROUTE_3 4 -#define R300_RR_ROUTE_4 5 -#define R300_RR_ROUTE_5 6 -#define R300_RR_ROUTE_6 7 -#define R300_RR_ROUTE_7 8 +#define R300_RR_INST_0 1 +#define R300_RR_INST_1 2 +#define R300_RR_INST_2 3 +#define R300_RR_INST_3 4 +#define R300_RR_INST_4 5 +#define R300_RR_INST_5 6 +#define R300_RR_INST_6 7 +#define R300_RR_INST_7 8 #define R300_RR_CMDSIZE 9 #define R300_FP_CMD_0 0 @@ -355,6 +347,17 @@ struct r300_state_atom { #define R300_FP_NODE3 8 #define R300_FP_CMDSIZE 9 +#define R500_FP_CMD_0 0 +#define R500_FP_CNTL 1 +#define R500_FP_PIXSIZE 2 +#define R500_FP_CMD_1 3 +#define R500_FP_CODE_ADDR 4 +#define R500_FP_CODE_RANGE 5 +#define R500_FP_CODE_OFFSET 6 +#define R500_FP_CMD_2 7 +#define R500_FP_FC_CNTL 8 +#define R500_FP_CMDSIZE 9 + #define R300_FPT_CMD_0 0 #define R300_FPT_INSTR_0 1 #define R300_FPT_CMDSIZE 65 @@ -362,10 +365,14 @@ struct r300_state_atom { #define R300_FPI_CMD_0 0 #define R300_FPI_INSTR_0 1 #define R300_FPI_CMDSIZE 65 +/* R500 has space for 512 instructions - 6 dwords per instruction */ +#define R500_FPI_CMDSIZE (512*6+1) #define R300_FPP_CMD_0 0 #define R300_FPP_PARAM_0 1 #define R300_FPP_CMDSIZE (32*4+1) +/* R500 has spcae for 256 constants - 4 dwords per constant */ +#define R500_FPP_CMDSIZE (256*4+1) #define R300_FOGS_CMD_0 0 #define R300_FOGS_STATE 1 @@ -413,6 +420,12 @@ struct r300_state_atom { #define R300_ZB_PITCH 2 #define R300_ZB_CMDSIZE 3 +#define R300_VAP_CNTL_FLUSH 0 +#define R300_VAP_CNTL_FLUSH_1 1 +#define R300_VAP_CNTL_CMD 2 +#define R300_VAP_CNTL_INSTR 3 +#define R300_VAP_CNTL_SIZE 4 + #define R300_VPI_CMD_0 0 #define R300_VPI_INSTR_0 1 #define R300_VPI_CMDSIZE 1025 /* 256 16 byte instructions */ @@ -454,63 +467,67 @@ struct r300_hw_state { struct r300_state_atom vpt; /* viewport (1D98) */ struct r300_state_atom vap_cntl; + struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */ struct r300_state_atom vof; /* VAP output format register 0x2090 */ struct r300_state_atom vte; /* (20B0) */ - struct r300_state_atom unk2134; /* (2134) */ + struct r300_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ struct r300_state_atom vap_cntl_status; struct r300_state_atom vir[2]; /* vap input route (2150/21E0) */ struct r300_state_atom vic; /* vap input control (2180) */ - struct r300_state_atom unk21DC; /* (21DC) */ + struct r300_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */ struct r300_state_atom vap_clip_cntl; - struct r300_state_atom unk2220; /* (2220) */ - struct r300_state_atom unk2288; /* (2288) */ + struct r300_state_atom vap_clip; + struct r300_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */ struct r300_state_atom pvs; /* pvs_cntl (22D0) */ struct r300_state_atom gb_enable; /* (4008) */ struct r300_state_atom gb_misc; /* Multisampling position shifts ? (4010) */ - struct r300_state_atom unk4200; /* (4200) */ - struct r300_state_atom unk4214; /* (4214) */ + struct r300_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */ + struct r300_state_atom ga_triangle_stipple; /* (4214) */ struct r300_state_atom ps; /* pointsize (421C) */ - struct r300_state_atom unk4230; /* (4230) */ + struct r300_state_atom ga_point_minmax; /* (4230) */ struct r300_state_atom lcntl; /* line control */ - struct r300_state_atom unk4260; /* (4260) */ + struct r300_state_atom ga_line_stipple; /* (4260) */ struct r300_state_atom shade; struct r300_state_atom polygon_mode; struct r300_state_atom fogp; /* fog parameters (4294) */ - struct r300_state_atom unk429C; /* (429C) */ + struct r300_state_atom ga_soft_reset; /* (429C) */ struct r300_state_atom zbias_cntl; struct r300_state_atom zbs; /* zbias (42A4) */ struct r300_state_atom occlusion_cntl; struct r300_state_atom cul; /* cull cntl (42B8) */ - struct r300_state_atom unk42C0; /* (42C0) */ + struct r300_state_atom su_depth_scale; /* (42C0) */ struct r300_state_atom rc; /* rs control (4300) */ struct r300_state_atom ri; /* rs interpolators (4310) */ struct r300_state_atom rr; /* rs route (4330) */ - struct r300_state_atom unk43A4; /* (43A4) */ - struct r300_state_atom unk43E8; /* (43E8) */ + struct r300_state_atom sc_hyperz; /* (43A4) */ + struct r300_state_atom sc_screendoor; /* (43E8) */ struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */ struct r300_state_atom fpt; /* texi - (4620) */ - struct r300_state_atom unk46A4; /* (46A4) */ + struct r300_state_atom us_out_fmt; /* (46A4) */ + struct r300_state_atom r500fp; /* r500 fp instructions */ + struct r300_state_atom r500fp_const; /* r500 fp constants */ struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ struct r300_state_atom fogs; /* fog state (4BC0) */ struct r300_state_atom fogc; /* fog color (4BC8) */ struct r300_state_atom at; /* alpha test (4BD4) */ - struct r300_state_atom unk4BD8; /* (4BD8) */ + struct r300_state_atom fg_depth_src; /* (4BD8) */ struct r300_state_atom fpp; /* 0x4C00 and following */ - struct r300_state_atom unk4E00; /* (4E00) */ + struct r300_state_atom rb3d_cctl; /* (4E00) */ struct r300_state_atom bld; /* blending (4E04) */ struct r300_state_atom cmk; /* colormask (4E0C) */ struct r300_state_atom blend_color; /* constant blend color */ + struct r300_state_atom rop; /* ropcntl */ struct r300_state_atom cb; /* colorbuffer (4E28) */ - struct r300_state_atom unk4E50; /* (4E50) */ - struct r300_state_atom unk4E88; /* (4E88) */ - struct r300_state_atom unk4EA0; /* (4E88) I saw it only written on RV350 hardware.. */ + struct r300_state_atom rb3d_dither_ctl; /* (4E50) */ + struct r300_state_atom rb3d_aaresolve_ctl; /* (4E88) */ + struct r300_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */ struct r300_state_atom zs; /* zstencil control (4F00) */ struct r300_state_atom zstencil_format; struct r300_state_atom zb; /* z buffer (4F20) */ - struct r300_state_atom unk4F28; /* (4F28) */ + struct r300_state_atom zb_depthclearvalue; /* (4F28) */ struct r300_state_atom unk4F30; /* (4F30) */ - struct r300_state_atom unk4F44; /* (4F44) */ - struct r300_state_atom unk4F54; /* (4F54) */ + struct r300_state_atom zb_hiz_offset; /* (4F44) */ + struct r300_state_atom zb_hiz_pitch; /* (4F54) */ struct r300_state_atom vpi; /* vp instructions */ struct r300_state_atom vpp; /* vp parameters */ @@ -557,9 +574,7 @@ struct r300_depthbuffer_state { }; struct r300_stencilbuffer_state { - GLuint clear; GLboolean hw_stencil; - }; /* Vertex shader state */ @@ -579,38 +594,21 @@ struct r300_vertex_shader_fragment { union { GLuint d[VSF_MAX_FRAGMENT_LENGTH]; float f[VSF_MAX_FRAGMENT_LENGTH]; - VERTEX_SHADER_INSTRUCTION i[VSF_MAX_FRAGMENT_LENGTH / 4]; + GLuint i[VSF_MAX_FRAGMENT_LENGTH]; } body; }; -#define VSF_DEST_PROGRAM 0x0 -#define VSF_DEST_MATRIX0 0x200 -#define VSF_DEST_MATRIX1 0x204 -#define VSF_DEST_MATRIX2 0x208 -#define VSF_DEST_VECTOR0 0x20c -#define VSF_DEST_VECTOR1 0x20d -#define VSF_DEST_UNKNOWN1 0x400 -#define VSF_DEST_UNKNOWN2 0x406 - struct r300_vertex_shader_state { struct r300_vertex_shader_fragment program; - - struct r300_vertex_shader_fragment unknown1; - struct r300_vertex_shader_fragment unknown2; - - int program_start; - int unknown_ptr1; /* pointer within program space */ - int program_end; - - int param_offset; - int param_count; - - int unknown_ptr2; /* pointer within program space */ - int unknown_ptr3; /* pointer within program space */ }; extern int hw_tcl_on; +#define COLOR_IS_RGBA +#define TAG(x) r300##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + //#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current) #define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp) @@ -624,6 +622,7 @@ extern int hw_tcl_on; struct r300_vertex_program_key { GLuint InputsRead; GLuint OutputsWritten; + GLuint OutputsAdded; }; struct r300_vertex_program { @@ -655,102 +654,55 @@ struct r300_vertex_program_cont { #define PFS_NUM_TEMP_REGS 32 #define PFS_NUM_CONST_REGS 16 -/* Mapping Mesa registers to R300 temporaries */ -struct reg_acc { - int reg; /* Assigned hw temp */ - unsigned int refcount; /* Number of uses by mesa program */ -}; +struct r300_pfs_compile_state; -/** - * Describe the current lifetime information for an R300 temporary - */ -struct reg_lifetime { - /* Index of the first slot where this register is free in the sense - that it can be used as a new destination register. - This is -1 if the register has been assigned to a Mesa register - and the last access to the register has not yet been emitted */ - int free; - - /* Index of the first slot where this register is currently reserved. - This is used to stop e.g. a scalar operation from being moved - before the allocation time of a register that was first allocated - for a vector operation. */ - int reserved; - - /* Index of the first slot in which the register can be used as a - source without losing the value that is written by the last - emitted instruction that writes to the register */ - int vector_valid; - int scalar_valid; - - /* Index to the slot where the register was last read. - This is also the first slot in which the register may be written again */ - int vector_lastread; - int scalar_lastread; -}; /** - * Store usage information about an ALU instruction slot during the - * compilation of a fragment program. + * Stores state that influences the compilation of a fragment program. */ -#define SLOT_SRC_VECTOR (1<<0) -#define SLOT_SRC_SCALAR (1<<3) -#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) -#define SLOT_OP_VECTOR (1<<16) -#define SLOT_OP_SCALAR (1<<17) -#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) - -struct r300_pfs_compile_slot { - /* Bitmask indicating which parts of the slot are used, using SLOT_ constants - defined above */ - unsigned int used; - - /* Selected sources */ - int vsrc[3]; - int ssrc[3]; +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + GLuint depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + GLuint texture_compare_func : 3; + } unit[16]; }; -/** - * Store information during compilation of fragment programs. - */ -struct r300_pfs_compile_state { - int nrslots; /* number of ALU slots used so far */ - /* Track which (parts of) slots are already filled with instructions */ - struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; - - /* Track the validity of R300 temporaries */ - struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; - - /* Used to map Mesa's inputs/temps onto hardware temps */ - int temp_in_use; - struct reg_acc temps[PFS_NUM_TEMP_REGS]; - struct reg_acc inputs[32]; /* don't actually need 32... */ - - /* Track usage of hardware temps, for register allocation, - * indirection detection, etc. */ - GLuint used_in_node; - GLuint dest_in_node; +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; }; /** - * Store everything about a fragment program that is needed - * to render with that program. + * Stores an R300 fragment program in its compiled-to-hardware form. */ -struct r300_fragment_program { - struct gl_fragment_program mesa_program; - - GLcontext *ctx; - GLboolean translated; - GLboolean error; - struct r300_pfs_compile_state *cs; - +struct r300_fragment_program_code { struct { - int length; + int length; /**< total # of texture instructions used */ GLuint inst[PFS_MAX_TEX_INST]; } tex; struct { + int length; /**< total # of ALU instructions used */ struct { GLuint inst0; GLuint inst1; @@ -759,89 +711,121 @@ struct r300_fragment_program { } inst[PFS_MAX_ALU_INST]; } alu; - struct { - int tex_offset; - int tex_end; - int alu_offset; - int alu_end; - int flags; - } node[4]; + struct r300_fragment_program_node node[4]; int cur_node; int first_node_has_tex; - int alu_offset; - int alu_end; - int tex_offset; - int tex_end; - - /* Hardware constants. - * Contains a pointer to the value. The destination of the pointer - * is supposed to be updated when GL state changes. - * Typically, this is either a pointer into - * gl_program_parameter_list::ParameterValues, or a pointer to a - * global constant (e.g. for sin/cos-approximation) + /** + * Remember which program register a given hardware constant + * belongs to. */ - const GLfloat *constant[PFS_NUM_CONST_REGS]; + struct prog_src_register constant[PFS_NUM_CONST_REGS]; int const_nr; int max_temp_idx; +}; + +/** + * Store everything about a fragment program that is needed + * to render with that program. + */ +struct r300_fragment_program { + struct gl_fragment_program mesa_program; + + GLboolean translated; + GLboolean error; + + struct r300_fragment_program_external_state state; + struct r300_fragment_program_code code; + GLboolean WritesDepth; GLuint optimization; }; -#define R300_MAX_AOS_ARRAYS 16 +struct r500_pfs_compile_state; -#define AOS_FORMAT_USHORT 0 -#define AOS_FORMAT_FLOAT 1 -#define AOS_FORMAT_UBYTE 2 -#define AOS_FORMAT_FLOAT_COLOR 3 +struct r500_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + GLuint depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + GLuint texture_compare_func : 3; + } unit[16]; +}; -#define REG_COORDS 0 -#define REG_COLOR0 1 -#define REG_TEX0 2 +struct r500_fragment_program_code { + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + GLuint inst4; + GLuint inst5; + } inst[512]; + + int inst_offset; + int inst_end; + + /** + * Remember which program register a given hardware constant + * belongs to. + */ + struct prog_src_register constant[PFS_NUM_CONST_REGS]; + int const_nr; -struct dt { - GLint size; - GLenum type; - GLsizei stride; - void *data; + int max_temp_idx; }; -struct radeon_vertex_buffer { - int Count; - void *Elts; - int elt_size; - int elt_min, elt_max; /* debug */ +struct r500_fragment_program { + struct gl_fragment_program mesa_program; - struct dt AttribPtr[VERT_ATTRIB_MAX]; + GLcontext *ctx; + GLboolean translated; + GLboolean error; + + struct r500_fragment_program_external_state state; + struct r500_fragment_program_code code; - const struct _mesa_prim *Primitive; - GLuint PrimitiveCount; - GLint LockFirst; - GLsizei LockCount; - int lock_uptodate; + GLboolean writes_depth; + + GLuint optimization; }; +#define R300_MAX_AOS_ARRAYS 16 + +#define REG_COORDS 0 +#define REG_COLOR0 1 +#define REG_TEX0 2 + struct r300_state { struct r300_depthbuffer_state depth; struct r300_texture_state texture; int sw_tcl_inputs[VERT_ATTRIB_MAX]; struct r300_vertex_shader_state vertex_shader; - struct r300_pfs_compile_state pfs_compile; struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; int aos_count; - struct radeon_vertex_buffer VB; GLuint *Elts; struct r300_dma_region elt_dma; - DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. + struct r300_dma_region swtcl_dma; + DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. They are the same as tnl->render_inputs for fixed pipeline */ - struct { - int transform_offset; /* Transform matrix offset, -1 if none */ - } vap_param; /* vertex processor parameter allocation - tells where to write parameters */ - struct r300_stencilbuffer_state stencil; }; @@ -850,6 +834,62 @@ struct r300_state { #define R300_FALLBACK_TCL 1 #define R300_FALLBACK_RAST 2 +/* r300_swtcl.c + */ +struct r300_swtcl_info { + GLuint RenderIndex; + + /** + * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is + * installed in the Mesa state vector. + */ + GLuint vertex_size; + + /** + * Attributes instructing the Mesa TCL pipeline where / how to put vertex + * data in the hardware buffer. + */ + struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; + + /** + * Number of elements of \c ::vertex_attrs that are actually used. + */ + GLuint vertex_attr_count; + + /** + * Cached pointer to the buffer where Mesa will store vertex data. + */ + GLubyte *verts; + + /* Fallback rasterization functions + */ + // r200_point_func draw_point; + // r200_line_func draw_line; + // r200_tri_func draw_tri; + + GLuint hw_primitive; + GLenum render_primitive; + GLuint numverts; + + /** + * Offset of the 4UB color data within a hardware (swtcl) vertex. + */ + GLuint coloroffset; + + /** + * Offset of the 3UB specular color data within a hardware (swtcl) vertex. + */ + GLuint specoffset; + + /** + * Should Mesa project vertex data or will the hardware do it? + */ + GLboolean needproj; + + struct r300_dma_region indexed_verts; +}; + + /** * \brief R300 context structure. */ @@ -888,6 +928,9 @@ struct r300_context { GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; GLboolean disable_lowimpact_fallback; + + DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ + struct r300_swtcl_info swtcl; }; struct r300_buffer_object { diff --git a/r300/r300_emit.c b/r300/r300_emit.c index 2c26069..2ea17ad 100644 --- a/r300/r300_emit.c +++ b/r300/r300_emit.c @@ -86,16 +86,15 @@ do { \ } while (0) #endif -static void r300EmitVec4(GLcontext * ctx, - struct r300_dma_region *rvb, +static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb, GLvoid * data, int stride, int count) { int i; int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); if (stride == 4) COPY_DWORDS(out, data, count); @@ -107,16 +106,15 @@ static void r300EmitVec4(GLcontext * ctx, } } -static void r300EmitVec8(GLcontext * ctx, - struct r300_dma_region *rvb, +static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb, GLvoid * data, int stride, int count) { int i; int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); if (stride == 8) COPY_DWORDS(out, data, count * 2); @@ -129,8 +127,7 @@ static void r300EmitVec8(GLcontext * ctx, } } -static void r300EmitVec12(GLcontext * ctx, - struct r300_dma_region *rvb, +static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb, GLvoid * data, int stride, int count) { int i; @@ -152,16 +149,15 @@ static void r300EmitVec12(GLcontext * ctx, } } -static void r300EmitVec16(GLcontext * ctx, - struct r300_dma_region *rvb, +static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb, GLvoid * data, int stride, int count) { int i; int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); if (stride == 16) COPY_DWORDS(out, data, count * 4); @@ -176,32 +172,22 @@ static void r300EmitVec16(GLcontext * ctx, } } -static void r300EmitVec(GLcontext * ctx, - struct r300_dma_region *rvb, +static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb, GLvoid * data, int size, int stride, int count) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d size %d stride %d\n", - __FUNCTION__, count, size, stride); - - /* Gets triggered when playing with future_hw_tcl_on ... */ - //assert(!rvb->buf); - if (stride == 0) { r300AllocDmaRegion(rmesa, rvb, size * 4, 4); count = 1; rvb->aos_offset = GET_START(rvb); rvb->aos_stride = 0; } else { - r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); /* alignment? */ + r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); rvb->aos_offset = GET_START(rvb); rvb->aos_stride = size; } - /* Emit the data - */ switch (size) { case 1: r300EmitVec4(ctx, rvb, data, stride, count); @@ -217,128 +203,76 @@ static void r300EmitVec(GLcontext * ctx, break; default: assert(0); - _mesa_exit(-1); break; } - } -static GLuint t_type(struct dt *dt) -{ - switch (dt->type) { - case GL_UNSIGNED_BYTE: - return AOS_FORMAT_UBYTE; - case GL_SHORT: - return AOS_FORMAT_USHORT; - case GL_FLOAT: - return AOS_FORMAT_FLOAT; - default: - assert(0); - break; - } +#define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \ + (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT) - return AOS_FORMAT_FLOAT; -} - -static GLuint t_vir0_size(struct dt *dt) -{ - switch (dt->type) { - case GL_UNSIGNED_BYTE: - return 4; - case GL_SHORT: - return 7; - case GL_FLOAT: - return dt->size - 1; - default: - assert(0); - break; - } - - return 0; -} - -static GLuint t_aos_size(struct dt *dt) -{ - switch (dt->type) { - case GL_UNSIGNED_BYTE: - return 1; - case GL_SHORT: - return 2; - case GL_FLOAT: - return dt->size; - default: - assert(0); - break; - } - - return 0; -} - -static GLuint t_vir0(uint32_t * dst, struct dt *dt, int *inputs, - GLint * tab, GLuint nr) +GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, + int *inputs, GLint * tab, GLuint nr) { GLuint i, dw; - for (i = 0; i + 1 < nr; i += 2) { - dw = t_vir0_size(&dt[tab[i]]) | (inputs[tab[i]] << 8) | - (t_type(&dt[tab[i]]) << 14); - dw |= - (t_vir0_size(&dt[tab[i + 1]]) | - (inputs[tab[i + 1]] << 8) | (t_type(&dt[tab[i + 1]]) - << 14)) << 16; - - if (i + 2 == nr) { - dw |= (1 << (13 + 16)); + /* type, inputs, stop bit, size */ + for (i = 0; i < nr; i += 2) { + /* make sure input is valid, would lockup the gpu */ + assert(inputs[tab[i]] != -1); + dw = (R300_SIGNED | DW_SIZE(i)); + if (i + 1 == nr) { + dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; + } else { + assert(inputs[tab[i + 1]] != -1); + dw |= (R300_SIGNED | + DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT; + if (i + 2 == nr) { + dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; + } } dst[i >> 1] = dw; } - if (nr & 1) { - dw = t_vir0_size(&dt[tab[nr - 1]]) | (inputs[tab[nr - 1]] - << 8) | - (t_type(&dt[tab[nr - 1]]) << 14); - dw |= 1 << 13; - - dst[nr >> 1] = dw; - } - return (nr + 1) >> 1; } -static GLuint t_swizzle(int swizzle[4]) +static GLuint r300VAPInputRoute1Swizzle(int swizzle[4]) { - return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) | - (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) | - (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) | - (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT); + return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT); } -static GLuint t_vir1(uint32_t * dst, int swizzle[][4], GLuint nr) +GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) { - GLuint i; + GLuint i, dw; - for (i = 0; i + 1 < nr; i += 2) { - dst[i >> 1] = t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE; - dst[i >> 1] |= - (t_swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) - << 16; + for (i = 0; i < nr; i += 2) { + dw = (r300VAPInputRoute1Swizzle(swizzle[i]) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | + R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT; + if (i + 1 < nr) { + dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | + R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; + } + dst[i >> 1] = dw; } - if (nr & 1) - dst[nr >> 1] = - t_swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE; - return (nr + 1) >> 1; } -static GLuint t_emit_size(struct dt *dt) +GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) { - return dt->size; + /* No idea what this value means. I have seen other values written to + * this register... */ + return 0x5555; } -static GLuint t_vic(GLcontext * ctx, GLuint InputsRead) +GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) { - r300ContextPtr r300 = R300_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint i, vic_1 = 0; if (InputsRead & (1 << VERT_ATTRIB_POS)) @@ -350,177 +284,188 @@ static GLuint t_vic(GLcontext * ctx, GLuint InputsRead) if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) vic_1 |= R300_INPUT_CNTL_COLOR; - r300->state.texture.tc_count = 0; + rmesa->state.texture.tc_count = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) { - r300->state.texture.tc_count++; + rmesa->state.texture.tc_count++; vic_1 |= R300_INPUT_CNTL_TC0 << i; } return vic_1; } +GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) +{ + GLuint ret = 0; + + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_COL0)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_COL1)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_BFC0) + || OutputsWritten & (1 << VERT_RESULT_BFC1)) + ret |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; + +#if 0 + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ; +#endif + + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + + return ret; +} + +GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) +{ + GLuint i, ret = 0; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) { + ret |= (4 << (3 * i)); + } + } + + return ret; +} + /* Emit vertex data to GART memory * Route inputs to the vertex processor * This function should never return R300_FALLBACK_TCL when using software tcl. */ - int r300EmitArrays(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - r300ContextPtr r300 = rmesa; - struct radeon_vertex_buffer *VB = &rmesa->state.VB; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; GLuint nr; - GLuint count = VB->Count; + GLuint count = vb->Count; GLuint i; GLuint InputsRead = 0, OutputsWritten = 0; int *inputs = NULL; int vir_inputs[VERT_ATTRIB_MAX]; GLint tab[VERT_ATTRIB_MAX]; int swizzle[VERT_ATTRIB_MAX][4]; + struct r300_vertex_program *prog = + (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); if (hw_tcl_on) { - struct r300_vertex_program *prog = - (struct r300_vertex_program *) - CURRENT_VERTEX_SHADER(ctx); inputs = prog->inputs; - InputsRead = CURRENT_VERTEX_SHADER(ctx)->key.InputsRead; - OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + InputsRead = prog->key.InputsRead; + OutputsWritten = prog->key.OutputsWritten; } else { - DECLARE_RENDERINPUTS(inputs_bitset); - inputs = r300->state.sw_tcl_inputs; + inputs = rmesa->state.sw_tcl_inputs; - RENDERINPUTS_COPY(inputs_bitset, - TNL_CONTEXT(ctx)->render_inputs_bitset); + DECLARE_RENDERINPUTS(render_inputs_bitset); + RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); - assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_POS)); - InputsRead |= 1 << VERT_ATTRIB_POS; - OutputsWritten |= 1 << VERT_RESULT_HPOS; + vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr; - assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_NORMAL) - == 0); + assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)); + assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0); + //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)); - assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR0)); - InputsRead |= 1 << VERT_ATTRIB_COLOR0; - OutputsWritten |= 1 << VERT_RESULT_COL0; + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) { + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + } - if (RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR1)) { + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) { + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; + } + + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) { InputsRead |= 1 << VERT_ATTRIB_COLOR1; OutputsWritten |= 1 << VERT_RESULT_COL1; } - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (RENDERINPUTS_TEST - (inputs_bitset, _TNL_ATTRIB_TEX(i))) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) { InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); } + } - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) - if (InputsRead & (1 << i)) + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { inputs[i] = nr++; - else + } else { inputs[i] = -1; - - if (! - (r300->radeon.radeonScreen-> - chip_flags & RADEON_CHIPSET_TCL)) { - /* Fixed, apply to vir0 only */ - memcpy(vir_inputs, inputs, - VERT_ATTRIB_MAX * sizeof(int)); - inputs = vir_inputs; - - if (InputsRead & VERT_ATTRIB_POS) - inputs[VERT_ATTRIB_POS] = 0; - - if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) - inputs[VERT_ATTRIB_COLOR0] = 2; - - if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) - inputs[VERT_ATTRIB_COLOR1] = 3; - - for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) - if (InputsRead & (1 << i)) - inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); + } } - RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, - inputs_bitset); + /* Fixed, apply to vir0 only */ + memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int)); + inputs = vir_inputs; + if (InputsRead & VERT_ATTRIB_POS) + inputs[VERT_ATTRIB_POS] = 0; + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + inputs[VERT_ATTRIB_COLOR0] = 2; + if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) + inputs[VERT_ATTRIB_COLOR1] = 3; + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); + + RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); } + assert(InputsRead); assert(OutputsWritten); - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) - if (InputsRead & (1 << i)) + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { tab[nr++] = i; + } + } - if (nr > R300_MAX_AOS_ARRAYS) + if (nr > R300_MAX_AOS_ARRAYS) { return R300_FALLBACK_TCL; + } for (i = 0; i < nr; i++) { - int ci; - int comp_size, fix, found = 0; + int ci, fix, found = 0; swizzle[i][0] = SWIZZLE_ZERO; swizzle[i][1] = SWIZZLE_ZERO; swizzle[i][2] = SWIZZLE_ZERO; swizzle[i][3] = SWIZZLE_ONE; - for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++) + for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { swizzle[i][ci] = ci; - -#if MESA_BIG_ENDIAN -#define SWAP_INT(a, b) do { \ - int __temp; \ - __temp = a;\ - a = b; \ - b = __temp; \ -} while (0) - - if (VB->AttribPtr[tab[i]].type == GL_UNSIGNED_BYTE) { - SWAP_INT(swizzle[i][0], swizzle[i][3]); - SWAP_INT(swizzle[i][1], swizzle[i][2]); } -#endif /* MESA_BIG_ENDIAN */ - if (r300IsGartMemory(rmesa, VB->AttribPtr[tab[i]].data, - /*(count-1)*stride */ 4)) { - if (VB->AttribPtr[tab[i]].stride % 4) + if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) { + if (vb->AttribPtr[tab[i]]->stride % 4) { return R300_FALLBACK_TCL; - - rmesa->state.aos[i].address = - VB->AttribPtr[tab[i]].data; + } + rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data); rmesa->state.aos[i].start = 0; - rmesa->state.aos[i].aos_offset = - r300GartOffsetFromVirtual(rmesa, - VB-> - AttribPtr[tab[i]].data); - rmesa->state.aos[i].aos_stride = - VB->AttribPtr[tab[i]].stride / 4; - - rmesa->state.aos[i].aos_size = - t_emit_size(&VB->AttribPtr[tab[i]]); + rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data); + rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4; + rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; } else { - /* TODO: r300EmitVec can only handle 4 byte vectors */ - if (VB->AttribPtr[tab[i]].type != GL_FLOAT) - return R300_FALLBACK_TCL; - r300EmitVec(ctx, &rmesa->state.aos[i], - VB->AttribPtr[tab[i]].data, - t_emit_size(&VB->AttribPtr[tab[i]]), - VB->AttribPtr[tab[i]].stride, count); + vb->AttribPtr[tab[i]]->data, + vb->AttribPtr[tab[i]]->size, + vb->AttribPtr[tab[i]]->stride, count); } - rmesa->state.aos[i].aos_size = - t_aos_size(&VB->AttribPtr[tab[i]]); - - comp_size = _mesa_sizeof_type(VB->AttribPtr[tab[i]].type); + rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; - for (fix = 0; fix <= 4 - VB->AttribPtr[tab[i]].size; fix++) { - if ((rmesa->state.aos[i].aos_offset - - comp_size * fix) % 4) + for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) { + if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) { continue; - + } found = 1; break; } @@ -529,70 +474,41 @@ int r300EmitArrays(GLcontext * ctx) if (fix > 0) { WARN_ONCE("Feeling lucky?\n"); } - - rmesa->state.aos[i].aos_offset -= comp_size * fix; - - for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++) + rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix; + for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { swizzle[i][ci] += fix; + } } else { WARN_ONCE ("Cannot handle offset %x with stride %d, comp %d\n", rmesa->state.aos[i].aos_offset, rmesa->state.aos[i].aos_stride, - VB->AttribPtr[tab[i]].size); + vb->AttribPtr[tab[i]]->size); return R300_FALLBACK_TCL; } } - /* setup INPUT_ROUTE */ - R300_STATECHANGE(r300, vir[0]); - ((drm_r300_cmd_header_t *) r300->hw.vir[0].cmd)->packet0.count = - t_vir0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0], VB->AttribPtr, - inputs, tab, nr); - - R300_STATECHANGE(r300, vir[1]); - ((drm_r300_cmd_header_t *) r300->hw.vir[1].cmd)->packet0.count = - t_vir1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr); - - /* Set up input_cntl */ - /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */ - R300_STATECHANGE(r300, vic); - r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555; /* Hard coded value, no idea what it means */ - r300->hw.vic.cmd[R300_VIC_CNTL_1] = t_vic(ctx, InputsRead); - - /* Stage 3: VAP output */ - - R300_STATECHANGE(r300, vof); - - r300->hw.vof.cmd[R300_VOF_CNTL_0] = 0; - r300->hw.vof.cmd[R300_VOF_CNTL_1] = 0; - - if (OutputsWritten & (1 << VERT_RESULT_HPOS)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= - R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - - if (OutputsWritten & (1 << VERT_RESULT_COL0)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT; - - if (OutputsWritten & (1 << VERT_RESULT_COL1)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; - - /*if(OutputsWritten & (1 << VERT_RESULT_BFC0)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT; - - if(OutputsWritten & (1 << VERT_RESULT_BFC1)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */ - //if(OutputsWritten & (1 << VERT_RESULT_FOGC)) - - if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= - R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) - r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4 << (3 * i)); + /* Setup INPUT_ROUTE. */ + R300_STATECHANGE(rmesa, vir[0]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = + r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], + vb->AttribPtr, inputs, tab, nr); + R300_STATECHANGE(rmesa, vir[1]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = + r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr); + + /* Setup INPUT_CNTL. */ + R300_STATECHANGE(rmesa, vic); + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); + + /* Setup OUTPUT_VTX_FMT. */ + R300_STATECHANGE(rmesa, vof); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = + r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = + r300VAPOutputCntl1(ctx, OutputsWritten); rmesa->state.aos_count = nr; @@ -625,3 +541,19 @@ void r300ReleaseArrays(GLcontext * ctx) r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__); } } + +void r300EmitCacheFlush(r300ContextPtr rmesa) +{ + int cmd_reserved = 0; + int cmd_written = 0; + + drm_radeon_cmd_header_t *cmd = NULL; + + reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); + e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + + reg_start(R300_ZB_ZCACHE_CTLSTAT, 0); + e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); +} diff --git a/r300/r300_emit.h b/r300/r300_emit.h index 7be098f..5950539 100644 --- a/r300/r300_emit.h +++ b/r300/r300_emit.h @@ -44,22 +44,13 @@ #include "r300_cmdbuf.h" #include "radeon_reg.h" -/* - * CP type-3 packets - */ -#define RADEON_CP_PACKET3_UNK1B 0xC0001B00 -#define RADEON_CP_PACKET3_INDX_BUFFER 0xC0003300 -#define RADEON_CP_PACKET3_3D_DRAW_VBUF_2 0xC0003400 -#define RADEON_CP_PACKET3_3D_DRAW_IMMD_2 0xC0003500 -#define RADEON_CP_PACKET3_3D_DRAW_INDX_2 0xC0003600 -#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00 -#define RADEON_CP_PACKET3_3D_CLEAR_ZMASK 0xC0003202 -#define RADEON_CP_PACKET3_3D_CLEAR_CMASK 0xC0003802 -#define RADEON_CP_PACKET3_3D_CLEAR_HIZ 0xC0003702 - +/* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up + * with DRM */ #define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2)) +#define CP_PACKET3( pkt, n ) \ + (RADEON_CP_PACKET3 | (pkt) | ((n) << 16)) -static __inline__ uint32_t cmdpacket0(int reg, int count) +static INLINE uint32_t cmdpacket0(int reg, int count) { drm_r300_cmd_header_t cmd; @@ -71,7 +62,7 @@ static __inline__ uint32_t cmdpacket0(int reg, int count) return cmd.u; } -static __inline__ uint32_t cmdvpu(int addr, int count) +static INLINE uint32_t cmdvpu(int addr, int count) { drm_r300_cmd_header_t cmd; @@ -83,7 +74,21 @@ static __inline__ uint32_t cmdvpu(int addr, int count) return cmd.u; } -static __inline__ uint32_t cmdpacket3(int packet) +static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp) +{ + drm_r300_cmd_header_t cmd; + + cmd.r500fp.cmd_type = R300_CMD_R500FP; + cmd.r500fp.count = count; + cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8; + cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0; + cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0; + cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF); + + return cmd.u; +} + +static INLINE uint32_t cmdpacket3(int packet) { drm_r300_cmd_header_t cmd; @@ -93,7 +98,7 @@ static __inline__ uint32_t cmdpacket3(int packet) return cmd.u; } -static __inline__ uint32_t cmdcpdelay(unsigned short count) +static INLINE uint32_t cmdcpdelay(unsigned short count) { drm_r300_cmd_header_t cmd; @@ -103,7 +108,7 @@ static __inline__ uint32_t cmdcpdelay(unsigned short count) return cmd.u; } -static __inline__ uint32_t cmdwait(unsigned char flags) +static INLINE uint32_t cmdwait(unsigned char flags) { drm_r300_cmd_header_t cmd; @@ -113,7 +118,7 @@ static __inline__ uint32_t cmdwait(unsigned char flags) return cmd.u; } -static __inline__ uint32_t cmdpacify(void) +static INLINE uint32_t cmdpacify(void) { drm_r300_cmd_header_t cmd; @@ -175,6 +180,19 @@ static __inline__ uint32_t cmdpacify(void) cmd[0].i = cmdvpu((dest), _n/4); \ } while (0); +#define r500fp_start_fragment(dest, length) \ + do { \ + int _n; \ + _n = (length); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+1), \ + __FUNCTION__); \ + cmd_reserved = _n+1; \ + cmd_written =1; \ + cmd[0].i = cmdr500fp((dest), _n/6, 0, 0); \ + } while (0); + #define start_packet3(packet, count) \ { \ int _n; \ @@ -200,7 +218,7 @@ static __inline__ uint32_t cmdpacify(void) /** * Must be sent to switch to 2d commands */ -void static inline end_3d(r300ContextPtr rmesa) +void static INLINE end_3d(r300ContextPtr rmesa) { drm_radeon_cmd_header_t *cmd = NULL; @@ -209,7 +227,7 @@ void static inline end_3d(r300ContextPtr rmesa) cmd[0].header.cmd_type = R300_CMD_END3D; } -void static inline cp_delay(r300ContextPtr rmesa, unsigned short count) +void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count) { drm_radeon_cmd_header_t *cmd = NULL; @@ -218,7 +236,7 @@ void static inline cp_delay(r300ContextPtr rmesa, unsigned short count) cmd[0].i = cmdcpdelay(count); } -void static inline cp_wait(r300ContextPtr rmesa, unsigned char flags) +void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags) { drm_radeon_cmd_header_t *cmd = NULL; @@ -234,5 +252,17 @@ void r300UseArrays(GLcontext * ctx); #endif extern void r300ReleaseArrays(GLcontext * ctx); +extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); +extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); + +extern void r300EmitCacheFlush(r300ContextPtr rmesa); + +extern GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, + int *inputs, GLint * tab, GLuint nr); +extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr); +extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); +extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); +extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten); +extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten); #endif diff --git a/r300/r300_fragprog.c b/r300/r300_fragprog.c index cce8e68..453dda7 100644 --- a/r300/r300_fragprog.c +++ b/r300/r300_fragprog.c @@ -28,16 +28,12 @@ /** * \file * - * \author Ben Skeggs <darktama@iinet.net.au> + * Fragment program compiler. Perform transformations on the intermediate + * representation until the program is in a form where we can translate + * it more or less directly into machine-readable form. * + * \author Ben Skeggs <darktama@iinet.net.au> * \author Jerome Glisse <j.glisse@gmail.com> - * - * \todo Depth write, WPOS/FOGC inputs - * - * \todo FogOption - * - * \todo Verify results of opcodes for accuracy, I've only checked them in - * specific cases. */ #include "glheader.h" @@ -49,1953 +45,233 @@ #include "r300_context.h" #include "r300_fragprog.h" -#include "r300_reg.h" +#include "r300_fragprog_swizzle.h" #include "r300_state.h" -/* - * Usefull macros and values - */ -#define ERROR(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - fp->error = GL_TRUE; \ - } while(0) - -#define PFS_INVAL 0xFFFFFFFF -#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs - -#define SWIZZLE_XYZ 0 -#define SWIZZLE_XXX 1 -#define SWIZZLE_YYY 2 -#define SWIZZLE_ZZZ 3 -#define SWIZZLE_WWW 4 -#define SWIZZLE_YZX 5 -#define SWIZZLE_ZXY 6 -#define SWIZZLE_WZY 7 -#define SWIZZLE_111 8 -#define SWIZZLE_000 9 -#define SWIZZLE_HHH 10 - -#define swizzle(r, x, y, z, w) do_swizzle(fp, r, \ - ((SWIZZLE_##x<<0)| \ - (SWIZZLE_##y<<3)| \ - (SWIZZLE_##z<<6)| \ - (SWIZZLE_##w<<9)), \ - 0) - -#define REG_TYPE_INPUT 0 -#define REG_TYPE_OUTPUT 1 -#define REG_TYPE_TEMP 2 -#define REG_TYPE_CONST 3 - -#define REG_TYPE_SHIFT 0 -#define REG_INDEX_SHIFT 2 -#define REG_VSWZ_SHIFT 8 -#define REG_SSWZ_SHIFT 13 -#define REG_NEGV_SHIFT 18 -#define REG_NEGS_SHIFT 19 -#define REG_ABS_SHIFT 20 -#define REG_NO_USE_SHIFT 21 // Hack for refcounting -#define REG_VALID_SHIFT 22 // Does the register contain a defined value? -#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)? - -#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) -#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) -#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT) -#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT) -#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT) -#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT) -#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) -#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) -#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) -#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT) - -#define REG(type, index, vswz, sswz, nouse, valid, builtin) \ - (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ - ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ - ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ - ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ - ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \ - ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ - ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) -#define REG_GET_TYPE(reg) \ - ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT) -#define REG_GET_INDEX(reg) \ - ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT) -#define REG_GET_VSWZ(reg) \ - ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT) -#define REG_GET_SSWZ(reg) \ - ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT) -#define REG_GET_NO_USE(reg) \ - ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) -#define REG_GET_VALID(reg) \ - ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) -#define REG_GET_BUILTIN(reg) \ - ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT) -#define REG_SET_TYPE(reg, type) \ - reg = ((reg & ~REG_TYPE_MASK) | \ - ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) -#define REG_SET_INDEX(reg, index) \ - reg = ((reg & ~REG_INDEX_MASK) | \ - ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK)) -#define REG_SET_VSWZ(reg, vswz) \ - reg = ((reg & ~REG_VSWZ_MASK) | \ - ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)) -#define REG_SET_SSWZ(reg, sswz) \ - reg = ((reg & ~REG_SSWZ_MASK) | \ - ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) -#define REG_SET_NO_USE(reg, nouse) \ - reg = ((reg & ~REG_NO_USE_MASK) | \ - ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)) -#define REG_SET_VALID(reg, valid) \ - reg = ((reg & ~REG_VALID_MASK) | \ - ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) -#define REG_SET_BUILTIN(reg, builtin) \ - reg = ((reg & ~REG_BUILTIN_MASK) | \ - ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)) -#define REG_ABS(reg) \ - reg = (reg | REG_ABS_MASK) -#define REG_NEGV(reg) \ - reg = (reg | REG_NEGV_MASK) -#define REG_NEGS(reg) \ - reg = (reg | REG_NEGS_MASK) - -/* - * Datas structures for fragment program generation - */ - -/* description of r300 native hw instructions */ -static const struct { - const char *name; - int argc; - int v_op; - int s_op; -} r300_fpop[] = { - /* *INDENT-OFF* */ - {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD}, - {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4}, - {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4}, - {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN}, - {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX}, - {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP}, - {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC}, - {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2}, - {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2}, - {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP}, - {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ}, - {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL}, - {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL}, - /* *INDENT-ON* */ -}; - -/* vector swizzles r300 can support natively, with a couple of - * cases we handle specially - * - * REG_VSWZ/REG_SSWZ is an index into this table - */ - -/* mapping from SWIZZLE_* to r300 native values for scalar insns */ -#define SWIZZLE_HALF 6 - -#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ - SWIZZLE_##y, \ - SWIZZLE_##z, \ - SWIZZLE_ZERO)) -/* native swizzles */ -static const struct r300_pfs_swizzle { - GLuint hash; /* swizzle value this matches */ - GLuint base; /* base value for hw swizzle */ - GLuint stride; /* difference in base between arg0/1/2 */ - GLuint flags; -} v_swiz[] = { - /* *INDENT-OFF* */ - {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, - {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, - {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, - {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, - {PFS_INVAL, 0, 0, 0}, - /* *INDENT-ON* */ -}; - -/* used during matching of non-native swizzles */ -#define SWZ_X_MASK (7 << 0) -#define SWZ_Y_MASK (7 << 3) -#define SWZ_Z_MASK (7 << 6) -#define SWZ_W_MASK (7 << 9) -static const struct { - GLuint hash; /* used to mask matching swizzle components */ - int mask; /* actual outmask */ - int count; /* count of components matched */ -} s_mask[] = { - /* *INDENT-OFF* */ - {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3}, - {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2}, - {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2}, - {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2}, - {SWZ_X_MASK, 1, 1}, - {SWZ_Y_MASK, 2, 1}, - {SWZ_Z_MASK, 4, 1}, - {PFS_INVAL, PFS_INVAL, PFS_INVAL} - /* *INDENT-ON* */ -}; - -static const struct { - int base; /* hw value of swizzle */ - int stride; /* difference between SRC0/1/2 */ - GLuint flags; -} s_swiz[] = { - /* *INDENT-OFF* */ - {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, - {R300_FPI2_ARGA_ZERO, 0, 0}, - {R300_FPI2_ARGA_ONE, 0, 0}, - {R300_FPI2_ARGA_HALF, 0, 0} - /* *INDENT-ON* */ -}; - -/* boiler-plate reg, for convenience */ -static const GLuint undef = REG(REG_TYPE_TEMP, - 0, - SWIZZLE_XYZ, - SWIZZLE_W, - GL_FALSE, - GL_FALSE, - GL_FALSE); - -/* constant one source */ -static const GLuint pfs_one = REG(REG_TYPE_CONST, - 0, - SWIZZLE_111, - SWIZZLE_ONE, - GL_FALSE, - GL_TRUE, - GL_TRUE); - -/* constant half source */ -static const GLuint pfs_half = REG(REG_TYPE_CONST, - 0, - SWIZZLE_HHH, - SWIZZLE_HALF, - GL_FALSE, - GL_TRUE, - GL_TRUE); - -/* constant zero source */ -static const GLuint pfs_zero = REG(REG_TYPE_CONST, - 0, - SWIZZLE_000, - SWIZZLE_ZERO, - GL_FALSE, - GL_TRUE, - GL_TRUE); +#include "radeon_nqssadce.h" +#include "radeon_program_alu.h" -/* - * Common functions prototypes - */ -static void dump_program(struct r300_fragment_program *fp); -static void emit_arith(struct r300_fragment_program *fp, int op, - GLuint dest, int mask, - GLuint src0, GLuint src1, GLuint src2, int flags); -/** - * Get an R300 temporary that can be written to in the given slot. - */ -static int get_hw_temp(struct r300_fragment_program *fp, int slot) +static void reset_srcreg(struct prog_src_register* reg) { - COMPILE_STATE; - int r; - - for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { - if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) - break; - } - - if (r >= PFS_NUM_TEMP_REGS) { - ERROR("Out of hardware temps\n"); - return 0; - } - // Reserved is used to avoid the following scenario: - // R300 temporary X is first assigned to Mesa temporary Y during vector ops - // R300 temporary X is then assigned to Mesa temporary Z for further vector ops - // Then scalar ops on Mesa temporary Z are emitted and move back in time - // to overwrite the value of temporary Y. - // End scenario. - cs->hwtemps[r].reserved = cs->hwtemps[r].free; - cs->hwtemps[r].free = -1; - - // Reset to some value that won't mess things up when the user - // tries to read from a temporary that hasn't been assigned a value yet. - // In the normal case, vector_valid and scalar_valid should be set to - // a sane value by the first emit that writes to this temporary. - cs->hwtemps[r].vector_valid = 0; - cs->hwtemps[r].scalar_valid = 0; - - if (r > fp->max_temp_idx) - fp->max_temp_idx = r; - - return r; + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; } -/** - * Get an R300 temporary that will act as a TEX destination register. - */ -static int get_hw_temp_tex(struct r300_fragment_program *fp) +static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) { - COMPILE_STATE; - int r; - - for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { - if (cs->used_in_node & (1 << r)) - continue; - - // Note: Be very careful here - if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) - break; - } - - if (r >= PFS_NUM_TEMP_REGS) - return get_hw_temp(fp, 0); /* Will cause an indirection */ - - cs->hwtemps[r].reserved = cs->hwtemps[r].free; - cs->hwtemps[r].free = -1; - - // Reset to some value that won't mess things up when the user - // tries to read from a temporary that hasn't been assigned a value yet. - // In the normal case, vector_valid and scalar_valid should be set to - // a sane value by the first emit that writes to this temporary. - cs->hwtemps[r].vector_valid = cs->nrslots; - cs->hwtemps[r].scalar_valid = cs->nrslots; - - if (r > fp->max_temp_idx) - fp->max_temp_idx = r; - - return r; -} - -/** - * Mark the given hardware register as free. - */ -static void free_hw_temp(struct r300_fragment_program *fp, int idx) -{ - COMPILE_STATE; - - // Be very careful here. Consider sequences like - // MAD r0, r1,r2,r3 - // TEX r4, ... - // The TEX instruction may be moved in front of the MAD instruction - // due to the way nodes work. We don't want to alias r1 and r4 in - // this case. - // I'm certain the register allocation could be further sanitized, - // but it's tricky because of stuff that can happen inside emit_tex - // and emit_arith. - cs->hwtemps[idx].free = cs->nrslots + 1; -} - -/** - * Create a new Mesa temporary register. - */ -static GLuint get_temp_reg(struct r300_fragment_program *fp) -{ - COMPILE_STATE; - GLuint r = undef; - GLuint index; - - index = ffs(~cs->temp_in_use); - if (!index) { - ERROR("Out of program temps\n"); - return r; - } - - cs->temp_in_use |= (1 << --index); - cs->temps[index].refcount = 0xFFFFFFFF; - cs->temps[index].reg = -1; - - REG_SET_TYPE(r, REG_TYPE_TEMP); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; -} - -/** - * Create a new Mesa temporary register that will act as the destination - * register for a texture read. - */ -static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) -{ - COMPILE_STATE; - GLuint r = undef; - GLuint index; - - index = ffs(~cs->temp_in_use); - if (!index) { - ERROR("Out of program temps\n"); - return r; - } - - cs->temp_in_use |= (1 << --index); - cs->temps[index].refcount = 0xFFFFFFFF; - cs->temps[index].reg = get_hw_temp_tex(fp); - - REG_SET_TYPE(r, REG_TYPE_TEMP); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; -} - -/** - * Free a Mesa temporary and the associated R300 temporary. - */ -static void free_temp(struct r300_fragment_program *fp, GLuint r) -{ - COMPILE_STATE; - GLuint index = REG_GET_INDEX(r); - - if (!(cs->temp_in_use & (1 << index))) - return; + gl_state_index fail_value_tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 + }; + struct prog_src_register reg = { 0, }; - if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { - free_hw_temp(fp, cs->temps[index].reg); - cs->temps[index].reg = -1; - cs->temp_in_use &= ~(1 << index); - } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { - free_hw_temp(fp, cs->inputs[index].reg); - cs->inputs[index].reg = -1; - } + fail_value_tokens[2] = tmu; + reg.File = PROGRAM_STATE_VAR; + reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Swizzle = SWIZZLE_WWWW; + return reg; } /** - * Emit a hardware constant/parameter. + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed * - * \p cp Stable pointer to an array of 4 floats. - * The pointer must be stable in the sense that it remains to be valid - * and hold the contents of the constant/parameter throughout the lifetime - * of the fragment program (actually, up until the next time the fragment - * program is translated). - */ -static GLuint emit_const4fv(struct r300_fragment_program *fp, - const GLfloat * cp) -{ - GLuint reg = undef; - int index; - - for (index = 0; index < fp->const_nr; ++index) { - if (fp->constant[index] == cp) - break; - } - - if (index >= fp->const_nr) { - if (index >= PFS_NUM_CONST_REGS) { - ERROR("Out of hw constants!\n"); - return reg; - } - - fp->const_nr++; - fp->constant[index] = cp; - } - - REG_SET_TYPE(reg, REG_TYPE_CONST); - REG_SET_INDEX(reg, index); - REG_SET_VALID(reg, GL_TRUE); - return reg; -} - -static inline GLuint negate(GLuint r) -{ - REG_NEGS(r); - REG_NEGV(r); - return r; -} - -/* Hack, to prevent clobbering sources used multiple times when - * emulating non-native instructions + * \todo If/when r5xx uses the radeon_program architecture, this can probably + * be reused. */ -static inline GLuint keep(GLuint r) -{ - REG_SET_NO_USE(r, GL_TRUE); - return r; -} - -static inline GLuint absolute(GLuint r) +static GLboolean transform_TEX( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, void* data) { - REG_ABS(r); - return r; -} - -static int swz_native(struct r300_fragment_program *fp, - GLuint src, GLuint * r, GLuint arbneg) -{ - /* Native swizzle, handle negation */ - src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT); - - if ((arbneg & 0x7) == 0x0) { - src = src & ~REG_NEGV_MASK; - *r = src; - } else if ((arbneg & 0x7) == 0x7) { - src |= REG_NEGV_MASK; - *r = src; - } else { - if (!REG_GET_VALID(*r)) - *r = get_temp_reg(fp); - src |= REG_NEGV_MASK; - emit_arith(fp, - PFS_OP_MAD, - *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0); - src = src & ~REG_NEGV_MASK; - emit_arith(fp, - PFS_OP_MAD, - *r, - (arbneg ^ 0x7) | WRITEMASK_W, - src, pfs_one, pfs_zero, 0); - } - - return 3; -} - -static int swz_emit_partial(struct r300_fragment_program *fp, - GLuint src, - GLuint * r, int mask, int mc, GLuint arbneg) -{ - GLuint tmp; - GLuint wmask = 0; - - if (!REG_GET_VALID(*r)) - *r = get_temp_reg(fp); - - /* A partial match, VSWZ/mask define what parts of the - * desired swizzle we match - */ - if (mc + s_mask[mask].count == 3) { - wmask = WRITEMASK_W; - src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT; - } - - tmp = arbneg & s_mask[mask].mask; - if (tmp) { - tmp = tmp ^ s_mask[mask].mask; - if (tmp) { - emit_arith(fp, - PFS_OP_MAD, - *r, - arbneg & s_mask[mask].mask, - keep(src) | REG_NEGV_MASK, - pfs_one, pfs_zero, 0); - if (!wmask) { - REG_SET_NO_USE(src, GL_TRUE); - } else { - REG_SET_NO_USE(src, GL_FALSE); - } - emit_arith(fp, - PFS_OP_MAD, - *r, tmp | wmask, src, pfs_one, pfs_zero, 0); - } else { - if (!wmask) { - REG_SET_NO_USE(src, GL_TRUE); - } else { - REG_SET_NO_USE(src, GL_FALSE); - } - emit_arith(fp, - PFS_OP_MAD, - *r, - (arbneg & s_mask[mask].mask) | wmask, - src | REG_NEGV_MASK, pfs_one, pfs_zero, 0); - } - } else { - if (!wmask) { - REG_SET_NO_USE(src, GL_TRUE); - } else { - REG_SET_NO_USE(src, GL_FALSE); - } - emit_arith(fp, PFS_OP_MAD, - *r, - s_mask[mask].mask | wmask, - src, pfs_one, pfs_zero, 0); - } - - return s_mask[mask].count; -} - -static GLuint do_swizzle(struct r300_fragment_program *fp, - GLuint src, GLuint arbswz, GLuint arbneg) -{ - GLuint r = undef; - GLuint vswz; - int c_mask = 0; - int v_match = 0; - - /* If swizzling from something without an XYZW native swizzle, - * emit result to a temp, and do new swizzle from the temp. - */ -#if 0 - if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { - GLuint temp = get_temp_reg(fp); - emit_arith(fp, - PFS_OP_MAD, - temp, WRITEMASK_XYZW, src, pfs_one, pfs_zero, 0); - src = temp; - } -#endif - - if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { - GLuint vsrcswz = - (v_swiz[REG_GET_VSWZ(src)]. - hash & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK)) | - REG_GET_SSWZ(src) << 9; - GLint i; - - GLuint newswz = 0; - GLuint offset; - for (i = 0; i < 4; ++i) { - offset = GET_SWZ(arbswz, i); - - newswz |= - (offset <= 3) ? GET_SWZ(vsrcswz, - offset) << i * - 3 : offset << i * 3; - } - - arbswz = newswz & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK); - REG_SET_SSWZ(src, GET_SWZ(newswz, 3)); - } else { - /* set scalar swizzling */ - REG_SET_SSWZ(src, GET_SWZ(arbswz, 3)); - - } - do { - vswz = REG_GET_VSWZ(src); - do { - int chash; - - REG_SET_VSWZ(src, vswz); - chash = v_swiz[REG_GET_VSWZ(src)].hash & - s_mask[c_mask].hash; - - if (chash == (arbswz & s_mask[c_mask].hash)) { - if (s_mask[c_mask].count == 3) { - v_match += swz_native(fp, - src, &r, arbneg); - } else { - v_match += swz_emit_partial(fp, - src, - &r, - c_mask, - v_match, - arbneg); - } - - if (v_match == 3) - return r; - - /* Fill with something invalid.. all 0's was - * wrong before, matched SWIZZLE_X. So all - * 1's will be okay for now - */ - arbswz |= (PFS_INVAL & s_mask[c_mask].hash); - } - } while (v_swiz[++vswz].hash != PFS_INVAL); - REG_SET_VSWZ(src, SWIZZLE_XYZ); - } while (s_mask[++c_mask].hash != PFS_INVAL); - - ERROR("should NEVER get here\n"); - return r; -} - -static GLuint t_src(struct r300_fragment_program *fp, - struct prog_src_register fpsrc) -{ - GLuint r = undef; - - switch (fpsrc.File) { - case PROGRAM_TEMPORARY: - REG_SET_INDEX(r, fpsrc.Index); - REG_SET_VALID(r, GL_TRUE); - REG_SET_TYPE(r, REG_TYPE_TEMP); - break; - case PROGRAM_INPUT: - REG_SET_INDEX(r, fpsrc.Index); - REG_SET_VALID(r, GL_TRUE); - REG_SET_TYPE(r, REG_TYPE_INPUT); - break; - case PROGRAM_LOCAL_PARAM: - r = emit_const4fv(fp, - fp->mesa_program.Base.LocalParams[fpsrc. - Index]); - break; - case PROGRAM_ENV_PARAM: - r = emit_const4fv(fp, - fp->ctx->FragmentProgram.Parameters[fpsrc. - Index]); - break; - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - r = emit_const4fv(fp, - fp->mesa_program.Base.Parameters-> - ParameterValues[fpsrc.Index]); - break; - default: - ERROR("unknown SrcReg->File %x\n", fpsrc.File); - return r; - } - - /* no point swizzling ONE/ZERO/HALF constants... */ - if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) - r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase); - return r; -} - -static GLuint t_scalar_src(struct r300_fragment_program *fp, - struct prog_src_register fpsrc) -{ - struct prog_src_register src = fpsrc; - int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */ - - src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9)); - - return t_src(fp, src); -} - -static GLuint t_dst(struct r300_fragment_program *fp, - struct prog_dst_register dest) -{ - GLuint r = undef; - - switch (dest.File) { - case PROGRAM_TEMPORARY: - REG_SET_INDEX(r, dest.Index); - REG_SET_VALID(r, GL_TRUE); - REG_SET_TYPE(r, REG_TYPE_TEMP); - return r; - case PROGRAM_OUTPUT: - REG_SET_TYPE(r, REG_TYPE_OUTPUT); - switch (dest.Index) { - case FRAG_RESULT_COLR: - case FRAG_RESULT_DEPR: - REG_SET_INDEX(r, dest.Index); - REG_SET_VALID(r, GL_TRUE); - return r; - default: - ERROR("Bad DstReg->Index 0x%x\n", dest.Index); - return r; - } - default: - ERROR("Bad DstReg->File 0x%x\n", dest.File); - return r; - } -} - -static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) -{ - COMPILE_STATE; - int idx; - int index = REG_GET_INDEX(src); - - switch (REG_GET_TYPE(src)) { - case REG_TYPE_TEMP: - /* NOTE: if reg==-1 here, a source is being read that - * hasn't been written to. Undefined results. - */ - if (cs->temps[index].reg == -1) - cs->temps[index].reg = get_hw_temp(fp, cs->nrslots); - - idx = cs->temps[index].reg; - - if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0)) - free_temp(fp, src); - break; - case REG_TYPE_INPUT: - idx = cs->inputs[index].reg; - - if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0)) - free_hw_temp(fp, cs->inputs[index].reg); - break; - case REG_TYPE_CONST: - return (index | SRC_CONST); - default: - ERROR("Invalid type for source reg\n"); - return (0 | SRC_CONST); - } + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; - if (!tex) - cs->used_in_node |= (1 << idx); + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - return idx; -} + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonAppendInstructions(t->Program, 1); -static int t_hw_dst(struct r300_fragment_program *fp, - GLuint dest, GLboolean tex, int slot) -{ - COMPILE_STATE; - int idx; - GLuint index = REG_GET_INDEX(dest); - assert(REG_GET_VALID(dest)); - - switch (REG_GET_TYPE(dest)) { - case REG_TYPE_TEMP: - if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { - if (!tex) { - cs->temps[index].reg = get_hw_temp(fp, slot); + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = inst.DstReg; + if (comparefunc == GL_ALWAYS) { + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_1111; } else { - cs->temps[index].reg = get_hw_temp_tex(fp); + tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); } + return GL_TRUE; } - idx = cs->temps[index].reg; - - if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0)) - free_temp(fp, dest); - - cs->dest_in_node |= (1 << idx); - cs->used_in_node |= (1 << idx); - break; - case REG_TYPE_OUTPUT: - switch (index) { - case FRAG_RESULT_COLR: - fp->node[fp->cur_node].flags |= - R300_PFS_NODE_OUTPUT_COLOR; - break; - case FRAG_RESULT_DEPR: - fp->node[fp->cur_node].flags |= - R300_PFS_NODE_OUTPUT_DEPTH; - break; - } - return index; - break; - default: - ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); - return 0; - } - return idx; -} - -static void emit_nop(struct r300_fragment_program *fp) -{ - COMPILE_STATE; - - if (cs->nrslots >= PFS_MAX_ALU_INST) { - ERROR("Out of ALU instruction slots\n"); - return; + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = radeonFindFreeTemporary(t); + inst.DstReg.WriteMask = WRITEMASK_XYZW; } - fp->alu.inst[cs->nrslots].inst0 = NOP_INST0; - fp->alu.inst[cs->nrslots].inst1 = NOP_INST1; - fp->alu.inst[cs->nrslots].inst2 = NOP_INST2; - fp->alu.inst[cs->nrslots].inst3 = NOP_INST3; - cs->nrslots++; -} - -static void emit_tex(struct r300_fragment_program *fp, - struct prog_instruction *fpi, int opcode) -{ - COMPILE_STATE; - GLuint coord = t_src(fp, fpi->SrcReg[0]); - GLuint dest = undef, rdest = undef; - GLuint din, uin; - int unit = fpi->TexSrcUnit; - int hwsrc, hwdest; - GLuint tempreg = 0; - - uin = cs->used_in_node; - din = cs->dest_in_node; - - /* Resolve source/dest to hardware registers */ - if (opcode != R300_FPITX_OP_KIL) { - if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { - /** - * Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - * - * \todo Refactor this once we have proper rewriting/optimization - * support for programs. - */ - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - int factor_index; - GLuint factorreg; - - tokens[2] = unit; - factor_index = - _mesa_add_state_reference(fp->mesa_program.Base. - Parameters, tokens); - factorreg = - emit_const4fv(fp, - fp->mesa_program.Base.Parameters-> - ParameterValues[factor_index]); - tempreg = keep(get_temp_reg(fp)); - - emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, - coord, factorreg, pfs_zero, 0); - - /* Ensure correct node indirection */ - uin = cs->used_in_node; - din = cs->dest_in_node; - - hwsrc = t_hw_src(fp, tempreg, GL_TRUE); - } else { - hwsrc = t_hw_src(fp, coord, GL_TRUE); - } - - dest = t_dst(fp, fpi->DstReg); - /* r300 doesn't seem to be able to do TEX->output reg */ - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - rdest = dest; - dest = get_temp_reg_tex(fp); - } - hwdest = - t_hw_dst(fp, dest, GL_TRUE, - fp->node[fp->cur_node].alu_offset); - - /* Use a temp that hasn't been used in this node, rather - * than causing an indirection - */ - if (uin & (1 << hwdest)) { - free_hw_temp(fp, hwdest); - hwdest = get_hw_temp_tex(fp); - cs->temps[REG_GET_INDEX(dest)].reg = hwdest; - } - } else { - hwdest = 0; - unit = 0; - hwsrc = t_hw_src(fp, coord, GL_TRUE); - } - - /* Indirection if source has been written in this node, or if the - * dest has been read/written in this node + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. */ - if ((REG_GET_TYPE(coord) != REG_TYPE_CONST && - (din & (1 << hwsrc))) || (uin & (1 << hwdest))) { - - /* Finish off current node */ - if (fp->node[fp->cur_node].alu_offset == cs->nrslots) - emit_nop(fp); - - fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; - assert(fp->node[fp->cur_node].alu_end >= 0); - - if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) { - ERROR("too many levels of texture indirection\n"); - return; - } - - /* Start new node */ - fp->node[fp->cur_node].tex_offset = fp->tex.length; - fp->node[fp->cur_node].alu_offset = cs->nrslots; - fp->node[fp->cur_node].tex_end = -1; - fp->node[fp->cur_node].alu_end = -1; - fp->node[fp->cur_node].flags = 0; - cs->used_in_node = 0; - cs->dest_in_node = 0; - } + if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; - if (fp->cur_node == 0) - fp->first_node_has_tex = 1; + int tempreg = radeonFindFreeTemporary(t); + int factor_index; - fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT) - | (hwdest << R300_FPITX_DST_SHIFT) - | (unit << R300_FPITX_IMAGE_SHIFT) - /* not entirely sure about this */ - | (opcode << R300_FPITX_OPCODE_SHIFT); + tokens[2] = inst.TexSrcUnit; + factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens); - cs->dest_in_node |= (1 << hwdest); - if (REG_GET_TYPE(coord) != REG_TYPE_CONST) - cs->used_in_node |= (1 << hwsrc); + tgt = radeonAppendInstructions(t->Program, 1); - fp->node[fp->cur_node].tex_end++; + tgt->Opcode = OPCODE_MUL; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_STATE_VAR; + tgt->SrcReg[1].Index = factor_index; - /* Copy from temp to output if needed */ - if (REG_GET_VALID(rdest)) { - emit_arith(fp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest, - pfs_one, pfs_zero, 0); - free_temp(fp, dest); + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; } - /* Free temp register */ - if (tempreg != 0) - free_temp(fp, tempreg); -} - -/** - * Returns the first slot where we could possibly allow writing to dest, - * according to register allocation. - */ -static int get_earliest_allowed_write(struct r300_fragment_program *fp, - GLuint dest, int mask) -{ - COMPILE_STATE; - int idx; - int pos; - GLuint index = REG_GET_INDEX(dest); - assert(REG_GET_VALID(dest)); - - switch (REG_GET_TYPE(dest)) { - case REG_TYPE_TEMP: - if (cs->temps[index].reg == -1) - return 0; - - idx = cs->temps[index].reg; - break; - case REG_TYPE_OUTPUT: - return 0; - default: - ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); - return 0; - } - - pos = cs->hwtemps[idx].reserved; - if (mask & WRITEMASK_XYZ) { - if (pos < cs->hwtemps[idx].vector_lastread) - pos = cs->hwtemps[idx].vector_lastread; - } - if (mask & WRITEMASK_W) { - if (pos < cs->hwtemps[idx].scalar_lastread) - pos = cs->hwtemps[idx].scalar_lastread; - } - - return pos; -} + if (inst.Opcode != OPCODE_KIL) { + if (inst.DstReg.File != PROGRAM_TEMPORARY || + inst.DstReg.WriteMask != WRITEMASK_XYZW) { + int tempreg = radeonFindFreeTemporary(t); -/** - * Allocates a slot for an ALU instruction that can consist of - * a vertex part or a scalar part or both. - * - * Sources from src (src[0] to src[argc-1]) are added to the slot in the - * appropriate position (vector and/or scalar), and their positions are - * recorded in the srcpos array. - * - * This function emits instruction code for the source fetch and the - * argument selection. It does not emit instruction code for the - * opcode or the destination selection. - * - * @return the index of the slot - */ -static int find_and_prepare_slot(struct r300_fragment_program *fp, - GLboolean emit_vop, - GLboolean emit_sop, - int argc, GLuint * src, GLuint dest, int mask) -{ - COMPILE_STATE; - int hwsrc[3]; - int srcpos[3]; - unsigned int used; - int tempused; - int tempvsrc[3]; - int tempssrc[3]; - int pos; - int regnr; - int i, j; - - // Determine instruction slots, whether sources are required on - // vector or scalar side, and the smallest slot number where - // all source registers are available - used = 0; - if (emit_vop) - used |= SLOT_OP_VECTOR; - if (emit_sop) - used |= SLOT_OP_SCALAR; - - pos = get_earliest_allowed_write(fp, dest, mask); - - if (fp->node[fp->cur_node].alu_offset > pos) - pos = fp->node[fp->cur_node].alu_offset; - for (i = 0; i < argc; ++i) { - if (!REG_GET_BUILTIN(src[i])) { - if (emit_vop) - used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i; - if (emit_sop) - used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; - } - - hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ - regnr = hwsrc[i] & 31; - - if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { - if (used & (SLOT_SRC_VECTOR << i)) { - if (cs->hwtemps[regnr].vector_valid > pos) - pos = cs->hwtemps[regnr].vector_valid; - } - if (used & (SLOT_SRC_SCALAR << i)) { - if (cs->hwtemps[regnr].scalar_valid > pos) - pos = cs->hwtemps[regnr].scalar_valid; - } + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; } } - // Find a slot that fits - for (;; ++pos) { - if (cs->slot[pos].used & used & SLOT_OP_BOTH) - continue; - - if (pos >= cs->nrslots) { - if (cs->nrslots >= PFS_MAX_ALU_INST) { - ERROR("Out of ALU instruction slots\n"); - return -1; - } - - fp->alu.inst[pos].inst0 = NOP_INST0; - fp->alu.inst[pos].inst1 = NOP_INST1; - fp->alu.inst[pos].inst2 = NOP_INST2; - fp->alu.inst[pos].inst3 = NOP_INST3; - - cs->nrslots++; - } - // Note: When we need both parts (vector and scalar) of a source, - // we always try to put them into the same position. This makes the - // code easier to read, and it is optimal (i.e. one doesn't gain - // anything by splitting the parts). - // It also avoids headaches with swizzles that access both parts (i.e WXY) - tempused = cs->slot[pos].used; - for (i = 0; i < 3; ++i) { - tempvsrc[i] = cs->slot[pos].vsrc[i]; - tempssrc[i] = cs->slot[pos].ssrc[i]; - } - - for (i = 0; i < argc; ++i) { - int flags = (used >> i) & SLOT_SRC_BOTH; - - if (!flags) { - srcpos[i] = 0; - continue; - } - - for (j = 0; j < 3; ++j) { - if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { - if (tempvsrc[j] != hwsrc[i]) - continue; - } - - if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { - if (tempssrc[j] != hwsrc[i]) - continue; - } - - break; - } - - if (j == 3) - break; - - srcpos[i] = j; - tempused |= flags << j; - if (flags & SLOT_SRC_VECTOR) - tempvsrc[j] = hwsrc[i]; - if (flags & SLOT_SRC_SCALAR) - tempssrc[j] = hwsrc[i]; - } - - if (i == argc) - break; - } - - // Found a slot, reserve it - cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); - for (i = 0; i < 3; ++i) { - cs->slot[pos].vsrc[i] = tempvsrc[i]; - cs->slot[pos].ssrc[i] = tempssrc[i]; - } - - for (i = 0; i < argc; ++i) { - if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { - int regnr = hwsrc[i] & 31; - - if (used & (SLOT_SRC_VECTOR << i)) { - if (cs->hwtemps[regnr].vector_lastread < pos) - cs->hwtemps[regnr].vector_lastread = - pos; - } - if (used & (SLOT_SRC_SCALAR << i)) { - if (cs->hwtemps[regnr].scalar_lastread < pos) - cs->hwtemps[regnr].scalar_lastread = - pos; - } - } - } - - // Emit the source fetch code - fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; - fp->alu.inst[pos].inst1 |= - ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | - (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | - (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); - - fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; - fp->alu.inst[pos].inst3 |= - ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | - (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | - (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); - - // Emit the argument selection code - if (emit_vop) { - int swz[3]; - - for (i = 0; i < 3; ++i) { - if (i < argc) { - swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + - (srcpos[i] * - v_swiz[REG_GET_VSWZ(src[i])]. - stride)) | ((src[i] & REG_NEGV_MASK) - ? ARG_NEG : 0) | ((src[i] - & - REG_ABS_MASK) - ? - ARG_ABS - : 0); - } else { - swz[i] = R300_FPI0_ARGC_ZERO; - } - } - - fp->alu.inst[pos].inst0 &= - ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK | - R300_FPI0_ARG2C_MASK); - fp->alu.inst[pos].inst0 |= - (swz[0] << R300_FPI0_ARG0C_SHIFT) | (swz[1] << - R300_FPI0_ARG1C_SHIFT) - | (swz[2] << R300_FPI0_ARG2C_SHIFT); - } - - if (emit_sop) { - int swz[3]; - - for (i = 0; i < 3; ++i) { - if (i < argc) { - swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + - (srcpos[i] * - s_swiz[REG_GET_SSWZ(src[i])]. - stride)) | ((src[i] & REG_NEGV_MASK) - ? ARG_NEG : 0) | ((src[i] - & - REG_ABS_MASK) - ? - ARG_ABS - : 0); - } else { - swz[i] = R300_FPI2_ARGA_ZERO; - } - } - - fp->alu.inst[pos].inst2 &= - ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK | - R300_FPI2_ARG2A_MASK); - fp->alu.inst[pos].inst2 |= - (swz[0] << R300_FPI2_ARG0A_SHIFT) | (swz[1] << - R300_FPI2_ARG1A_SHIFT) - | (swz[2] << R300_FPI2_ARG2A_SHIFT); - } - - return pos; -} - -/** - * Append an ALU instruction to the instruction list. - */ -static void emit_arith(struct r300_fragment_program *fp, - int op, - GLuint dest, - int mask, - GLuint src0, GLuint src1, GLuint src2, int flags) -{ - COMPILE_STATE; - GLuint src[3] = { src0, src1, src2 }; - int hwdest; - GLboolean emit_vop, emit_sop; - int vop, sop, argc; - int pos; - - vop = r300_fpop[op].v_op; - sop = r300_fpop[op].s_op; - argc = r300_fpop[op].argc; - - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && - REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { - if (mask & WRITEMASK_Z) { - mask = WRITEMASK_W; - } else { - return; - } - } - - emit_vop = GL_FALSE; - emit_sop = GL_FALSE; - if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) - emit_vop = GL_TRUE; - if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) - emit_sop = GL_TRUE; - - pos = - find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest, - mask); - if (pos < 0) - return; - - hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ - - if (flags & PFS_FLAG_SAT) { - vop |= R300_FPI0_OUTC_SAT; - sop |= R300_FPI2_OUTA_SAT; - } - - /* Throw the pieces together and get FPI0/1 */ - if (emit_vop) { - fp->alu.inst[pos].inst0 |= vop; + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; + int rcptemp = radeonFindFreeTemporary(t); + int pass, fail; + + tgt = radeonAppendInstructions(t->Program, 3); + + tgt[0].Opcode = OPCODE_RCP; + tgt[0].DstReg.File = PROGRAM_TEMPORARY; + tgt[0].DstReg.Index = rcptemp; + tgt[0].DstReg.WriteMask = WRITEMASK_W; + tgt[0].SrcReg[0] = inst.SrcReg[0]; + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + + tgt[1].Opcode = OPCODE_MAD; + tgt[1].DstReg = inst.DstReg; + tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[1].SrcReg[0] = inst.SrcReg[0]; + tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[1].Index = rcptemp; + tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; + tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[2].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + else + tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; - fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; + tgt[2].Opcode = OPCODE_CMP; + tgt[2].DstReg = orig_inst->DstReg; + tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - fp->alu.inst[pos].inst1 |= - (mask & WRITEMASK_XYZ) << - R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; - } else - assert(0); + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; } else { - fp->alu.inst[pos].inst1 |= - (mask & WRITEMASK_XYZ) << - R300_FPI1_DSTC_REG_MASK_SHIFT; - - cs->hwtemps[hwdest].vector_valid = pos + 1; + pass = 2; + fail = 1; } - } - /* And now FPI2/3 */ - if (emit_sop) { - fp->alu.inst[pos].inst2 |= sop; - - if (mask & WRITEMASK_W) { - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - fp->alu.inst[pos].inst3 |= - (hwdest << R300_FPI3_DSTA_SHIFT) | - R300_FPI3_DSTA_OUTPUT; - } else if (REG_GET_INDEX(dest) == - FRAG_RESULT_DEPR) { - fp->alu.inst[pos].inst3 |= - R300_FPI3_DSTA_DEPTH; - } else - assert(0); - } else { - fp->alu.inst[pos].inst3 |= - (hwdest << R300_FPI3_DSTA_SHIFT) | - R300_FPI3_DSTA_REG; + tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; + tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; + tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); + } else if (destredirect) { + tgt = radeonAppendInstructions(t->Program, 1); - cs->hwtemps[hwdest].scalar_valid = pos + 1; - } - } + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = orig_inst->DstReg; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; } - return; + return GL_TRUE; } -#if 0 -static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr) + +static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) { struct gl_fragment_program *mp = &fp->mesa_program; - GLuint r = undef; - - if (!(mp->Base.InputsRead & (1 << attr))) { - ERROR("Attribute %d was not provided!\n", attr); - return undef; - } - REG_SET_TYPE(r, REG_TYPE_INPUT); - REG_SET_INDEX(r, attr); - REG_SET_VALID(r, GL_TRUE); - return r; + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); } -#endif - -static GLfloat SinCosConsts[2][4] = { - { - 1.273239545, // 4/PI - -0.405284735, // -4/(PI*PI) - 3.141592654, // PI - 0.2225 // weight - }, - { - 0.75, - 0.0, - 0.159154943, // 1/(2*PI) - 6.283185307 // 2*PI - } -}; + /** - * Emit a LIT instruction. - * \p flags may be PFS_FLAG_SAT + * Transform the program to support fragment.position. * - * Definition of LIT (from ARB_fragment_program): - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. * - * The longest path of computation is the one leading to result.z, - * consisting of 5 operations. This implementation of LIT takes - * 5 slots. So unless there's some special undocumented opcode, - * this implementation is potentially optimal. Unfortunately, - * emit_arith is a bit too conservative because it doesn't understand - * partial writes to the vector component. + * \todo if/when r5xx supports the radeon_program architecture, this is a + * likely candidate for code sharing. */ -static const GLfloat LitConst[4] = - { 127.999999, 127.999999, 127.999999, -127.999999 }; - -static void emit_lit(struct r300_fragment_program *fp, - GLuint dest, int mask, GLuint src, int flags) +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) { - COMPILE_STATE; - GLuint cnst; - int needTemporary; - GLuint temp; - - cnst = emit_const4fv(fp, LitConst); - - needTemporary = 0; - if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { - needTemporary = 1; - } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - // LIT is typically followed by DP3/DP4, so there's no point - // in creating special code for this case - needTemporary = 1; - } - - if (needTemporary) { - temp = keep(get_temp_reg(fp)); - } else { - temp = keep(dest); - } - - // Note: The order of emit_arith inside the slots is relevant, - // because emit_arith only looks at scalar vs. vector when resolving - // dependencies, and it does not consider individual vector components, - // so swizzling between the two parts can create fake dependencies. - - // First slot - emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY, - keep(src), pfs_zero, undef, 0); - emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); - - // Second slot - emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z, - swizzle(temp, W, W, W, W), cnst, undef, 0); - emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W, - swizzle(temp, Y, Y, Y, Y), undef, undef, 0); - - // Third slot - // If desired, we saturate the y result here. - // This does not affect the use as a condition variable in the CMP later - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, - temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y, - swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); - - // Fourth slot - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X, - pfs_one, pfs_one, pfs_zero, 0); - emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); - - // Fifth slot - emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z, - pfs_zero, swizzle(temp, W, W, W, W), - negate(swizzle(temp, Y, Y, Y, Y)), flags); - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, - pfs_zero, 0); - - if (needTemporary) { - emit_arith(fp, PFS_OP_MAD, dest, mask, - temp, pfs_one, pfs_zero, flags); - free_temp(fp, temp); - } else { - // Decrease refcount of the destination - t_hw_dst(fp, dest, GL_FALSE, cs->nrslots); - } -} - -static GLboolean parse_program(struct r300_fragment_program *fp) -{ - struct gl_fragment_program *mp = &fp->mesa_program; - const struct prog_instruction *inst = mp->Base.Instructions; - struct prog_instruction *fpi; - GLuint src[3], dest, temp[2]; - int flags, mask = 0; - int const_sin[2]; - - if (!inst || inst[0].Opcode == OPCODE_END) { - ERROR("empty program?\n"); - return GL_FALSE; - } + GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { - if (fpi->SaturateMode == SATURATE_ZERO_ONE) - flags = PFS_FLAG_SAT; - else - flags = 0; - - if (fpi->Opcode != OPCODE_KIL) { - dest = t_dst(fp, fpi->DstReg); - mask = fpi->DstReg.WriteMask; - } - - switch (fpi->Opcode) { - case OPCODE_ABS: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - absolute(src[0]), pfs_one, pfs_zero, flags); - break; - case OPCODE_ADD: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], pfs_one, src[1], flags); - break; - case OPCODE_CMP: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); - /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c - * r300 - if src2.c < 0.0 ? src1.c : src0.c - */ - emit_arith(fp, PFS_OP_CMP, dest, mask, - src[2], src[1], src[0], flags); - break; - case OPCODE_COS: - /* - * cos using a parabola (see SIN): - * cos(x): - * x = (x/(2*PI))+0.75 - * x = frac(x) - * x = (x*2*PI)-PI - * result = sin(x) - */ - temp[0] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - - /* add 0.5*PI and do range reduction */ - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(src[0], X, X, X, X), - swizzle(const_sin[1], Z, Z, Z, Z), - swizzle(const_sin[1], X, X, X, X), 0); - - emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, - swizzle(temp[0], X, X, X, X), - undef, undef, 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI - negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI - 0); - - /* SIN */ - - emit_arith(fp, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, - swizzle(temp[0], X, X, X, X), - absolute(swizzle(temp[0], X, X, X, X)), - negate(swizzle(temp[0], X, X, X, X)), 0); - - emit_arith(fp, PFS_OP_MAD, dest, mask, - swizzle(temp[0], Y, Y, Y, Y), - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[0], X, X, X, X), flags); - - free_temp(fp, temp[0]); - break; - case OPCODE_DP3: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_DP3, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_DP4: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_DP4, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_DPH: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - /* src0.xyz1 -> temp - * DP4 dest, temp, src1 - */ -#if 0 - temp[0] = get_temp_reg(fp); - src[0].s_swz = SWIZZLE_ONE; - emit_arith(fp, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, pfs_zero, 0); - emit_arith(fp, PFS_OP_DP4, dest, mask, - temp[0], src[1], undef, flags); - free_temp(fp, temp[0]); -#else - emit_arith(fp, PFS_OP_DP4, dest, mask, - swizzle(src[0], X, Y, Z, ONE), src[1], - undef, flags); -#endif - break; - case OPCODE_DST: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - /* dest.y = src0.y * src1.y */ - if (mask & WRITEMASK_Y) - emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y, - keep(src[0]), keep(src[1]), - pfs_zero, flags); - /* dest.z = src0.z */ - if (mask & WRITEMASK_Z) - emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z, - src[0], pfs_one, pfs_zero, flags); - /* result.x = 1.0 - * result.w = src1.w */ - if (mask & WRITEMASK_XW) { - REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */ - emit_arith(fp, PFS_OP_MAD, dest, - mask & WRITEMASK_XW, - src[1], pfs_one, pfs_zero, flags); - } - break; - case OPCODE_EX2: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_EX2, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_FLR: - src[0] = t_src(fp, fpi->SrcReg[0]); - temp[0] = get_temp_reg(fp); - /* FRC temp, src0 - * MAD dest, src0, 1.0, -temp - */ - emit_arith(fp, PFS_OP_FRC, temp[0], mask, - keep(src[0]), undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], pfs_one, negate(temp[0]), flags); - free_temp(fp, temp[0]); - break; - case OPCODE_FRC: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_FRC, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_KIL: - emit_tex(fp, fpi, R300_FPITX_OP_KIL); - break; - case OPCODE_LG2: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_LG2, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_LIT: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_lit(fp, dest, mask, src[0], flags); - break; - case OPCODE_LRP: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); - /* result = tmp0tmp1 + (1 - tmp0)tmp2 - * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 - * MAD temp, -tmp0, tmp2, tmp2 - * MAD result, tmp0, tmp1, temp - */ - temp[0] = get_temp_reg(fp); - emit_arith(fp, PFS_OP_MAD, temp[0], mask, - negate(keep(src[0])), keep(src[2]), src[2], - 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], src[1], temp[0], flags); - free_temp(fp, temp[0]); - break; - case OPCODE_MAD: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], src[1], src[2], flags); - break; - case OPCODE_MAX: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAX, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_MIN: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MIN, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_MOV: - case OPCODE_SWZ: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], pfs_one, pfs_zero, flags); - break; - case OPCODE_MUL: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], src[1], pfs_zero, flags); - break; - case OPCODE_POW: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - src[1] = t_scalar_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); - emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W, - src[0], undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, - temp[0], src[1], pfs_zero, 0); - emit_arith(fp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, - temp[0], undef, undef, 0); - free_temp(fp, temp[0]); - break; - case OPCODE_RCP: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_RCP, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_RSQ: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_RSQ, dest, mask, - absolute(src[0]), pfs_zero, pfs_zero, flags); - break; - case OPCODE_SCS: - /* - * scs using a parabola : - * scs(x): - * result.x = sin(-abs(x)+0.5*PI) (cos) - * result.y = sin(x) (sin) - * - */ - temp[0] = get_temp_reg(fp); - temp[1] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - - /* x = -abs(x)+0.5*PI */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI - pfs_half, - negate(abs - (swizzle(keep(src[0]), X, X, X, X))), - 0); - - /* C*x (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, - swizzle(const_sin[0], Y, Y, Y, Y), - swizzle(keep(src[0]), X, X, X, X), - pfs_zero, 0); - - /* B*x, C*x (cos) */ - emit_arith(fp, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - /* B*x (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, - swizzle(const_sin[0], X, X, X, X), - keep(src[0]), pfs_zero, 0); - - /* y = B*x + C*x*abs(x) (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z, - absolute(src[0]), - swizzle(temp[0], W, W, W, W), - swizzle(temp[1], W, W, W, W), 0); - - /* y = B*x + C*x*abs(x) (cos) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], - W, Z, Y, - X), - absolute(swizzle(temp[1], W, Z, Y, X)), - negate(swizzle(temp[1], W, Z, Y, X)), 0); - - /* dest.xy = mad(temp.xy, P, temp2.wz) */ - emit_arith(fp, PFS_OP_MAD, dest, - mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[1], W, Z, Y, X), flags); - - free_temp(fp, temp[0]); - free_temp(fp, temp[1]); - break; - case OPCODE_SGE: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); - /* temp = src0 - src1 - * dest.c = (temp.c < 0.0) ? 0 : 1 - */ - emit_arith(fp, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, negate(src[1]), 0); - emit_arith(fp, PFS_OP_CMP, dest, mask, - pfs_one, pfs_zero, temp[0], 0); - free_temp(fp, temp[0]); - break; - case OPCODE_SIN: - /* - * using a parabola: - * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x) - * extra precision is obtained by weighting against - * itself squared. - */ - - temp[0] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - - /* do range reduction */ - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(keep(src[0]), X, X, X, X), - swizzle(const_sin[1], Z, Z, Z, Z), - pfs_half, 0); - - emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, - swizzle(temp[0], X, X, X, X), - undef, undef, 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI - negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI - 0); - - /* SIN */ - - emit_arith(fp, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, - swizzle(temp[0], X, X, X, X), - absolute(swizzle(temp[0], X, X, X, X)), - negate(swizzle(temp[0], X, X, X, X)), 0); - - emit_arith(fp, PFS_OP_MAD, dest, mask, - swizzle(temp[0], Y, Y, Y, Y), - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[0], X, X, X, X), flags); - - free_temp(fp, temp[0]); - break; - case OPCODE_SLT: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); - /* temp = src0 - src1 - * dest.c = (temp.c < 0.0) ? 1 : 0 - */ - emit_arith(fp, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, negate(src[1]), 0); - emit_arith(fp, PFS_OP_CMP, dest, mask, - pfs_zero, pfs_one, temp[0], 0); - free_temp(fp, temp[0]); - break; - case OPCODE_SUB: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], pfs_one, negate(src[1]), flags); - break; - case OPCODE_TEX: - emit_tex(fp, fpi, R300_FPITX_OP_TEX); - break; - case OPCODE_TXB: - emit_tex(fp, fpi, R300_FPITX_OP_TXB); - break; - case OPCODE_TXP: - emit_tex(fp, fpi, R300_FPITX_OP_TXP); - break; - case OPCODE_XPD:{ - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); - /* temp = src0.zxy * src1.yzx */ - emit_arith(fp, PFS_OP_MAD, temp[0], - WRITEMASK_XYZ, swizzle(keep(src[0]), - Z, X, Y, W), - swizzle(keep(src[1]), Y, Z, X, W), - pfs_zero, 0); - /* dest.xyz = src0.yzx * src1.zxy - temp - * dest.w = undefined - * */ - emit_arith(fp, PFS_OP_MAD, dest, - mask & WRITEMASK_XYZ, swizzle(src[0], - Y, Z, - X, W), - swizzle(src[1], Z, X, Y, W), - negate(temp[0]), flags); - /* cleanup */ - free_temp(fp, temp[0]); - break; - } - default: - ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); - break; - } - - if (fp->error) - return GL_FALSE; - - } - - return GL_TRUE; -} + if (!(InputsRead & FRAG_BIT_WPOS)) + return; -static void insert_wpos(struct gl_program *prog) -{ static gl_state_index tokens[STATE_LENGTH] = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 }; struct prog_instruction *fpi; GLuint window_index; int i = 0; - GLuint tempregi = prog->NumTemporaries; - /* should do something else if no temps left... */ - prog->NumTemporaries++; + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - fpi = _mesa_alloc_instructions(prog->NumInstructions + 3); - _mesa_init_instructions(fpi, prog->NumInstructions + 3); + _mesa_insert_instructions(compiler->program, 0, 3); + fpi = compiler->program->Instructions; /* perspective divide */ fpi[i].Opcode = OPCODE_RCP; @@ -2027,7 +303,7 @@ static void insert_wpos(struct gl_program *prog) i++; /* viewport transformation */ - window_index = _mesa_add_state_reference(prog->Parameters, tokens); + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); fpi[i].Opcode = OPCODE_MAD; @@ -2052,242 +328,182 @@ static void insert_wpos(struct gl_program *prog) MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); i++; - _mesa_copy_instructions(&fpi[i], prog->Instructions, - prog->NumInstructions); - - free(prog->Instructions); - - prog->Instructions = fpi; - - prog->NumInstructions += i; - fpi = &prog->Instructions[prog->NumInstructions - 1]; - - assert(fpi->Opcode == OPCODE_END); - - for (fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++) { - for (i = 0; i < 3; i++) - if (fpi->SrcReg[i].File == PROGRAM_INPUT && - fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { - fpi->SrcReg[i].File = PROGRAM_TEMPORARY; - fpi->SrcReg[i].Index = tempregi; + for (; i < compiler->program->NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; } + } } } -/* - Init structures - * - Determine what hwregs each input corresponds to - */ -static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) + +static void nqssadce_init(struct nqssadce_state* s) { - struct r300_pfs_compile_state *cs = NULL; - struct gl_fragment_program *mp = &fp->mesa_program; - struct prog_instruction *fpi; - GLuint InputsRead = mp->Base.InputsRead; - GLuint temps_used = 0; /* for fp->temps[] */ - int i, j; - - /* New compile, reset tracking data */ - fp->optimization = - driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); - fp->translated = GL_FALSE; - fp->error = GL_FALSE; - fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); - fp->tex.length = 0; - fp->cur_node = 0; - fp->first_node_has_tex = 0; - fp->const_nr = 0; - fp->max_temp_idx = 0; - fp->node[0].alu_end = -1; - fp->node[0].tex_end = -1; - - _mesa_memset(cs, 0, sizeof(*fp->cs)); - for (i = 0; i < PFS_MAX_ALU_INST; i++) { - for (j = 0; j < 3; j++) { - cs->slot[i].vsrc[j] = SRC_CONST; - cs->slot[i].ssrc[j] = SRC_CONST; - } - } + s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W; +} - /* Work out what temps the Mesa inputs correspond to, this must match - * what setup_rs_unit does, which shouldn't be a problem as rs_unit - * configures itself based on the fragprog's InputsRead - * - * NOTE: this depends on get_hw_temp() allocating registers in order, - * starting from register 0. - */ - /* Texcoords come first */ - for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; - cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = - get_hw_temp(fp, 0); - } +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; } - InputsRead &= ~FRAG_BITS_TEX_ANY; +} - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) { - cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); - insert_wpos(&mp->Base); - } - InputsRead &= ~FRAG_BIT_WPOS; +static GLuint build_func(GLuint comparefunc) +{ + return comparefunc - GL_NEVER; +} - /* Then primary colour */ - if (InputsRead & FRAG_BIT_COL0) { - cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); - } - InputsRead &= ~FRAG_BIT_COL0; - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) { - cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); - } - InputsRead &= ~FRAG_BIT_COL1; - - /* Anything else */ - if (InputsRead) { - WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); - /* force read from hwreg 0 for now */ - for (i = 0; i < 32; i++) - if (InputsRead & (1 << i)) - cs->inputs[i].reg = 0; - } +/** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct r300_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; - /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. - * That way, we can free up the reg when it's no longer needed - */ - if (!mp->Base.Instructions) { - ERROR("No instructions found in program\n"); - return; - } + _mesa_bzero(state, sizeof(*state)); - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { - int idx; - - for (i = 0; i < 3; i++) { - idx = fpi->SrcReg[i].Index; - switch (fpi->SrcReg[i].File) { - case PROGRAM_TEMPORARY: - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - break; - case PROGRAM_INPUT: - cs->inputs[idx].refcount++; - break; - default: - break; - } - } + for(unit = 0; unit < 16; ++unit) { + if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - idx = fpi->DstReg.Index; - if (fpi->DstReg.File == PROGRAM_TEMPORARY) { - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); } } - cs->temp_in_use = temps_used; } -static void update_params(struct r300_fragment_program *fp) -{ - struct gl_fragment_program *mp = &fp->mesa_program; - - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (mp->Base.Parameters) - _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters); -} void r300TranslateFragmentShader(r300ContextPtr r300, struct r300_fragment_program *fp) { - struct r300_pfs_compile_state *cs = NULL; + struct r300_fragment_program_external_state state; + + build_state(r300, fp, &state); + if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + fp->translated = GL_FALSE; + _mesa_memcpy(&fp->state, &state, sizeof(state)); + } if (!fp->translated) { + struct r300_fragment_program_compiler compiler; + + compiler.r300 = r300; + compiler.fp = fp; + compiler.code = &fp->code; + compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: Initial program:\n"); + _mesa_print_program(compiler.program); + } - init_program(r300, fp); - cs = fp->cs; + insert_WPOS_trailer(&compiler); + + struct radeon_program_transformation transformations[] = { + { &transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform( + r300->radeon.glCtx, + compiler.program, + 3, transformations); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + _mesa_print_program(compiler.program); + } - if (parse_program(fp) == GL_FALSE) { - dump_program(fp); - return; + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(compiler.program); } - /* Finish off */ - fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; - if (fp->node[fp->cur_node].tex_end < 0) - fp->node[fp->cur_node].tex_end = 0; - fp->alu_offset = 0; - fp->alu_end = cs->nrslots - 1; - fp->tex_offset = 0; - fp->tex_end = fp->tex.length ? fp->tex.length - 1 : 0; - assert(fp->node[fp->cur_node].alu_end >= 0); - assert(fp->alu_end >= 0); - - fp->translated = GL_TRUE; - if (RADEON_DEBUG & DEBUG_PIXEL) - dump_program(fp); - r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + if (!r300FragmentProgramEmit(&compiler)) + fp->error = GL_TRUE; + + /* Subtle: Rescue any parameters that have been added during transformations */ + _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); + fp->mesa_program.Base.Parameters = compiler.program->Parameters; + compiler.program->Parameters = 0; + + _mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL); + + if (!fp->error) + fp->translated = GL_TRUE; + if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300FragmentProgramDump(fp, &fp->code); + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); } - update_params(fp); + update_params(r300, fp); } /* just some random things... */ -static void dump_program(struct r300_fragment_program *fp) +void r300FragmentProgramDump( + struct r300_fragment_program *fp, + struct r300_fragment_program_code *code) { int n, i, j; static int pc = 0; fprintf(stderr, "pc=%d*************************************\n", pc++); - fprintf(stderr, "Mesa program:\n"); - fprintf(stderr, "-------------\n"); - _mesa_print_program(&fp->mesa_program.Base); - fflush(stdout); - fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); - for (n = 0; n < (fp->cur_node + 1); n++) { + for (n = 0; n < (code->cur_node + 1); n++) { fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d\n", n, - fp->node[n].alu_offset, - fp->node[n].tex_offset, - fp->node[n].alu_end, fp->node[n].tex_end); + "alu_end: %d, tex_end: %d, flags: %08x\n", n, + code->node[n].alu_offset, + code->node[n].tex_offset, + code->node[n].alu_end, code->node[n].tex_end, + code->node[n].flags); - if (fp->tex.length) { + if (n > 0 || code->first_node_has_tex) { fprintf(stderr, " TEX:\n"); - for (i = fp->node[n].tex_offset; - i <= fp->node[n].tex_offset + fp->node[n].tex_end; + for (i = code->node[n].tex_offset; + i <= code->node[n].tex_offset + code->node[n].tex_end; ++i) { const char *instr; - switch ((fp->tex. - inst[i] >> R300_FPITX_OPCODE_SHIFT) & + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & 15) { - case R300_FPITX_OP_TEX: + case R300_TEX_OP_LD: instr = "TEX"; break; - case R300_FPITX_OP_KIL: + case R300_TEX_OP_KIL: instr = "KIL"; break; - case R300_FPITX_OP_TXP: + case R300_TEX_OP_TXP: instr = "TXP"; break; - case R300_FPITX_OP_TXB: + case R300_TEX_OP_TXB: instr = "TXB"; break; default: @@ -2297,22 +513,20 @@ static void dump_program(struct r300_fragment_program *fp) fprintf(stderr, " %s t%i, %c%i, texture[%i] (%08x)\n", instr, - (fp->tex. - inst[i] >> R300_FPITX_DST_SHIFT) & 31, - (fp->tex. - inst[i] & R300_FPITX_SRC_CONST) ? 'c' : + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, 't', - (fp->tex. - inst[i] >> R300_FPITX_SRC_SHIFT) & 31, - (fp->tex. - inst[i] & R300_FPITX_IMAGE_MASK) >> - R300_FPITX_IMAGE_SHIFT, - fp->tex.inst[i]); + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); } } - for (i = fp->node[n].alu_offset; - i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) { + for (i = code->node[n].alu_offset; + i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { char srcc[3][10], dstc[20]; char srca[3][10], dsta[20]; char argc[3][20]; @@ -2320,8 +534,8 @@ static void dump_program(struct r300_fragment_program *fp) char flags[5], tmp[10]; for (j = 0; j < 3; ++j) { - int regc = fp->alu.inst[i].inst1 >> (j * 6); - int rega = fp->alu.inst[i].inst3 >> (j * 6); + int regc = code->alu.inst[i].inst1 >> (j * 6); + int rega = code->alu.inst[i].inst3 >> (j * 6); sprintf(srcc[j], "%c%i", (regc & 32) ? 'c' : 't', regc & 31); @@ -2331,46 +545,46 @@ static void dump_program(struct r300_fragment_program *fp) dstc[0] = 0; sprintf(flags, "%s%s%s", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : ""); + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(dstc, "t%i.%s ", - (fp->alu.inst[i]. - inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + (code->alu.inst[i]. + inst1 >> R300_ALU_DSTC_SHIFT) & 31, flags); } sprintf(flags, "%s%s%s", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : ""); + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(tmp, "o%i.%s", - (fp->alu.inst[i]. - inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + (code->alu.inst[i]. + inst1 >> R300_ALU_DSTC_SHIFT) & 31, flags); strcat(dstc, tmp); } dsta[0] = 0; - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { sprintf(dsta, "t%i.w ", - (fp->alu.inst[i]. - inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + (code->alu.inst[i]. + inst3 >> R300_ALU_DSTA_SHIFT) & 31); } - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) { + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", - (fp->alu.inst[i]. - inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + (code->alu.inst[i]. + inst3 >> R300_ALU_DSTA_SHIFT) & 31); strcat(dsta, tmp); } - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { strcat(dsta, "Z"); } @@ -2378,31 +592,31 @@ static void dump_program(struct r300_fragment_program *fp) "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" " w: %3s %3s %3s -> %-20s (%08x)\n", i, srcc[0], srcc[1], srcc[2], dstc, - fp->alu.inst[i].inst1, srca[0], srca[1], - srca[2], dsta, fp->alu.inst[i].inst3); + code->alu.inst[i].inst1, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].inst3); for (j = 0; j < 3; ++j) { - int regc = fp->alu.inst[i].inst0 >> (j * 7); - int rega = fp->alu.inst[i].inst2 >> (j * 7); + int regc = code->alu.inst[i].inst0 >> (j * 7); + int rega = code->alu.inst[i].inst2 >> (j * 7); int d; char buf[20]; d = regc & 31; if (d < 12) { switch (d % 4) { - case R300_FPI0_ARGC_SRC0C_XYZ: + case R300_ALU_ARGC_SRC0C_XYZ: sprintf(buf, "%s.xyz", srcc[d / 4]); break; - case R300_FPI0_ARGC_SRC0C_XXX: + case R300_ALU_ARGC_SRC0C_XXX: sprintf(buf, "%s.xxx", srcc[d / 4]); break; - case R300_FPI0_ARGC_SRC0C_YYY: + case R300_ALU_ARGC_SRC0C_YYY: sprintf(buf, "%s.yyy", srcc[d / 4]); break; - case R300_FPI0_ARGC_SRC0C_ZZZ: + case R300_ALU_ARGC_SRC0C_ZZZ: sprintf(buf, "%s.zzz", srcc[d / 4]); break; @@ -2465,8 +679,8 @@ static void dump_program(struct r300_fragment_program *fp) fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" " w: %8s %8s %8s op: %08x\n", argc[0], argc[1], argc[2], - fp->alu.inst[i].inst0, arga[0], arga[1], - arga[2], fp->alu.inst[i].inst2); + code->alu.inst[i].inst0, arga[0], arga[1], + arga[2], code->alu.inst[i].inst2); } } } diff --git a/r300/r300_fragprog.h b/r300/r300_fragprog.h index 72fca77..b3a3cd2 100644 --- a/r300/r300_fragprog.h +++ b/r300/r300_fragprog.h @@ -40,65 +40,93 @@ #include "shader/prog_instruction.h" #include "r300_context.h" - -typedef struct r300_fragment_program_swizzle { - GLuint length; - GLuint src[4]; - GLuint inst[8]; -} r300_fragment_program_swizzle_t; - -/* supported hw opcodes */ -#define PFS_OP_MAD 0 -#define PFS_OP_DP3 1 -#define PFS_OP_DP4 2 -#define PFS_OP_MIN 3 -#define PFS_OP_MAX 4 -#define PFS_OP_CMP 5 -#define PFS_OP_FRC 6 -#define PFS_OP_EX2 7 -#define PFS_OP_LG2 8 -#define PFS_OP_RCP 9 -#define PFS_OP_RSQ 10 -#define PFS_OP_REPL_ALPHA 11 -#define PFS_OP_CMPH 12 -#define MAX_PFS_OP 12 - -#define PFS_FLAG_SAT (1 << 0) -#define PFS_FLAG_ABS (1 << 1) - -#define ARG_NEG (1 << 5) -#define ARG_ABS (1 << 6) -#define ARG_MASK (127 << 0) -#define ARG_STRIDE 7 -#define SRC_CONST (1 << 5) -#define SRC_MASK (63 << 0) -#define SRC_STRIDE 6 - -#define NOP_INST0 ( \ - (R300_FPI0_OUTC_MAD) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT)) -#define NOP_INST1 ( \ - ((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT)) -#define NOP_INST2 ( \ - (R300_FPI2_OUTA_MAD) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT)) -#define NOP_INST3 ( \ - ((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT)) +#include "radeon_program.h" #define DRI_CONF_FP_OPTIMIZATION_SPEED 0 #define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 +#if 1 + +/** + * Fragment program helper macros + */ + +/* Produce unshifted source selectors */ +#define FP_TMP(idx) (idx) +#define FP_CONST(idx) ((idx) | (1 << 5)) + +/* Produce source/dest selector dword */ +#define FP_SELC_MASK_NO 0 +#define FP_SELC_MASK_X 1 +#define FP_SELC_MASK_Y 2 +#define FP_SELC_MASK_XY 3 +#define FP_SELC_MASK_Z 4 +#define FP_SELC_MASK_XZ 5 +#define FP_SELC_MASK_YZ 6 +#define FP_SELC_MASK_XYZ 7 + +#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTC_SHIFT) | \ + (FP_SELC_MASK_##regmask << 23) | \ + (FP_SELC_MASK_##outmask << 26) | \ + ((src0) << R300_ALU_SRC0C_SHIFT) | \ + ((src1) << R300_ALU_SRC1C_SHIFT) | \ + ((src2) << R300_ALU_SRC2C_SHIFT)) + +#define FP_SELA_MASK_NO 0 +#define FP_SELA_MASK_W 1 + +#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTA_SHIFT) | \ + (FP_SELA_MASK_##regmask << 23) | \ + (FP_SELA_MASK_##outmask << 24) | \ + ((src0) << R300_ALU_SRC0A_SHIFT) | \ + ((src1) << R300_ALU_SRC1A_SHIFT) | \ + ((src2) << R300_ALU_SRC2A_SHIFT)) + +/* Produce unshifted argument selectors */ +#define FP_ARGC(source) R300_ALU_ARGC_##source +#define FP_ARGA(source) R300_ALU_ARGA_##source +#define FP_ABS(arg) ((arg) | (1 << 6)) +#define FP_NEG(arg) ((arg) ^ (1 << 5)) + +/* Produce instruction dword */ +#define FP_INSTRC(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTC_##opcode | \ + ((arg0) << R300_ALU_ARG0C_SHIFT) | \ + ((arg1) << R300_ALU_ARG1C_SHIFT) | \ + ((arg2) << R300_ALU_ARG2C_SHIFT)) + +#define FP_INSTRA(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTA_##opcode | \ + ((arg0) << R300_ALU_ARG0A_SHIFT) | \ + ((arg1) << R300_ALU_ARG1A_SHIFT) | \ + ((arg2) << R300_ALU_ARG2A_SHIFT)) + +#endif + struct r300_fragment_program; extern void r300TranslateFragmentShader(r300ContextPtr r300, struct r300_fragment_program *fp); + +/** + * Used internally by the r300 fragment program code to store compile-time + * only data. + */ +struct r300_fragment_program_compiler { + r300ContextPtr r300; + struct r300_fragment_program *fp; + struct r300_fragment_program_code *code; + struct gl_program *program; +}; + +extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); + + +extern void r300FragmentProgramDump( + struct r300_fragment_program *fp, + struct r300_fragment_program_code *code); + #endif diff --git a/r300/r300_fragprog_emit.c b/r300/r300_fragprog_emit.c new file mode 100644 index 0000000..9f0b7e3 --- /dev/null +++ b/r300/r300_fragprog_emit.c @@ -0,0 +1,344 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. + * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs <darktama@iinet.net.au> + * + * \author Jerome Glisse <j.glisse@gmail.com> + * + * \todo FogOption + */ + +#include "r300_fragprog.h" + +#include "radeon_program_pair.h" +#include "r300_fragprog_swizzle.h" +#include "r300_reg.h" + + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r300_fragment_program_code *code = c->code + +#define error(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex) +{ + PROG_CODE; + + for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { + if (code->constant[*hwindex].File == file && + code->constant[*hwindex].Index == index) + break; + } + + if (*hwindex >= code->const_nr) { + if (*hwindex >= PFS_NUM_CONST_REGS) { + error("Out of hw constants!\n"); + return GL_FALSE; + } + + code->const_nr++; + code->constant[*hwindex].File = file; + code->constant[*hwindex].Index = index; + } + + return GL_TRUE; +} + + +/** + * Mark a temporary register as used. + */ +static void use_temporary(struct r300_fragment_program_code *code, GLuint index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + + +static GLuint translate_rgb_opcode(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R300_ALU_OUTC_CMP; + case OPCODE_DP3: return R300_ALU_OUTC_DP3; + case OPCODE_DP4: return R300_ALU_OUTC_DP4; + case OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode(%i): Unknown opcode", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R300_ALU_OUTC_MAD; + case OPCODE_MAX: return R300_ALU_OUTC_MAX; + case OPCODE_MIN: return R300_ALU_OUTC_MIN; + case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } +} + +static GLuint translate_alpha_opcode(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R300_ALU_OUTA_CMP; + case OPCODE_DP3: return R300_ALU_OUTA_DP4; + case OPCODE_DP4: return R300_ALU_OUTA_DP4; + case OPCODE_EX2: return R300_ALU_OUTA_EX2; + case OPCODE_FRC: return R300_ALU_OUTA_FRC; + case OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode(%i): Unknown opcode", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R300_ALU_OUTA_MAD; + case OPCODE_MAX: return R300_ALU_OUTA_MAX; + case OPCODE_MIN: return R300_ALU_OUTA_MIN; + case OPCODE_RCP: return R300_ALU_OUTA_RCP; + case OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } +} + +/** + * Emit one paired ALU instruction. + */ +static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) +{ + PROG_CODE; + + if (code->alu.length >= PFS_MAX_ALU_INST) { + error("Too many ALU instructions"); + return GL_FALSE; + } + + int ip = code->alu.length++; + int j; + code->node[code->cur_node].alu_end++; + + code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode); + code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode); + + for(j = 0; j < 3; ++j) { + GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); + if (!inst->RGB.Src[j].Constant) + use_temporary(code, inst->RGB.Src[j].Index); + code->alu.inst[ip].inst1 |= src << (6*j); + + src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); + if (!inst->Alpha.Src[j].Constant) + use_temporary(code, inst->Alpha.Src[j].Index); + code->alu.inst[ip].inst3 |= src << (6*j); + + GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + code->alu.inst[ip].inst0 |= arg << (7*j); + + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg |= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].inst2 |= arg << (7*j); + } + + if (inst->RGB.Saturate) + code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP; + + if (inst->RGB.WriteMask) { + use_temporary(code, inst->RGB.DestIndex); + code->alu.inst[ip].inst1 |= + (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { + code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + code->node[code->cur_node].flags |= R300_RGBA_OUT; + } + + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + code->alu.inst[ip].inst3 |= + (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT; + code->node[code->cur_node].flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; + code->node[code->cur_node].flags |= R300_W_OUT; + c->fp->WritesDepth = GL_TRUE; + } + + return GL_TRUE; +} + + +/** + * Finish the current node without advancing to the next one. + */ +static GLboolean finish_node(struct r300_fragment_program_compiler *c) +{ + struct r300_fragment_program_code *code = c->code; + struct r300_fragment_program_node *node = &code->node[code->cur_node]; + + if (node->alu_end < 0) { + /* Generate a single NOP for this node */ + struct radeon_pair_instruction inst; + _mesa_bzero(&inst, sizeof(inst)); + if (!emit_alu(c, &inst)) + return GL_FALSE; + } + + if (node->tex_end < 0) { + if (code->cur_node == 0) { + node->tex_end = 0; + } else { + error("Node %i has no TEX instructions", code->cur_node); + return GL_FALSE; + } + } else { + if (code->cur_node == 0) + code->first_node_has_tex = 1; + } + + return GL_TRUE; +} + + +/** + * Begin a block of texture instructions. + * Create the necessary indirection. + */ +static GLboolean begin_tex(void* data) +{ + PROG_CODE; + + if (code->cur_node == 0) { + if (code->node[0].alu_end < 0 && + code->node[0].tex_end < 0) + return GL_TRUE; + } + + if (code->cur_node == 3) { + error("Too many texture indirections"); + return GL_FALSE; + } + + if (!finish_node(c)) + return GL_FALSE; + + struct r300_fragment_program_node *node = &code->node[++code->cur_node]; + node->alu_offset = code->alu.length; + node->alu_end = -1; + node->tex_offset = code->tex.length; + node->tex_end = -1; + return GL_TRUE; +} + + +static GLboolean emit_tex(void* data, struct prog_instruction* inst) +{ + PROG_CODE; + + if (code->tex.length >= PFS_MAX_TEX_INST) { + error("Too many TEX instructions"); + return GL_FALSE; + } + + GLuint unit = inst->TexSrcUnit; + GLuint dest = inst->DstReg.Index; + GLuint opcode; + + switch(inst->Opcode) { + case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %i", inst->Opcode); + return GL_FALSE; + } + + if (inst->Opcode == OPCODE_KIL) { + unit = 0; + dest = 0; + } else { + use_temporary(code, dest); + } + + use_temporary(code, inst->SrcReg[0].Index); + + code->node[code->cur_node].tex_end++; + code->tex.inst[code->tex.length++] = + (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | + (dest << R300_DST_ADDR_SHIFT) | + (unit << R300_TEX_ID_SHIFT) | + (opcode << R300_TEX_INST_SHIFT); + return GL_TRUE; +} + + +static const struct radeon_pair_handler pair_handler = { + .EmitConst = &emit_const, + .EmitPaired = &emit_alu, + .EmitTex = &emit_tex, + .BeginTexBlock = &begin_tex, + .MaxHwTemps = PFS_NUM_TEMP_REGS +}; + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions. + */ +GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler) +{ + struct r300_fragment_program_code *code = compiler->code; + + _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); + code->node[0].alu_end = -1; + code->node[0].tex_end = -1; + + if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) + return GL_FALSE; + + if (!finish_node(compiler)) + return GL_FALSE; + + return GL_TRUE; +} + diff --git a/r300/r300_fragprog_swizzle.c b/r300/r300_fragprog_swizzle.c new file mode 100644 index 0000000..a86d2bd --- /dev/null +++ b/r300/r300_fragprog_swizzle.c @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * Utilities to deal with the somewhat odd restriction on R300 fragment + * program swizzles. + */ + +#include "r300_fragprog_swizzle.h" + +#include "r300_reg.h" +#include "radeon_nqssadce.h" + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) + +struct swizzle_data { + GLuint hash; /**< swizzle value this matches */ + GLuint base; /**< base value for hw swizzle */ + GLuint stride; /**< difference in base between arg0/1/2 */ +}; + +static const struct swizzle_data native_swizzles[] = { + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} +}; + +static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); + + +/** + * Find a native RGB swizzle that matches the given swizzle. + * Returns 0 if none found. + */ +static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) +{ + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data* sd = &native_swizzles[i]; + for(comp = 0; comp < 3; ++comp) { + GLuint swz = GET_SWZ(swizzle, comp); + if (swz == SWIZZLE_NIL) + continue; + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) + return sd; + } + + return 0; +} + + +/** + * Check whether the given instruction supports the swizzle and negate + * combinations in the given source register. + */ +GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +{ + if (reg.Abs) + reg.NegateBase = 0; + + if (opcode == OPCODE_KIL || + opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXP) { + int j; + + if (reg.Abs || reg.NegateBase != (15*reg.NegateAbs)) + return GL_FALSE; + + for(j = 0; j < 4; ++j) { + GLuint swz = GET_SWZ(reg.Swizzle, j); + if (swz == SWIZZLE_NIL) + continue; + if (swz != j) + return GL_FALSE; + } + + return GL_TRUE; + } + + GLuint relevant = 0; + int j; + + for(j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) + relevant |= 1 << j; + + if ((reg.NegateBase & relevant) && (reg.NegateBase & relevant) != relevant) + return GL_FALSE; + + if (!lookup_native_swizzle(reg.Swizzle)) + return GL_FALSE; + + return GL_TRUE; +} + + +/** + * Generate MOV dst, src using only native swizzles. + */ +void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +{ + if (src.Abs) + src.NegateBase = 0; + + while(dst.WriteMask) { + const struct swizzle_data *best_swizzle = 0; + GLuint best_matchcount = 0; + GLuint best_matchmask = 0; + GLboolean rgbnegate; + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + GLuint matchcount = 0; + GLuint matchmask = 0; + for(comp = 0; comp < 3; ++comp) { + if (!GET_BIT(dst.WriteMask, comp)) + continue; + GLuint swz = GET_SWZ(src.Swizzle, comp); + if (swz == SWIZZLE_NIL) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_swizzle = sd; + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (dst.WriteMask & WRITEMASK_XYZ)) + break; + } + } + + if ((src.NegateBase & best_matchmask) != 0) { + best_matchmask &= src.NegateBase; + rgbnegate = !src.NegateAbs; + } else { + rgbnegate = src.NegateAbs; + } + + struct prog_instruction *inst; + + _mesa_insert_instructions(s->Program, s->IP, 1); + inst = s->Program->Instructions + s->IP++; + inst->Opcode = OPCODE_MOV; + inst->DstReg = dst; + inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); + inst->SrcReg[0] = src; + /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ + + dst.WriteMask &= ~inst->DstReg.WriteMask; + } +} + + +/** + * Translate an RGB (XYZ) swizzle into the hardware code for the given + * instruction source. + */ +GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) +{ + const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + + if (!sd) { + _mesa_printf("Not a native swizzle: %08x\n", swizzle); + return 0; + } + + return sd->base + src*sd->stride; +} + + +/** + * Translate an Alpha (W) swizzle into the hardware code for the given + * instruction source. + */ +GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle) +{ + if (swizzle < 3) + return swizzle + 3*src; + + switch(swizzle) { + case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + default: return R300_ALU_ARGA_ONE; + } +} diff --git a/r300/r300_fragprog_swizzle.h b/r300/r300_fragprog_swizzle.h new file mode 100644 index 0000000..3da99a9 --- /dev/null +++ b/r300/r300_fragprog_swizzle.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_SWIZZLE_H_ +#define __R300_FRAGPROG_SWIZZLE_H_ + +#include "glheader.h" +#include "shader/prog_instruction.h" + +struct nqssadce_state; + +GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); +void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + +GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle); +GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle); + +#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/r300/r300_ioctl.c b/r300/r300_ioctl.c index ea94ce2..bd7f060 100644 --- a/r300/r300_ioctl.c +++ b/r300/r300_ioctl.c @@ -51,9 +51,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_cmdbuf.h" #include "r300_state.h" -#include "r300_program.h" +#include "r300_vertprog.h" #include "radeon_reg.h" #include "r300_emit.h" +#include "r300_fragprog.h" #include "vblank.h" @@ -106,19 +107,19 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) e32(cbpitch); R300_STATECHANGE(r300, cmk); - reg_start(R300_RB3D_COLORMASK, 0); + reg_start(RB3D_COLOR_CHANNEL_MASK, 0); if (flags & CLEARBUFFER_COLOR) { - e32((ctx->Color.ColorMask[BCOMP] ? R300_COLORMASK0_B : 0) | - (ctx->Color.ColorMask[GCOMP] ? R300_COLORMASK0_G : 0) | - (ctx->Color.ColorMask[RCOMP] ? R300_COLORMASK0_R : 0) | - (ctx->Color.ColorMask[ACOMP] ? R300_COLORMASK0_A : 0)); + e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | + (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | + (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | + (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); } else { e32(0x0); } R300_STATECHANGE(r300, zs); - reg_start(R300_RB3D_ZSTENCIL_CNTL_0, 2); + reg_start(R300_ZB_CNTL, 2); { uint32_t t1, t2; @@ -127,37 +128,28 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) t2 = 0x0; if (flags & CLEARBUFFER_DEPTH) { - t1 |= R300_RB3D_Z_WRITE_ONLY; + t1 |= R300_Z_ENABLE | R300_Z_WRITE_ENABLE; t2 |= - (R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); - } else { - t1 |= R300_RB3D_Z_DISABLED_1; // disable + (R300_ZS_ALWAYS << R300_Z_FUNC_SHIFT); } if (flags & CLEARBUFFER_STENCIL) { - t1 |= R300_RB3D_STENCIL_ENABLE; + t1 |= R300_STENCIL_ENABLE; t2 |= (R300_ZS_ALWAYS << - R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | + R300_S_FRONT_FUNC_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) | + R300_S_FRONT_SFAIL_OP_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT) | + R300_S_FRONT_ZPASS_OP_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) | - (R300_ZS_ALWAYS << - R300_RB3D_ZS1_BACK_FUNC_SHIFT) | - (R300_ZS_REPLACE << - R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) | - (R300_ZS_REPLACE << - R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT) | - (R300_ZS_REPLACE << - R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT); + R300_S_FRONT_ZFAIL_OP_SHIFT); } e32(t1); e32(t2); - e32(r300->state.stencil.clear); + e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | + (ctx->Stencil.Clear & R300_STENCILREF_MASK)); } cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); @@ -172,11 +164,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]); cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); + r300EmitCacheFlush(rmesa); cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); } @@ -190,10 +178,16 @@ static void r300EmitClearState(GLcontext * ctx) int cmd_written = 0; drm_radeon_cmd_header_t *cmd = NULL; int has_tcl = 1; + int is_r500 = 0; + GLuint vap_cntl; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + + /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are * quite complex; see the functions in r300_emit.c. @@ -203,46 +197,57 @@ static void r300EmitClearState(GLcontext * ctx) * these registers, as well as the actual values used for rendering. */ R300_STATECHANGE(r300, vir[0]); - reg_start(R300_VAP_INPUT_ROUTE_0_0, 0); + reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); if (!has_tcl) - e32(0x22030003); + e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); else - e32(0x21030003); + e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); /* disable fog */ R300_STATECHANGE(r300, fogs); - reg_start(R300_RE_FOG_STATE, 0); + reg_start(R300_FG_FOG_BLEND, 0); e32(0x0); R300_STATECHANGE(r300, vir[1]); - reg_start(R300_VAP_INPUT_ROUTE_1_0, 0); - e32(0xF688F688); + reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0); + e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) + << R300_SWIZZLE0_SHIFT) | + (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) + << R300_SWIZZLE1_SHIFT))); /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ R300_STATECHANGE(r300, vic); - reg_start(R300_VAP_INPUT_CNTL_0, 1); - e32(R300_INPUT_CNTL_0_COLOR); + reg_start(R300_VAP_VTX_STATE_CNTL, 1); + e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); - if (!has_tcl) { - R300_STATECHANGE(r300, vte); - /* comes from fglrx startup of clear */ - reg_start(R300_SE_VTE_CNTL, 1); - e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | - R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | - R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | - R300_VPORT_Z_OFFSET_ENA); - e32(0x8); - - reg_start(0x21dc, 0); - e32(0xaaaaaaaa); - } + R300_STATECHANGE(r300, vte); + /* comes from fglrx startup of clear */ + reg_start(R300_SE_VTE_CNTL, 1); + e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | + R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | + R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | + R300_VPORT_Z_OFFSET_ENA); + e32(0x8); + + reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0); + e32(0xaaaaaaaa); R300_STATECHANGE(r300, vof); reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1); e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT); - e32(0x0); /* no textures */ + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); + e32(0x0); /* no textures */ R300_STATECHANGE(r300, txe); reg_start(R300_TX_ENABLE, 0); @@ -258,7 +263,7 @@ static void r300EmitClearState(GLcontext * ctx) efloat(0.0); R300_STATECHANGE(r300, at); - reg_start(R300_PP_ALPHA_TEST, 0); + reg_start(R300_FG_ALPHA_FUNC, 0); e32(0x0); R300_STATECHANGE(r300, bld); @@ -266,78 +271,192 @@ static void r300EmitClearState(GLcontext * ctx) e32(0x0); e32(0x0); - R300_STATECHANGE(r300, vap_clip_cntl); - reg_start(R300_VAP_CLIP_CNTL, 0); - e32(R300_221C_CLEAR); + if (has_tcl) { + R300_STATECHANGE(r300, vap_clip_cntl); + reg_start(R300_VAP_CLIP_CNTL, 0); + e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); + } R300_STATECHANGE(r300, ps); - reg_start(R300_RE_POINTSIZE, 0); + reg_start(R300_GA_POINT_SIZE, 0); e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); - R300_STATECHANGE(r300, ri); - reg_start(R300_RS_INTERP_0, 8); - for (i = 0; i < 8; ++i) { - e32(R300_RS_INTERP_USED); + if (!is_r500) { + R300_STATECHANGE(r300, ri); + reg_start(R300_RS_IP_0, 7); + for (i = 0; i < 8; ++i) { + e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); + } + + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. */ + reg_start(R300_RS_COUNT, 1); + e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + e32(0x0); + + R300_STATECHANGE(r300, rr); + reg_start(R300_RS_INST_0, 0); + e32(R300_RS_INST_COL_CN_WRITE); + } else { + R300_STATECHANGE(r300, ri); + reg_start(R500_RS_IP_0, 7); + for (i = 0; i < 8; ++i) { + e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + } + + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. */ + reg_start(R300_RS_COUNT, 1); + e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + e32(0x0); + + R300_STATECHANGE(r300, rr); + reg_start(R500_RS_INST_0, 0); + e32(R500_RS_INST_COL_CN_WRITE); + } - R300_STATECHANGE(r300, rc); - /* The second constant is needed to get glxgears display anything .. */ - reg_start(R300_RS_CNTL_0, 1); - e32((1 << R300_RS_CNTL_CI_CNT_SHIFT) | R300_RS_CNTL_0_UNKNOWN_18); - e32(0x0); + if (!is_r500) { + R300_STATECHANGE(r300, fp); + reg_start(R300_US_CONFIG, 2); + e32(0x0); + e32(0x0); + e32(0x0); + reg_start(R300_US_CODE_ADDR_0, 3); + e32(0x0); + e32(0x0); + e32(0x0); + e32(R300_RGBA_OUT); - R300_STATECHANGE(r300, rr); - reg_start(R300_RS_ROUTE_0, 0); - e32(R300_RS_ROUTE_0_COLOR); + R300_STATECHANGE(r300, fpi[0]); + R300_STATECHANGE(r300, fpi[1]); + R300_STATECHANGE(r300, fpi[2]); + R300_STATECHANGE(r300, fpi[3]); - R300_STATECHANGE(r300, fp); - reg_start(R300_PFS_CNTL_0, 2); - e32(0x0); - e32(0x0); - e32(0x0); - reg_start(R300_PFS_NODE_0, 3); - e32(0x0); - e32(0x0); - e32(0x0); - e32(R300_PFS_NODE_OUTPUT_COLOR); + reg_start(R300_US_ALU_RGB_INST_0, 0); + e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); - R300_STATECHANGE(r300, fpi[0]); - R300_STATECHANGE(r300, fpi[1]); - R300_STATECHANGE(r300, fpi[2]); - R300_STATECHANGE(r300, fpi[3]); + reg_start(R300_US_ALU_RGB_ADDR_0, 0); + e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); - reg_start(R300_PFS_INSTR0_0, 0); - e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); + reg_start(R300_US_ALU_ALPHA_INST_0, 0); + e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); - reg_start(R300_PFS_INSTR1_0, 0); - e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); + reg_start(R300_US_ALU_ALPHA_ADDR_0, 0); + e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + } else { + R300_STATECHANGE(r300, fp); + reg_start(R500_US_CONFIG, 1); + e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); + e32(0x0); + reg_start(R500_US_CODE_ADDR, 2); + e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); + e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); + e32(R500_US_CODE_OFFSET_ADDR(0)); + + R300_STATECHANGE(r300, r500fp); + r500fp_start_fragment(0, 6); + + e32(R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP); + + e32(R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST); + + e32(R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST); + + e32(R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_B); + + e32(R500_ALPHA_OP_CMP | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_A); + + e32(R500_ALU_RGBA_OP_CMP | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0); + } - reg_start(R300_PFS_INSTR2_0, 0); - e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); + reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0); + e32(0x00000000); + if (has_tcl) { + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT)); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vap_cntl |= R500_TCL_STATE_OPTIMIZATION; + } else + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) + vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) + vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); - reg_start(R300_PFS_INSTR3_0, 0); - e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + R300_STATECHANGE(rmesa, vap_cntl); + reg_start(R300_VAP_CNTL, 0); + e32(vap_cntl); if (has_tcl) { R300_STATECHANGE(r300, pvs); - reg_start(R300_VAP_PVS_CNTL_1, 2); - e32((0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | - (0 << R300_PVS_CNTL_1_POS_END_SHIFT) | - (1 << R300_PVS_CNTL_1_PROGRAM_END_SHIFT)); - e32(0x0); - e32(1 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT); + reg_start(R300_VAP_PVS_CODE_CNTL_0, 2); + + e32((0 << R300_PVS_FIRST_INST_SHIFT) | + (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | + (1 << R300_PVS_LAST_INST_SHIFT)); + e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); + e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); R300_STATECHANGE(r300, vpi); vsf_start_fragment(0x0, 8); - e32(VP_OUT(ADD, OUT, 0, XYZW)); - e32(VP_IN(IN, 0)); - e32(VP_ZERO()); + + e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT)); + e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); + e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); e32(0x0); - e32(VP_OUT(ADD, OUT, 1, XYZW)); - e32(VP_IN(IN, 1)); - e32(VP_ZERO()); + e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT)); + e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); + e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); e32(0x0); } } @@ -414,19 +533,22 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) void r300Flush(GLcontext * ctx) { - r300ContextPtr r300 = R300_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - if (r300->cmdbuf.count_used > r300->cmdbuf.count_reemit) - r300FlushCmdBuf(r300, __FUNCTION__); + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit) + r300FlushCmdBuf(rmesa, __FUNCTION__); } #ifdef USER_BUFFERS #include "r300_mem.h" -static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) +void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) { struct r300_dma_buffer *dmabuf; size = MAX2(size, RADEON_BUFFER_SIZE * 16); @@ -438,9 +560,12 @@ static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) rmesa->dma.flush(rmesa); } - if (rmesa->dma.current.buf) + if (rmesa->dma.current.buf) { +#ifdef USER_BUFFERS + r300_mem_use(rmesa, rmesa->dma.current.buf->id); +#endif r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); - + } if (rmesa->dma.nr_released_bufs > 4) r300FlushCmdBuf(rmesa, __FUNCTION__); diff --git a/r300/r300_ioctl.h b/r300/r300_ioctl.h index 7a19a2c..e1143fb 100644 --- a/r300/r300_ioctl.h +++ b/r300/r300_ioctl.h @@ -56,4 +56,5 @@ extern void r300AllocDmaRegion(r300ContextPtr rmesa, extern void r300InitIoctlFuncs(struct dd_function_table *functions); +extern void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size); #endif /* __R300_IOCTL_H__ */ diff --git a/r300/r300_program.h b/r300/r300_program.h deleted file mode 100644 index eddd783..0000000 --- a/r300/r300_program.h +++ /dev/null @@ -1,150 +0,0 @@ -/* -Copyright (C) 2004 Nicolai Haehnle. All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Nicolai Haehnle <prefect_@gmx.net> - */ - -#ifndef __R300_PROGRAM_H__ -#define __R300_PROGRAM_H__ - -#include "r300_reg.h" - -/** - * Vertex program helper macros - */ - -/* Produce out dword */ -#define VP_OUTCLASS_TMP R300_VPI_OUT_REG_CLASS_TEMPORARY -#define VP_OUTCLASS_OUT R300_VPI_OUT_REG_CLASS_RESULT - -#define VP_OUTMASK_X R300_VPI_OUT_WRITE_X -#define VP_OUTMASK_Y R300_VPI_OUT_WRITE_Y -#define VP_OUTMASK_Z R300_VPI_OUT_WRITE_Z -#define VP_OUTMASK_W R300_VPI_OUT_WRITE_W -#define VP_OUTMASK_XY (VP_OUTMASK_X|VP_OUTMASK_Y) -#define VP_OUTMASK_XZ (VP_OUTMASK_X|VP_OUTMASK_Z) -#define VP_OUTMASK_XW (VP_OUTMASK_X|VP_OUTMASK_W) -#define VP_OUTMASK_XYZ (VP_OUTMASK_XY|VP_OUTMASK_Z) -#define VP_OUTMASK_XYW (VP_OUTMASK_XY|VP_OUTMASK_W) -#define VP_OUTMASK_XZW (VP_OUTMASK_XZ|VP_OUTMASK_W) -#define VP_OUTMASK_XYZW (VP_OUTMASK_XYZ|VP_OUTMASK_W) -#define VP_OUTMASK_YZ (VP_OUTMASK_Y|VP_OUTMASK_Z) -#define VP_OUTMASK_YW (VP_OUTMASK_Y|VP_OUTMASK_W) -#define VP_OUTMASK_YZW (VP_OUTMASK_YZ|VP_OUTMASK_W) -#define VP_OUTMASK_ZW (VP_OUTMASK_Z|VP_OUTMASK_W) - -#define VP_OUT(instr,outclass,outidx,outmask) \ - (R300_VPI_OUT_OP_##instr | \ - ((outidx) << R300_VPI_OUT_REG_INDEX_SHIFT) | \ - VP_OUTCLASS_##outclass | \ - VP_OUTMASK_##outmask) - -/* Produce in dword */ -#define VP_INCLASS_TMP R300_VPI_IN_REG_CLASS_TEMPORARY -#define VP_INCLASS_IN R300_VPI_IN_REG_CLASS_ATTRIBUTE -#define VP_INCLASS_CONST R300_VPI_IN_REG_CLASS_PARAMETER - -#define VP_IN(class,idx) \ - (((idx) << R300_VPI_IN_REG_INDEX_SHIFT) | \ - VP_INCLASS_##class | \ - (R300_VPI_IN_SELECT_X << R300_VPI_IN_X_SHIFT) | \ - (R300_VPI_IN_SELECT_Y << R300_VPI_IN_Y_SHIFT) | \ - (R300_VPI_IN_SELECT_Z << R300_VPI_IN_Z_SHIFT) | \ - (R300_VPI_IN_SELECT_W << R300_VPI_IN_W_SHIFT)) -#define VP_ZERO() \ - ((R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_X_SHIFT) | \ - (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_Y_SHIFT) | \ - (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_Z_SHIFT) | \ - (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_W_SHIFT)) -#define VP_ONE() \ - ((R300_VPI_IN_SELECT_ONE << R300_VPI_IN_X_SHIFT) | \ - (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_Y_SHIFT) | \ - (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_Z_SHIFT) | \ - (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_W_SHIFT)) - -#define VP_NEG(in,comp) ((in) ^ (R300_VPI_IN_NEG_##comp)) -#define VP_NEGALL(in,comp) VP_NEG(VP_NEG(VP_NEG(VP_NEG((in),X),Y),Z),W) - -/** - * Fragment program helper macros - */ - -/* Produce unshifted source selectors */ -#define FP_TMP(idx) (idx) -#define FP_CONST(idx) ((idx) | (1 << 5)) - -/* Produce source/dest selector dword */ -#define FP_SELC_MASK_NO 0 -#define FP_SELC_MASK_X 1 -#define FP_SELC_MASK_Y 2 -#define FP_SELC_MASK_XY 3 -#define FP_SELC_MASK_Z 4 -#define FP_SELC_MASK_XZ 5 -#define FP_SELC_MASK_YZ 6 -#define FP_SELC_MASK_XYZ 7 - -#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_FPI1_DSTC_SHIFT) | \ - (FP_SELC_MASK_##regmask << 23) | \ - (FP_SELC_MASK_##outmask << 26) | \ - ((src0) << R300_FPI1_SRC0C_SHIFT) | \ - ((src1) << R300_FPI1_SRC1C_SHIFT) | \ - ((src2) << R300_FPI1_SRC2C_SHIFT)) - -#define FP_SELA_MASK_NO 0 -#define FP_SELA_MASK_W 1 - -#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_FPI3_DSTA_SHIFT) | \ - (FP_SELA_MASK_##regmask << 23) | \ - (FP_SELA_MASK_##outmask << 24) | \ - ((src0) << R300_FPI3_SRC0A_SHIFT) | \ - ((src1) << R300_FPI3_SRC1A_SHIFT) | \ - ((src2) << R300_FPI3_SRC2A_SHIFT)) - -/* Produce unshifted argument selectors */ -#define FP_ARGC(source) R300_FPI0_ARGC_##source -#define FP_ARGA(source) R300_FPI2_ARGA_##source -#define FP_ABS(arg) ((arg) | (1 << 6)) -#define FP_NEG(arg) ((arg) ^ (1 << 5)) - -/* Produce instruction dword */ -#define FP_INSTRC(opcode,arg0,arg1,arg2) \ - (R300_FPI0_OUTC_##opcode | \ - ((arg0) << R300_FPI0_ARG0C_SHIFT) | \ - ((arg1) << R300_FPI0_ARG1C_SHIFT) | \ - ((arg2) << R300_FPI0_ARG2C_SHIFT)) - -#define FP_INSTRA(opcode,arg0,arg1,arg2) \ - (R300_FPI2_OUTA_##opcode | \ - ((arg0) << R300_FPI2_ARG0A_SHIFT) | \ - ((arg1) << R300_FPI2_ARG1A_SHIFT) | \ - ((arg2) << R300_FPI2_ARG2A_SHIFT)) - -extern void debug_vp(GLcontext * ctx, struct gl_vertex_program *vp); - -#endif /* __R300_PROGRAM_H__ */ diff --git a/r300/r300_reg.h b/r300/r300_reg.h index e5501b6..778db96 100644 --- a/r300/r300_reg.h +++ b/r300/r300_reg.h @@ -67,9 +67,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* * Vertex Array Processing (VAP) Control - * Stolen from r200 code from Christoph Brill (It's a guess!) */ #define R300_VAP_CNTL 0x2080 +# define R300_PVS_NUM_SLOTS_SHIFT 0 +# define R300_PVS_NUM_CNTLRS_SHIFT 4 +# define R300_PVS_NUM_FPUS_SHIFT 8 +# define R300_VF_MAX_VTX_NUM_SHIFT 18 +# define R300_GL_CLIP_SPACE_DEF (0 << 22) +# define R300_DX_CLIP_SPACE_DEF (1 << 22) +# define R500_TCL_STATE_OPTIMIZATION (1 << 23) /* This register is written directly and also starts data section * in many 3d CP_PACKET3's @@ -106,16 +112,19 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* number of vertices */ # define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 -/* BEGIN: Wild guesses */ +#define R500_VAP_INDEX_OFFSET 0x208c + #define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 # define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) -# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT (1<<1) -# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) /* GUESS */ -# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) /* GUESS */ -# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) /* GUESS */ -# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) #define R300_VAP_OUTPUT_VTX_FMT_1 0x2094 + /* each of the following is 3 bits wide, specifies number + of components */ # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 @@ -124,30 +133,64 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 -/* END: Wild guesses */ +# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1 +# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2 +# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 #define R300_SE_VTE_CNTL 0x20b0 -# define R300_VPORT_X_SCALE_ENA 0x00000001 -# define R300_VPORT_X_OFFSET_ENA 0x00000002 -# define R300_VPORT_Y_SCALE_ENA 0x00000004 -# define R300_VPORT_Y_OFFSET_ENA 0x00000008 -# define R300_VPORT_Z_SCALE_ENA 0x00000010 -# define R300_VPORT_Z_OFFSET_ENA 0x00000020 -# define R300_VTX_XY_FMT 0x00000100 -# define R300_VTX_Z_FMT 0x00000200 -# define R300_VTX_W0_FMT 0x00000400 -# define R300_VTX_W0_NORMALIZE 0x00000800 -# define R300_VTX_ST_DENORMALIZED 0x00001000 +# define R300_VPORT_X_SCALE_ENA (1 << 0) +# define R300_VPORT_X_OFFSET_ENA (1 << 1) +# define R300_VPORT_Y_SCALE_ENA (1 << 2) +# define R300_VPORT_Y_OFFSET_ENA (1 << 3) +# define R300_VPORT_Z_SCALE_ENA (1 << 4) +# define R300_VPORT_Z_OFFSET_ENA (1 << 5) +# define R300_VTX_XY_FMT (1 << 8) +# define R300_VTX_Z_FMT (1 << 9) +# define R300_VTX_W0_FMT (1 << 10) +# define R300_SERIAL_PROC_ENA (1 << 11) /* BEGIN: Vertex data assembly - lots of uncertainties */ /* gap */ +/* Maximum Vertex Indx Clamp */ +#define R300_VAP_VF_MAX_VTX_INDX 0x2134 +/* Minimum Vertex Indx Clamp */ +#define R300_VAP_VF_MIN_VTX_INDX 0x2138 + +/** Vertex assembler/processor control status */ #define R300_VAP_CNTL_STATUS 0x2140 +/* No swap at all (default) */ # define R300_VC_NO_SWAP (0 << 0) +/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */ # define R300_VC_16BIT_SWAP (1 << 0) +/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */ # define R300_VC_32BIT_SWAP (2 << 0) +/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */ +# define R300_VC_HALF_DWORD_SWAP (3 << 0) +/* The TCL engine will not be used (as it is logically or even physically removed) */ # define R300_VAP_TCL_BYPASS (1 << 8) +/* Read only flag if TCL engine is busy. */ +# define R300_VAP_PVS_BUSY (1 << 11) +/* TODO: gap for MAX_MPS */ +/* Read only flag if the vertex store is busy. */ +# define R300_VAP_VS_BUSY (1 << 24) +/* Read only flag if the reciprocal engine is busy. */ +# define R300_VAP_RCP_BUSY (1 << 25) +/* Read only flag if the viewport transform engine is busy. */ +# define R300_VAP_VTE_BUSY (1 << 26) +/* Read only flag if the memory interface unit is busy. */ +# define R300_VAP_MUI_BUSY (1 << 27) +/* Read only flag if the vertex cache is busy. */ +# define R300_VAP_VC_BUSY (1 << 28) +/* Read only flag if the vertex fetcher is busy. */ +# define R300_VAP_VF_BUSY (1 << 29) +/* Read only flag if the register pipeline is busy. */ +# define R300_VAP_REGPIPE_BUSY (1 << 30) +/* Read only flag if the VAP engine is busy. */ +# define R300_VAP_VAP_BUSY (1 << 31) /* gap */ @@ -175,27 +218,31 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Always set COMPONENTS_4 in immediate mode. */ -#define R300_VAP_INPUT_ROUTE_0_0 0x2150 -# define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_2 (1 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_3 (2 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_4 (3 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_RGBA (4 << 0) /* GUESS */ -# define R300_VAP_INPUT_ROUTE_IDX_SHIFT 8 -# define R300_VAP_INPUT_ROUTE_IDX_MASK (31 << 8) /* GUESS */ -# define R300_VAP_INPUT_ROUTE_END (1 << 13) -# define R300_INPUT_ROUTE_IMMEDIATE_MODE (0 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_FLOAT (1 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_UNSIGNED_BYTE (2 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_FLOAT_COLOR (3 << 14) /* GUESS */ -#define R300_VAP_INPUT_ROUTE_0_1 0x2154 -#define R300_VAP_INPUT_ROUTE_0_2 0x2158 -#define R300_VAP_INPUT_ROUTE_0_3 0x215C -#define R300_VAP_INPUT_ROUTE_0_4 0x2160 -#define R300_VAP_INPUT_ROUTE_0_5 0x2164 -#define R300_VAP_INPUT_ROUTE_0_6 0x2168 -#define R300_VAP_INPUT_ROUTE_0_7 0x216C - +#define R300_VAP_PROG_STREAM_CNTL_0 0x2150 +# define R300_DATA_TYPE_0_SHIFT 0 +# define R300_DATA_TYPE_FLOAT_1 0 +# define R300_DATA_TYPE_FLOAT_2 1 +# define R300_DATA_TYPE_FLOAT_3 2 +# define R300_DATA_TYPE_FLOAT_4 3 +# define R300_DATA_TYPE_BYTE 4 +# define R300_DATA_TYPE_D3DCOLOR 5 +# define R300_DATA_TYPE_SHORT_2 6 +# define R300_DATA_TYPE_SHORT_4 7 +# define R300_DATA_TYPE_VECTOR_3_TTT 8 +# define R300_DATA_TYPE_VECTOR_3_EET 9 +# define R300_SKIP_DWORDS_SHIFT 4 +# define R300_DST_VEC_LOC_SHIFT 8 +# define R300_LAST_VEC (1 << 13) +# define R300_SIGNED (1 << 14) +# define R300_NORMALIZE (1 << 15) +# define R300_DATA_TYPE_1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_1 0x2154 +#define R300_VAP_PROG_STREAM_CNTL_2 0x2158 +#define R300_VAP_PROG_STREAM_CNTL_3 0x215C +#define R300_VAP_PROG_STREAM_CNTL_4 0x2160 +#define R300_VAP_PROG_STREAM_CNTL_5 0x2164 +#define R300_VAP_PROG_STREAM_CNTL_6 0x2168 +#define R300_VAP_PROG_STREAM_CNTL_7 0x216C /* gap */ /* Notes: @@ -203,9 +250,26 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * if vertex program uses only position, fglrx will set normal, too * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. */ -#define R300_VAP_INPUT_CNTL_0 0x2180 -# define R300_INPUT_CNTL_0_COLOR 0x00000001 -#define R300_VAP_INPUT_CNTL_1 0x2184 +#define R300_VAP_VTX_STATE_CNTL 0x2180 +# define R300_COLOR_0_ASSEMBLY_SHIFT 0 +# define R300_SEL_COLOR 0 +# define R300_SEL_USER_COLOR_0 1 +# define R300_SEL_USER_COLOR_1 2 +# define R300_COLOR_1_ASSEMBLY_SHIFT 2 +# define R300_COLOR_2_ASSEMBLY_SHIFT 4 +# define R300_COLOR_3_ASSEMBLY_SHIFT 6 +# define R300_COLOR_4_ASSEMBLY_SHIFT 8 +# define R300_COLOR_5_ASSEMBLY_SHIFT 10 +# define R300_COLOR_6_ASSEMBLY_SHIFT 12 +# define R300_COLOR_7_ASSEMBLY_SHIFT 14 +# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16) + +/* + * Each bit in this field applies to the corresponding vector in the VSM + * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit + * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream. + */ +#define R300_VAP_VSM_VTX_ASSM 0x2184 # define R300_INPUT_CNTL_POS 0x00000001 # define R300_INPUT_CNTL_NORMAL 0x00000002 # define R300_INPUT_CNTL_COLOR 0x00000004 @@ -218,6 +282,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */ # define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ +/* Programmable Stream Control Signed Normalize Control */ +#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc +# define SGN_NORM_ZERO 0 +# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 +# define SGN_NORM_NO_ZERO 2 + /* gap */ /* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 @@ -227,26 +297,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * mode, the swizzling pattern is e.g. used to set zw components in texture * coordinates with only tweo components. */ -#define R300_VAP_INPUT_ROUTE_1_0 0x21E0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0 +# define R300_SWIZZLE0_SHIFT 0 +# define R300_SWIZZLE_SELECT_X_SHIFT 0 +# define R300_SWIZZLE_SELECT_Y_SHIFT 3 +# define R300_SWIZZLE_SELECT_Z_SHIFT 6 +# define R300_SWIZZLE_SELECT_W_SHIFT 9 + +# define R300_SWIZZLE_SELECT_X 0 +# define R300_SWIZZLE_SELECT_Y 1 +# define R300_SWIZZLE_SELECT_Z 2 +# define R300_SWIZZLE_SELECT_W 3 +# define R300_SWIZZLE_SELECT_FP_ZERO 4 +# define R300_SWIZZLE_SELECT_FP_ONE 5 +/* alternate forms for r300_emit.c */ # define R300_INPUT_ROUTE_SELECT_X 0 # define R300_INPUT_ROUTE_SELECT_Y 1 # define R300_INPUT_ROUTE_SELECT_Z 2 # define R300_INPUT_ROUTE_SELECT_W 3 # define R300_INPUT_ROUTE_SELECT_ZERO 4 # define R300_INPUT_ROUTE_SELECT_ONE 5 -# define R300_INPUT_ROUTE_SELECT_MASK 7 -# define R300_INPUT_ROUTE_X_SHIFT 0 -# define R300_INPUT_ROUTE_Y_SHIFT 3 -# define R300_INPUT_ROUTE_Z_SHIFT 6 -# define R300_INPUT_ROUTE_W_SHIFT 9 -# define R300_INPUT_ROUTE_ENABLE (15 << 12) -#define R300_VAP_INPUT_ROUTE_1_1 0x21E4 -#define R300_VAP_INPUT_ROUTE_1_2 0x21E8 -#define R300_VAP_INPUT_ROUTE_1_3 0x21EC -#define R300_VAP_INPUT_ROUTE_1_4 0x21F0 -#define R300_VAP_INPUT_ROUTE_1_5 0x21F4 -#define R300_VAP_INPUT_ROUTE_1_6 0x21F8 -#define R300_VAP_INPUT_ROUTE_1_7 0x21FC + +# define R300_WRITE_ENA_SHIFT 12 +# define R300_WRITE_ENA_X 1 +# define R300_WRITE_ENA_Y 2 +# define R300_WRITE_ENA_Z 4 +# define R300_WRITE_ENA_W 8 +# define R300_SWIZZLE1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec +#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc /* END: Vertex data assembly */ @@ -278,23 +362,35 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Multiple vertex programs and parameter sets can be loaded at once, * which could explain the size discrepancy. */ -#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 -# define R300_PVS_UPLOAD_PROGRAM 0x00000000 -# define R300_PVS_UPLOAD_PARAMETERS 0x00000200 -# define R300_PVS_UPLOAD_CLIP_PLANE0 0x00000400 -# define R300_PVS_UPLOAD_CLIP_PLANE1 0x00000401 -# define R300_PVS_UPLOAD_CLIP_PLANE2 0x00000402 -# define R300_PVS_UPLOAD_CLIP_PLANE3 0x00000403 -# define R300_PVS_UPLOAD_CLIP_PLANE4 0x00000404 -# define R300_PVS_UPLOAD_CLIP_PLANE5 0x00000405 -# define R300_PVS_UPLOAD_POINTSIZE 0x00000406 - -# define R500_PVS_UPLOAD_CLIP_PLANE0 0x00000600 -# define R500_PVS_UPLOAD_CLIP_PLANE1 0x00000601 -# define R500_PVS_UPLOAD_CLIP_PLANE2 0x00000602 -# define R500_PVS_UPLOAD_CLIP_PLANE3 0x00000603 -# define R500_PVS_UPLOAD_CLIP_PLANE4 0x00000604 -# define R500_PVS_UPLOAD_CLIP_PLANE5 0x00000605 +#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200 +# define R300_PVS_CODE_START 0 +# define R300_MAX_PVS_CODE_LINES 256 +# define R500_MAX_PVS_CODE_LINES 1024 +# define R300_PVS_CONST_START 512 +# define R500_PVS_CONST_START 1024 +# define R300_MAX_PVS_CONST_VECS 256 +# define R500_MAX_PVS_CONST_VECS 1024 +# define R300_PVS_UCP_START 1024 +# define R500_PVS_UCP_START 1536 +# define R300_POINT_VPORT_SCALE_OFFSET 1030 +# define R500_POINT_VPORT_SCALE_OFFSET 1542 +# define R300_POINT_GEN_TEX_OFFSET 1031 +# define R500_POINT_GEN_TEX_OFFSET 1543 + +/* + * These are obsolete defines form r300_context.h, but they might give some + * clues when investigating the addresses further... + */ +#if 0 +#define VSF_DEST_PROGRAM 0x0 +#define VSF_DEST_MATRIX0 0x200 +#define VSF_DEST_MATRIX1 0x204 +#define VSF_DEST_MATRIX2 0x208 +#define VSF_DEST_VECTOR0 0x20c +#define VSF_DEST_VECTOR1 0x20d +#define VSF_DEST_UNKNOWN1 0x400 +#define VSF_DEST_UNKNOWN2 0x406 +#endif /* gap */ @@ -314,9 +410,33 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view */ #define R300_VAP_CLIP_CNTL 0x221C -# define R300_221C_NORMAL 0x00000000 -# define R300_221C_CLEAR 0x0001C000 -#define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_1 (1 << 1) +# define R300_VAP_UCP_ENABLE_2 (1 << 2) +# define R300_VAP_UCP_ENABLE_3 (1 << 3) +# define R300_VAP_UCP_ENABLE_4 (1 << 4) +# define R300_VAP_UCP_ENABLE_5 (1 << 5) +# define R300_PS_UCP_MODE_DIST_COP (0 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14) +# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14) +# define R300_CLIP_DISABLE (1 << 16) +# define R300_UCP_CULL_ONLY_ENABLE (1 << 17) +# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18) +# define R500_COLOR2_IS_TEXTURE (1 << 20) +# define R500_COLOR3_IS_TEXTURE (1 << 21) + +/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first + * plane is per-pixel and the second plane is per-vertex. + * + * This was determined by experimentation alone but I believe it is correct. + * + * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest. + */ +#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220 +#define R300_VAP_GB_VERT_DISC_ADJ 0x2224 +#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 +#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c /* gap */ @@ -325,10 +445,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and * avoids bugs caused by still running shaders reading bad data from memory. */ -#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */ +#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284 -/* Absolutely no clue what this register is about. */ -#define R300_VAP_UNKNOWN_2288 0x2288 +/* This register is used to define the number of core clocks to wait for a + * vertex to be received by the VAP input controller (while the primitive + * path is backed up) before forcing any accumulated vertices to be submitted + * to the vertex processing path. + */ +#define VAP_PVS_VTX_TIMEOUT_REG 0x2288 # define R300_2288_R300 0x00750000 /* -- nh */ # define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ @@ -341,25 +465,27 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * The meaning of the two UNKNOWN fields is obviously not known. However, * experiments so far have shown that both *must* point to an instruction * inside the vertex program, otherwise the GPU locks up. + * * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and - * CNTL_1_UNKNOWN points to instruction where last write to position takes - * place. + * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to + * position takes place. + * * Most likely this is used to ignore rest of the program in cases * where group of verts arent visible. For some reason this "section" * is sometimes accepted other instruction that have no relationship with - *position calculations. + * position calculations. */ -#define R300_VAP_PVS_CNTL_1 0x22D0 -# define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 -# define R300_PVS_CNTL_1_POS_END_SHIFT 10 -# define R300_PVS_CNTL_1_PROGRAM_END_SHIFT 20 +#define R300_VAP_PVS_CODE_CNTL_0 0x22D0 +# define R300_PVS_FIRST_INST_SHIFT 0 +# define R300_PVS_XYZW_VALID_INST_SHIFT 10 +# define R300_PVS_LAST_INST_SHIFT 20 /* Addresses are relative the the vertex program parameters area. */ -#define R300_VAP_PVS_CNTL_2 0x22D4 -# define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 -# define R300_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 -#define R300_VAP_PVS_CNTL_3 0x22D8 -# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT 10 -# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0 +#define R300_VAP_PVS_CONST_CNTL 0x22D4 +# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 +# define R300_PVS_MAX_CONST_ADDR_SHIFT 16 +#define R300_VAP_PVS_CODE_CNTL_1 0x22D8 +# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 +#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices @@ -407,17 +533,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * (or something closely related to that). * This bit is rather fatal at the time being due to lackings at pixel * shader side + * Specifies top of Raster pipe specific enable controls. */ #define R300_GB_ENABLE 0x4008 -# define R300_GB_POINT_STUFF_ENABLE (1<<0) -# define R300_GB_LINE_STUFF_ENABLE (1<<1) -# define R300_GB_TRIANGLE_STUFF_ENABLE (1<<2) -# define R300_GB_STENCIL_AUTO_ENABLE (1<<4) -# define R300_GB_UNK31 (1<<31) +# define R300_GB_POINT_STUFF_DISABLE (0 << 0) +# define R300_GB_POINT_STUFF_ENABLE (1 << 0) /* Specifies if points will have stuffed texture coordinates. */ +# define R300_GB_LINE_STUFF_DISABLE (0 << 1) +# define R300_GB_LINE_STUFF_ENABLE (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */ +# define R300_GB_TRIANGLE_STUFF_DISABLE (0 << 2) +# define R300_GB_TRIANGLE_STUFF_ENABLE (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */ +# define R300_GB_STENCIL_AUTO_DISABLE (0 << 4) +# define R300_GB_STENCIL_AUTO_ENABLE (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */ +# define R300_GB_STENCIL_AUTO_FORCE (2 << 4) /* Force 0 into dzy low bit. */ + /* each of the following is 2 bits wide */ -#define R300_GB_TEX_REPLICATE 0 -#define R300_GB_TEX_ST 1 -#define R300_GB_TEX_STR 2 +#define R300_GB_TEX_REPLICATE 0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */ +#define R300_GB_TEX_ST 1 /* Stuff with source texture coordinates (S,T). */ +#define R300_GB_TEX_STR 2 /* Stuff with source texture coordinates (S,T,R). */ # define R300_GB_TEX0_SOURCE_SHIFT 16 # define R300_GB_TEX1_SOURCE_SHIFT 18 # define R300_GB_TEX2_SOURCE_SHIFT 20 @@ -428,7 +560,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_GB_TEX7_SOURCE_SHIFT 30 /* MSPOS - positions for multisample antialiasing (?) */ -#define R300_GB_MSPOS0 0x4010 +#define R300_GB_MSPOS0 0x4010 /* shifts - each of the fields is 4 bits */ # define R300_GB_MSPOS0__MS_X0_SHIFT 0 # define R300_GB_MSPOS0__MS_Y0_SHIFT 4 @@ -439,7 +571,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_GB_MSPOS0__MSBD0_Y 24 # define R300_GB_MSPOS0__MSBD0_X 28 -#define R300_GB_MSPOS1 0x4014 +#define R300_GB_MSPOS1 0x4014 # define R300_GB_MSPOS1__MS_X3_SHIFT 0 # define R300_GB_MSPOS1__MS_Y3_SHIFT 4 # define R300_GB_MSPOS1__MS_X4_SHIFT 8 @@ -448,31 +580,47 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_GB_MSPOS1__MS_Y5_SHIFT 20 # define R300_GB_MSPOS1__MSBD1 24 - -#define R300_GB_TILE_CONFIG 0x4018 -# define R300_GB_TILE_ENABLE (1<<0) -# define R300_GB_TILE_PIPE_COUNT_RV300 0 -# define R300_GB_TILE_PIPE_COUNT_R300 (3<<1) -# define R300_GB_TILE_PIPE_COUNT_R420 (7<<1) -# define R300_GB_TILE_PIPE_COUNT_RV410 (3<<1) -# define R300_GB_TILE_SIZE_8 0 -# define R300_GB_TILE_SIZE_16 (1<<4) -# define R300_GB_TILE_SIZE_32 (2<<4) -# define R300_GB_SUPER_SIZE_1 (0<<6) -# define R300_GB_SUPER_SIZE_2 (1<<6) -# define R300_GB_SUPER_SIZE_4 (2<<6) -# define R300_GB_SUPER_SIZE_8 (3<<6) -# define R300_GB_SUPER_SIZE_16 (4<<6) -# define R300_GB_SUPER_SIZE_32 (5<<6) -# define R300_GB_SUPER_SIZE_64 (6<<6) -# define R300_GB_SUPER_SIZE_128 (7<<6) +/* Specifies the graphics pipeline configuration for rasterization. */ +#define R300_GB_TILE_CONFIG 0x4018 +# define R300_GB_TILE_DISABLE (0 << 0) +# define R300_GB_TILE_ENABLE (1 << 0) +# define R300_GB_TILE_PIPE_COUNT_RV300 (0 << 1) /* RV350 (1 pipe, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R300 (3 << 1) /* R300 (2 pipes, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R420_3P (6 << 1) /* R420-3P (3 pipes, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R420 (7 << 1) /* R420 (4 pipes, 1 ctx) */ +# define R300_GB_TILE_SIZE_8 (0 << 4) +# define R300_GB_TILE_SIZE_16 (1 << 4) +# define R300_GB_TILE_SIZE_32 (2 << 4) +# define R300_GB_SUPER_SIZE_1 (0 << 6) +# define R300_GB_SUPER_SIZE_2 (1 << 6) +# define R300_GB_SUPER_SIZE_4 (2 << 6) +# define R300_GB_SUPER_SIZE_8 (3 << 6) +# define R300_GB_SUPER_SIZE_16 (4 << 6) +# define R300_GB_SUPER_SIZE_32 (5 << 6) +# define R300_GB_SUPER_SIZE_64 (6 << 6) +# define R300_GB_SUPER_SIZE_128 (7 << 6) # define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */ # define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */ -# define R300_GB_SUPER_TILE_A 0 -# define R300_GB_SUPER_TILE_B (1<<15) -# define R300_GB_SUBPIXEL_1_12 0 -# define R300_GB_SUBPIXEL_1_16 (1<<16) - +# define R300_GB_SUPER_TILE_A (0 << 15) +# define R300_GB_SUPER_TILE_B (1 << 15) +# define R300_GB_SUBPIXEL_1_12 (0 << 16) +# define R300_GB_SUBPIXEL_1_16 (1 << 16) +# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) +# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) +# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) +# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) +# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) +# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21) +# define GB_TILE_CONFIG_SUBPRECISION (0 << 22) +# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) +# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) +# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) +# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) + +/* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */ #define R300_GB_FIFO_SIZE 0x4024 /* each of the following is 2 bits wide */ #define R300_GB_FIFO_SIZE_32 0 @@ -496,30 +644,102 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ # define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 -#define R300_GB_SELECT 0x401C -# define R300_GB_FOG_SELECT_C0A 0 -# define R300_GB_FOG_SELECT_C1A 1 -# define R300_GB_FOG_SELECT_C2A 2 -# define R300_GB_FOG_SELECT_C3A 3 -# define R300_GB_FOG_SELECT_1_1_W 4 -# define R300_GB_FOG_SELECT_Z 5 -# define R300_GB_DEPTH_SELECT_Z 0 -# define R300_GB_DEPTH_SELECT_1_1_W (1<<3) -# define R300_GB_W_SELECT_1_W 0 -# define R300_GB_W_SELECT_1 (1<<4) - -#define R300_GB_AA_CONFIG 0x4020 -# define R300_AA_DISABLE 0x00 -# define R300_AA_ENABLE 0x01 -# define R300_AA_SUBSAMPLES_2 0 -# define R300_AA_SUBSAMPLES_3 (1<<1) -# define R300_AA_SUBSAMPLES_4 (2<<1) -# define R300_AA_SUBSAMPLES_6 (3<<1) +#define GB_Z_PEQ_CONFIG 0x4028 +# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) +# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) + +/* Specifies various polygon specific selects (fog, depth, perspective). */ +#define R300_GB_SELECT 0x401c +# define R300_GB_FOG_SELECT_C0A (0 << 0) +# define R300_GB_FOG_SELECT_C1A (1 << 0) +# define R300_GB_FOG_SELECT_C2A (2 << 0) +# define R300_GB_FOG_SELECT_C3A (3 << 0) +# define R300_GB_FOG_SELECT_1_1_W (4 << 0) +# define R300_GB_FOG_SELECT_Z (5 << 0) +# define R300_GB_DEPTH_SELECT_Z (0 << 3 +# define R300_GB_DEPTH_SELECT_1_1_W (1 << 3) +# define R300_GB_W_SELECT_1_W (0 << 4) +# define R300_GB_W_SELECT_1 (1 << 4) +# define R300_GB_FOG_STUFF_DISABLE (0 << 5) +# define R300_GB_FOG_STUFF_ENABLE (1 << 5) +# define R300_GB_FOG_STUFF_TEX_SHIFT 6 +# define R300_GB_FOG_STUFF_TEX_MASK 0x000003c0 +# define R300_GB_FOG_STUFF_COMP_SHIFT 10 +# define R300_GB_FOG_STUFF_COMP_MASK 0x00000c00 + +/* Specifies the graphics pipeline configuration for antialiasing. */ +#define GB_AA_CONFIG 0x4020 +# define GB_AA_CONFIG_AA_DISABLE (0 << 0) +# define GB_AA_CONFIG_AA_ENABLE (1 << 0) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) + +/* Selects which of 4 pipes are active. */ +#define GB_PIPE_SELECT 0x402c +# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 +# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 +# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 +# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 +# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 +# define GB_PIPE_SELECT_MAX_PIPE 12 +# define GB_PIPE_SELECT_BAD_PIPES 14 +# define GB_PIPE_SELECT_CONFIG_PIPES 18 + + +/* Specifies the sizes of the various FIFO`s in the sc/rs. */ +#define GB_FIFO_SIZE1 0x4070 +/* High water mark for SC input fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 +# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f +/* High water mark for SC input fifo (B) */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 +# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 +/* High water mark for RS colors' fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 +# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 +/* High water mark for RS textures' fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 +# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 + +/* This table specifies the source location and format for up to 16 texture + * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) + */ +#define R500_RS_IP_0 0x4074 +#define R500_RS_IP_1 0x4078 +#define R500_RS_IP_2 0x407C +#define R500_RS_IP_3 0x4080 +#define R500_RS_IP_4 0x4084 +#define R500_RS_IP_5 0x4088 +#define R500_RS_IP_6 0x408C +#define R500_RS_IP_7 0x4090 +#define R500_RS_IP_8 0x4094 +#define R500_RS_IP_9 0x4098 +#define R500_RS_IP_10 0x409C +#define R500_RS_IP_11 0x40A0 +#define R500_RS_IP_12 0x40A4 +#define R500_RS_IP_13 0x40A8 +#define R500_RS_IP_14 0x40AC +#define R500_RS_IP_15 0x40B0 +#define R500_RS_IP_PTR_K0 62 +#define R500_RS_IP_PTR_K1 63 +#define R500_RS_IP_TEX_PTR_S_SHIFT 0 +#define R500_RS_IP_TEX_PTR_T_SHIFT 6 +#define R500_RS_IP_TEX_PTR_R_SHIFT 12 +#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 +#define R500_RS_IP_COL_PTR_SHIFT 24 +#define R500_RS_IP_COL_FMT_SHIFT 27 +# define R500_RS_COL_PTR(x) (x << 24) +# define R500_RS_COL_FMT(x) (x << 27) +/* gap */ +#define R500_RS_IP_OFFSET_DIS (0 << 31) +#define R500_RS_IP_OFFSET_EN (1 << 31) /* gap */ /* Zero to flush caches. */ -#define R300_TX_CNTL 0x4100 +#define R300_TX_INVALTAGS 0x4100 #define R300_TX_FLUSH 0x0 /* The upper enable bits are guessed, based on fglrx reported limits. */ @@ -541,53 +761,335 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_ENABLE_14 (1 << 14) # define R300_TX_ENABLE_15 (1 << 15) -/* The pointsize is given in multiples of 6. The pointsize can be - * enormous: Clear() renders a single point that fills the entire - * framebuffer. +#define R500_TX_FILTER_4 0x4110 +# define R500_TX_WEIGHT_1_SHIFT (0) +# define R500_TX_WEIGHT_0_SHIFT (11) +# define R500_TX_WEIGHT_PAIR (1<<22) +# define R500_TX_PHASE_SHIFT (23) +# define R500_TX_DIRECTION_HORIZONTAL (0<<27) +# define R500_TX_DIRECTION_VERITCAL (1<<27) + +/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ +#define R300_GA_POINT_S0 0x4200 + +/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ +#define R300_GA_POINT_T0 0x4204 + +/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ +#define R300_GA_POINT_S1 0x4208 + +/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ +#define R300_GA_POINT_T1 0x420c + +/* Specifies amount to shift integer position of vertex (screen space) before + * converting to float for triangle stipple. */ -#define R300_RE_POINTSIZE 0x421C -# define R300_POINTSIZE_Y_SHIFT 0 -# define R300_POINTSIZE_Y_MASK (0xFFFF << 0) /* GUESS */ -# define R300_POINTSIZE_X_SHIFT 16 -# define R300_POINTSIZE_X_MASK (0xFFFF << 16) /* GUESS */ +#define R300_GA_TRIANGLE_STIPPLE 0x4214 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16 +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000 + +/* The pointsize is given in multiples of 6. The pointsize can be enormous: + * Clear() renders a single point that fills the entire framebuffer. + * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in + * 8b precision). + */ +#define R300_GA_POINT_SIZE 0x421C +# define R300_POINTSIZE_Y_SHIFT 0 +# define R300_POINTSIZE_Y_MASK 0x0000ffff +# define R300_POINTSIZE_X_SHIFT 16 +# define R300_POINTSIZE_X_MASK 0xffff0000 # define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) -/* The line width is given in multiples of 6. +/* Blue fill color */ +#define R500_GA_FILL_R 0x4220 + +/* Blue fill color */ +#define R500_GA_FILL_G 0x4224 + +/* Blue fill color */ +#define R500_GA_FILL_B 0x4228 + +/* Alpha fill color */ +#define R500_GA_FILL_A 0x422c + + +/* Specifies maximum and minimum point & sprite sizes for per vertex size + * specification. The lower part (15:0) is MIN and (31:16) is max. + */ +#define R300_GA_POINT_MINMAX 0x4230 +# define R300_GA_POINT_MINMAX_MIN_SHIFT 0 +# define R300_GA_POINT_MINMAX_MIN_MASK (0xFFFF << 0) +# define R300_GA_POINT_MINMAX_MAX_SHIFT 16 +# define R300_GA_POINT_MINMAX_MAX_MASK (0xFFFF << 16) + +/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b + * subprecision); (16.0) fixed format. + * + * The line width is given in multiples of 6. * In default mode lines are classified as vertical lines. * HO: horizontal * VE: vertical or horizontal * HO & VE: no classification */ -#define R300_RE_LINE_CNT 0x4234 -# define R300_LINESIZE_SHIFT 0 -# define R300_LINESIZE_MASK (0xFFFF << 0) /* GUESS */ -# define R300_LINESIZE_MAX (R300_LINESIZE_MASK / 6) +#define R300_GA_LINE_CNTL 0x4234 +# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0 +# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff +# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */ +# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */ +# define R500_GA_LINE_CNTL_SORT_NO (0 << 18) +# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18) +/** TODO: looks wrong */ +# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6) +/** TODO: looks wrong */ # define R300_LINE_CNT_HO (1 << 16) +/** TODO: looks wrong */ # define R300_LINE_CNT_VE (1 << 17) -/* Some sort of scale or clamp value for texcoordless textures. */ -#define R300_RE_UNK4238 0x4238 - -/* Something shade related */ -#define R300_RE_SHADE 0x4274 - -#define R300_RE_SHADE_MODEL 0x4278 -# define R300_RE_SHADE_MODEL_SMOOTH 0x3aaaa -# define R300_RE_SHADE_MODEL_FLAT 0x39595 - -/* Dangerous */ -#define R300_RE_POLYGON_MODE 0x4288 -# define R300_PM_ENABLED (1 << 0) -# define R300_PM_FRONT_POINT (0 << 0) -# define R300_PM_BACK_POINT (0 << 0) -# define R300_PM_FRONT_LINE (1 << 4) -# define R300_PM_FRONT_FILL (1 << 5) -# define R300_PM_BACK_LINE (1 << 7) -# define R300_PM_BACK_FILL (1 << 8) - -/* Fog parameters */ -#define R300_RE_FOG_SCALE 0x4294 -#define R300_RE_FOG_START 0x4298 +/* Line Stipple configuration information. */ +#define R300_GA_LINE_STIPPLE_CONFIG 0x4238 +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2 +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc + +/* Used to load US instructions and constants */ +#define R500_GA_US_VECTOR_INDEX 0x4250 +# define R500_GA_US_VECTOR_INDEX_SHIFT 0 +# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff +# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16) +# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16) +# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17) +# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17) + +/* Data register for loading US instructions and constants */ +#define R500_GA_US_VECTOR_DATA 0x4254 + +/* Specifies color properties and mappings of textures. */ +#define R500_GA_COLOR_CONTROL_PS3 0x4258 +# define R500_TEX0_SHADING_PS3_SOLID (0 << 0) +# define R500_TEX0_SHADING_PS3_FLAT (1 << 0) +# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0) +# define R500_TEX1_SHADING_PS3_SOLID (0 << 2) +# define R500_TEX1_SHADING_PS3_FLAT (1 << 2) +# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2) +# define R500_TEX2_SHADING_PS3_SOLID (0 << 4) +# define R500_TEX2_SHADING_PS3_FLAT (1 << 4) +# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4) +# define R500_TEX3_SHADING_PS3_SOLID (0 << 6) +# define R500_TEX3_SHADING_PS3_FLAT (1 << 6) +# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6) +# define R500_TEX4_SHADING_PS3_SOLID (0 << 8) +# define R500_TEX4_SHADING_PS3_FLAT (1 << 8) +# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8) +# define R500_TEX5_SHADING_PS3_SOLID (0 << 10) +# define R500_TEX5_SHADING_PS3_FLAT (1 << 10) +# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10) +# define R500_TEX6_SHADING_PS3_SOLID (0 << 12) +# define R500_TEX6_SHADING_PS3_FLAT (1 << 12) +# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12) +# define R500_TEX7_SHADING_PS3_SOLID (0 << 14) +# define R500_TEX7_SHADING_PS3_FLAT (1 << 14) +# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14) +# define R500_TEX8_SHADING_PS3_SOLID (0 << 16) +# define R500_TEX8_SHADING_PS3_FLAT (1 << 16) +# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16) +# define R500_TEX9_SHADING_PS3_SOLID (0 << 18) +# define R500_TEX9_SHADING_PS3_FLAT (1 << 18) +# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18) +# define R500_TEX10_SHADING_PS3_SOLID (0 << 20) +# define R500_TEX10_SHADING_PS3_FLAT (1 << 20) +# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20) +# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22) +# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26) + +/* Returns idle status of various G3D block, captured when GA_IDLE written or + * when hard or soft reset asserted. + */ +#define R500_GA_IDLE 0x425c +# define R500_GA_IDLE_PIPE3_Z_IDLE (0 << 0) +# define R500_GA_IDLE_PIPE2_Z_IDLE (0 << 1) +# define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2) +# define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3) +# define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4) +# define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5) +# define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6) +# define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7) +# define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8) +# define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9) +# define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10) +# define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11) +# define R500_GA_IDLE_PIPE1_Z_IDLE (0 << 12) +# define R500_GA_IDLE_PIPE0_Z_IDLE (0 << 13) +# define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14) +# define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15) +# define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16) +# define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17) +# define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18) +# define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19) +# define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20) +# define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21) +# define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22) +# define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23) +# define R500_GA_IDLE_SU_IDLE (0 << 24) +# define R500_GA_IDLE_GA_IDLE (0 << 25) +# define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26) + +/* Current value of stipple accumulator. */ +#define R300_GA_LINE_STIPPLE_VALUE 0x4260 + +/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */ +#define R300_GA_LINE_S0 0x4264 +/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */ +#define R300_GA_LINE_S1 0x4268 + +/* GA Input fifo high water marks */ +#define R500_GA_FIFO_CNTL 0x4270 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3 +# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0 +# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6 + +/* GA enhance/tweaks */ +#define R300_GA_ENHANCE 0x4274 +# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0) +# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */ +# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1) +# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */ +# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */ +# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */ +# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3) +# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */ + +#define R300_GA_COLOR_CONTROL 0x4278 +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0) +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT (1 << 0) +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD (2 << 0) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID (0 << 2) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT (1 << 2) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD (2 << 2) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID (0 << 4) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT (1 << 4) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD (2 << 4) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID (0 << 6) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT (1 << 6) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD (2 << 6) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID (0 << 8) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT (1 << 8) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD (2 << 8) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID (0 << 10) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT (1 << 10) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD (2 << 10) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID (0 << 12) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT (1 << 12) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD (2 << 12) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID (0 << 14) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT (1 << 14) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD (2 << 14) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST (0 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16) + +/** TODO: might be candidate for removal */ +# define R300_RE_SHADE_MODEL_SMOOTH ( \ + R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) +/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */ +# define R300_RE_SHADE_MODEL_FLAT ( \ + R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + +/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ +#define R300_GA_SOLID_RG 0x427c +# define GA_SOLID_RG_COLOR_GREEN_SHIFT 0 +# define GA_SOLID_RG_COLOR_GREEN_MASK 0x0000ffff +# define GA_SOLID_RG_COLOR_RED_SHIFT 16 +# define GA_SOLID_RG_COLOR_RED_MASK 0xffff0000 +/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. */ +#define R300_GA_SOLID_BA 0x4280 +# define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0 +# define GA_SOLID_BA_COLOR_ALPHA_MASK 0x0000ffff +# define GA_SOLID_BA_COLOR_BLUE_SHIFT 16 +# define GA_SOLID_BA_COLOR_BLUE_MASK 0xffff0000 + +/* Polygon Mode + * Dangerous + */ +#define R300_GA_POLY_MODE 0x4288 +# define R300_GA_POLY_MODE_DISABLE (0 << 0) +# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */ +/* reserved */ +# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4) +/* reserved */ +# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7) +/* reserved */ + +/* Specifies the rouding mode for geometry & color SPFP to FP conversions. */ +#define R300_GA_ROUND_MODE 0x428c +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0) +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0) +# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2) +# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2) +# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4) +# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5) +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6 +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0 + +/* Specifies x & y offsets for vertex data after conversion to FP. + * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b + * subprecision). + */ +#define R300_GA_OFFSET 0x4290 +# define R300_GA_OFFSET_X_OFFSET_SHIFT 0 +# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff +# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16 +# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000 + +/* Specifies the scale to apply to fog. */ +#define R300_GA_FOG_SCALE 0x4294 +/* Specifies the offset to apply to fog. */ +#define R300_GA_FOG_OFFSET 0x4298 +/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */ +#define R300_GA_SOFT_RESET 0x429c /* Not sure why there are duplicate of factor and constant values. * My best guess so far is that there are seperate zbiases for test and write. @@ -595,11 +1097,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Some of the tests indicate that fgl has a fallback implementation of zbias * via pixel shaders. */ -#define R300_RE_ZBIAS_CNTL 0x42A0 /* GUESS */ -#define R300_RE_ZBIAS_T_FACTOR 0x42A4 -#define R300_RE_ZBIAS_T_CONSTANT 0x42A8 -#define R300_RE_ZBIAS_W_FACTOR 0x42AC -#define R300_RE_ZBIAS_W_CONSTANT 0x42B0 +#define R300_SU_TEX_WRAP 0x42A0 +#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4 +#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8 +#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC +#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0 /* This register needs to be set to (1<<1) for RV350 to correctly * perform depth test (see --vb-triangles in r300_demo) @@ -610,31 +1112,44 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * One to enable depth test and one for depth write. * Yet this doesnt explain why depth writes work ... */ -#define R300_RE_OCCLUSION_CNTL 0x42B4 -# define R300_OCCLUSION_ON (1<<1) +#define R300_SU_POLY_OFFSET_ENABLE 0x42B4 +# define R300_FRONT_ENABLE (1 << 0) +# define R300_BACK_ENABLE (1 << 1) +# define R300_PARA_ENABLE (1 << 2) -#define R300_RE_CULL_CNTL 0x42B8 +#define R300_SU_CULL_MODE 0x42B8 # define R300_CULL_FRONT (1 << 0) # define R300_CULL_BACK (1 << 1) # define R300_FRONT_FACE_CCW (0 << 2) # define R300_FRONT_FACE_CW (1 << 2) +/* SU Depth Scale value */ +#define R300_SU_DEPTH_SCALE 0x42c0 +/* SU Depth Offset value */ +#define R300_SU_DEPTH_OFFSET 0x42c4 + /* BEGIN: Rasterization / Interpolators - many guesses */ -/* 0_UNKNOWN_18 has always been set except for clear operations. +/* * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends * on the vertex program, *not* the fragment program) */ -#define R300_RS_CNTL_0 0x4300 -# define R300_RS_CNTL_TC_CNT_SHIFT 2 -# define R300_RS_CNTL_TC_CNT_MASK (7 << 2) - /* number of color interpolators used */ -# define R300_RS_CNTL_CI_CNT_SHIFT 7 -# define R300_RS_CNTL_0_UNKNOWN_18 (1 << 18) - /* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n - register. */ -#define R300_RS_CNTL_1 0x4304 +#define R300_RS_COUNT 0x4300 +# define R300_IT_COUNT_SHIFT 0 +# define R300_IT_COUNT_MASK 0x0000007f +# define R300_IC_COUNT_SHIFT 7 +# define R300_IC_COUNT_MASK 0x00000780 +# define R300_W_ADDR_SHIFT 12 +# define R300_W_ADDR_MASK 0x0003f000 +# define R300_HIRES_DIS (0 << 18) +# define R300_HIRES_EN (1 << 18) + +#define R300_RS_INST_COUNT 0x4304 +# define R300_RS_INST_COUNT_SHIFT 0 +# define R300_RS_INST_COUNT_MASK 0x0000000f +# define R300_RS_TX_OFFSET_SHIFT 5 +# define R300_RS_TX_OFFSET_MASK 0x000000e0 /* gap */ @@ -651,63 +1166,108 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Note: The _UNKNOWN constants are always set in their respective * register. I don't know if this is necessary. */ -#define R300_RS_INTERP_0 0x4310 -#define R300_RS_INTERP_1 0x4314 -# define R300_RS_INTERP_1_UNKNOWN 0x40 -#define R300_RS_INTERP_2 0x4318 -# define R300_RS_INTERP_2_UNKNOWN 0x80 -#define R300_RS_INTERP_3 0x431C -# define R300_RS_INTERP_3_UNKNOWN 0xC0 -#define R300_RS_INTERP_4 0x4320 -#define R300_RS_INTERP_5 0x4324 -#define R300_RS_INTERP_6 0x4328 -#define R300_RS_INTERP_7 0x432C -# define R300_RS_INTERP_SRC_SHIFT 2 -# define R300_RS_INTERP_SRC_MASK (7 << 2) -# define R300_RS_INTERP_USED 0x00D10000 +#define R300_RS_IP_0 0x4310 +#define R300_RS_IP_1 0x4314 +#define R300_RS_IP_2 0x4318 +#define R300_RS_IP_3 0x431C +# define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */ +# define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */ +# define R300_RS_TEX_PTR(x) (x << 0) +# define R300_RS_COL_PTR(x) (x << 6) +# define R300_RS_COL_FMT(x) (x << 9) +# define R300_RS_COL_FMT_RGBA 0 +# define R300_RS_COL_FMT_RGB0 1 +# define R300_RS_COL_FMT_RGB1 2 +# define R300_RS_COL_FMT_000A 4 +# define R300_RS_COL_FMT_0000 5 +# define R300_RS_COL_FMT_0001 6 +# define R300_RS_COL_FMT_111A 8 +# define R300_RS_COL_FMT_1110 9 +# define R300_RS_COL_FMT_1111 10 +# define R300_RS_SEL_S(x) (x << 13) +# define R300_RS_SEL_T(x) (x << 16) +# define R300_RS_SEL_R(x) (x << 19) +# define R300_RS_SEL_Q(x) (x << 22) +# define R300_RS_SEL_C0 0 +# define R300_RS_SEL_C1 1 +# define R300_RS_SEL_C2 2 +# define R300_RS_SEL_C3 3 +# define R300_RS_SEL_K0 4 +# define R300_RS_SEL_K1 5 + + +/* */ +#define R500_RS_INST_0 0x4320 +#define R500_RS_INST_1 0x4324 +#define R500_RS_INST_2 0x4328 +#define R500_RS_INST_3 0x432c +#define R500_RS_INST_4 0x4330 +#define R500_RS_INST_5 0x4334 +#define R500_RS_INST_6 0x4338 +#define R500_RS_INST_7 0x433c +#define R500_RS_INST_8 0x4340 +#define R500_RS_INST_9 0x4344 +#define R500_RS_INST_10 0x4348 +#define R500_RS_INST_11 0x434c +#define R500_RS_INST_12 0x4350 +#define R500_RS_INST_13 0x4354 +#define R500_RS_INST_14 0x4358 +#define R500_RS_INST_15 0x435c +#define R500_RS_INST_TEX_ID_SHIFT 0 +#define R500_RS_INST_TEX_CN_WRITE (1 << 4) +#define R500_RS_INST_TEX_ADDR_SHIFT 5 +#define R500_RS_INST_COL_ID_SHIFT 12 +#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) +#define R500_RS_INST_COL_CN_WRITE (1 << 16) +#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16) +#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16) +#define R500_RS_INST_COL_ADDR_SHIFT 18 +#define R500_RS_INST_TEX_ADJ (1 << 25) +#define R500_RS_INST_W_CN (1 << 26) /* These DWORDs control how vertex data is routed into fragment program * registers, after interpolators. */ -#define R300_RS_ROUTE_0 0x4330 -#define R300_RS_ROUTE_1 0x4334 -#define R300_RS_ROUTE_2 0x4338 -#define R300_RS_ROUTE_3 0x433C /* GUESS */ -#define R300_RS_ROUTE_4 0x4340 /* GUESS */ -#define R300_RS_ROUTE_5 0x4344 /* GUESS */ -#define R300_RS_ROUTE_6 0x4348 /* GUESS */ -#define R300_RS_ROUTE_7 0x434C /* GUESS */ -# define R300_RS_ROUTE_SOURCE_INTERP_0 0 -# define R300_RS_ROUTE_SOURCE_INTERP_1 1 -# define R300_RS_ROUTE_SOURCE_INTERP_2 2 -# define R300_RS_ROUTE_SOURCE_INTERP_3 3 -# define R300_RS_ROUTE_SOURCE_INTERP_4 4 -# define R300_RS_ROUTE_SOURCE_INTERP_5 5 /* GUESS */ -# define R300_RS_ROUTE_SOURCE_INTERP_6 6 /* GUESS */ -# define R300_RS_ROUTE_SOURCE_INTERP_7 7 /* GUESS */ -# define R300_RS_ROUTE_ENABLE (1 << 3) /* GUESS */ -# define R300_RS_ROUTE_DEST_SHIFT 6 -# define R300_RS_ROUTE_DEST_MASK (31 << 6) /* GUESS */ - -/* Special handling for color: When the fragment program uses color, - * the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the - * color register index. - * - * Apperently you may set the R300_RS_ROUTE_0_COLOR bit, but not provide any - * R300_RS_ROUTE_0_COLOR_DEST value; this setup is used for clearing the state. - * See r300_ioctl.c:r300EmitClearState. I'm not sure if this setup is strictly - * correct or not. - Oliver. - */ -# define R300_RS_ROUTE_0_COLOR (1 << 14) -# define R300_RS_ROUTE_0_COLOR_DEST_SHIFT 17 -# define R300_RS_ROUTE_0_COLOR_DEST_MASK (31 << 17) /* GUESS */ -/* As above, but for secondary color */ -# define R300_RS_ROUTE_1_COLOR1 (1 << 14) -# define R300_RS_ROUTE_1_COLOR1_DEST_SHIFT 17 -# define R300_RS_ROUTE_1_COLOR1_DEST_MASK (31 << 17) -# define R300_RS_ROUTE_1_UNKNOWN11 (1 << 11) +#define R300_RS_INST_0 0x4330 +#define R300_RS_INST_1 0x4334 +#define R300_RS_INST_2 0x4338 +#define R300_RS_INST_3 0x433C /* GUESS */ +#define R300_RS_INST_4 0x4340 /* GUESS */ +#define R300_RS_INST_5 0x4344 /* GUESS */ +#define R300_RS_INST_6 0x4348 /* GUESS */ +#define R300_RS_INST_7 0x434C /* GUESS */ +# define R300_RS_INST_TEX_ID(x) ((x) << 0) +# define R300_RS_INST_TEX_CN_WRITE (1 << 3) +# define R300_RS_INST_TEX_ADDR_SHIFT 6 +# define R300_RS_INST_COL_ID(x) ((x) << 11) +# define R300_RS_INST_COL_CN_WRITE (1 << 14) +# define R300_RS_INST_COL_ADDR_SHIFT 17 +# define R300_RS_INST_TEX_ADJ (1 << 22) +# define R300_RS_COL_BIAS_UNUSED_SHIFT 23 + /* END: Rasterization / Interpolators - many guesses */ +/* Hierarchical Z Enable */ +#define R300_SC_HYPERZ 0x43a4 +# define R300_SC_HYPERZ_DISABLE (0 << 0) +# define R300_SC_HYPERZ_ENABLE (1 << 0) +# define R300_SC_HYPERZ_MIN (0 << 1) +# define R300_SC_HYPERZ_MAX (1 << 1) +# define R300_SC_HYPERZ_ADJ_256 (0 << 2) +# define R300_SC_HYPERZ_ADJ_128 (1 << 2) +# define R300_SC_HYPERZ_ADJ_64 (2 << 2) +# define R300_SC_HYPERZ_ADJ_32 (3 << 2) +# define R300_SC_HYPERZ_ADJ_16 (4 << 2) +# define R300_SC_HYPERZ_ADJ_8 (5 << 2) +# define R300_SC_HYPERZ_ADJ_4 (6 << 2) +# define R300_SC_HYPERZ_ADJ_2 (7 << 2) +# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5) +# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5) +# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6) +# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6) + +#define R300_SC_EDGERULE 0x43a8 + /* BEGIN: Scissors and cliprects */ /* There are four clipping rectangles. Their corner coordinates are inclusive. @@ -724,21 +1284,21 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * For some reason, the top-left corner of the framebuffer is at (1440, 1440) * for the purpose of clipping and scissors. */ -#define R300_RE_CLIPRECT_TL_0 0x43B0 -#define R300_RE_CLIPRECT_BR_0 0x43B4 -#define R300_RE_CLIPRECT_TL_1 0x43B8 -#define R300_RE_CLIPRECT_BR_1 0x43BC -#define R300_RE_CLIPRECT_TL_2 0x43C0 -#define R300_RE_CLIPRECT_BR_2 0x43C4 -#define R300_RE_CLIPRECT_TL_3 0x43C8 -#define R300_RE_CLIPRECT_BR_3 0x43CC +#define R300_SC_CLIPRECT_TL_0 0x43B0 +#define R300_SC_CLIPRECT_BR_0 0x43B4 +#define R300_SC_CLIPRECT_TL_1 0x43B8 +#define R300_SC_CLIPRECT_BR_1 0x43BC +#define R300_SC_CLIPRECT_TL_2 0x43C0 +#define R300_SC_CLIPRECT_BR_2 0x43C4 +#define R300_SC_CLIPRECT_TL_3 0x43C8 +#define R300_SC_CLIPRECT_BR_3 0x43CC # define R300_CLIPRECT_OFFSET 1440 # define R300_CLIPRECT_MASK 0x1FFF # define R300_CLIPRECT_X_SHIFT 0 # define R300_CLIPRECT_X_MASK (0x1FFF << 0) # define R300_CLIPRECT_Y_SHIFT 13 # define R300_CLIPRECT_Y_MASK (0x1FFF << 13) -#define R300_RE_CLIPRECT_CNTL 0x43D0 +#define R300_SC_CLIP_RULE 0x43D0 # define R300_CLIP_OUT (1 << 0) # define R300_CLIP_0 (1 << 1) # define R300_CLIP_1 (1 << 2) @@ -758,13 +1318,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* gap */ -#define R300_RE_SCISSORS_TL 0x43E0 -#define R300_RE_SCISSORS_BR 0x43E4 +#define R300_SC_SCISSORS_TL 0x43E0 +#define R300_SC_SCISSORS_BR 0x43E4 # define R300_SCISSORS_OFFSET 1440 # define R300_SCISSORS_X_SHIFT 0 # define R300_SCISSORS_X_MASK (0x1FFF << 0) # define R300_SCISSORS_Y_SHIFT 13 # define R300_SCISSORS_Y_MASK (0x1FFF << 13) + +/* Screen door sample mask */ +#define R300_SC_SCREENDOOR 0x43e8 + /* END: Scissors and cliprects */ /* BEGIN: Texture specification */ @@ -774,43 +1338,55 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * unit. This means that e.g. the offset for texture image unit N is found in * register TX_OFFSET_0 + (4*N) */ -#define R300_TX_FILTER_0 0x4400 +#define R300_TX_FILTER0_0 0x4400 +#define R300_TX_FILTER0_1 0x4404 +#define R300_TX_FILTER0_2 0x4408 +#define R300_TX_FILTER0_3 0x440c +#define R300_TX_FILTER0_4 0x4410 +#define R300_TX_FILTER0_5 0x4414 +#define R300_TX_FILTER0_6 0x4418 +#define R300_TX_FILTER0_7 0x441c +#define R300_TX_FILTER0_8 0x4420 +#define R300_TX_FILTER0_9 0x4424 +#define R300_TX_FILTER0_10 0x4428 +#define R300_TX_FILTER0_11 0x442c +#define R300_TX_FILTER0_12 0x4430 +#define R300_TX_FILTER0_13 0x4434 +#define R300_TX_FILTER0_14 0x4438 +#define R300_TX_FILTER0_15 0x443c # define R300_TX_REPEAT 0 # define R300_TX_MIRRORED 1 -# define R300_TX_CLAMP 4 # define R300_TX_CLAMP_TO_EDGE 2 +# define R300_TX_MIRROR_ONCE_TO_EDGE 3 +# define R300_TX_CLAMP 4 +# define R300_TX_MIRROR_ONCE 5 # define R300_TX_CLAMP_TO_BORDER 6 +# define R300_TX_MIRROR_ONCE_TO_BORDER 7 # define R300_TX_WRAP_S_SHIFT 0 # define R300_TX_WRAP_S_MASK (7 << 0) # define R300_TX_WRAP_T_SHIFT 3 # define R300_TX_WRAP_T_MASK (7 << 3) -# define R300_TX_WRAP_Q_SHIFT 6 -# define R300_TX_WRAP_Q_MASK (7 << 6) +# define R300_TX_WRAP_R_SHIFT 6 +# define R300_TX_WRAP_R_MASK (7 << 6) +# define R300_TX_MAG_FILTER_4 (0 << 9) # define R300_TX_MAG_FILTER_NEAREST (1 << 9) # define R300_TX_MAG_FILTER_LINEAR (2 << 9) +# define R300_TX_MAG_FILTER_ANISO (3 << 9) # define R300_TX_MAG_FILTER_MASK (3 << 9) # define R300_TX_MIN_FILTER_NEAREST (1 << 11) # define R300_TX_MIN_FILTER_LINEAR (2 << 11) -# define R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST (5 << 11) -# define R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR (9 << 11) -# define R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 11) -# define R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR (10 << 11) - -/* NOTE: NEAREST doesnt seem to exist. - * Im not seting MAG_FILTER_MASK and (3 << 11) on for all - * anisotropy modes because that would void selected mag filter - */ -# define R300_TX_MIN_FILTER_ANISO_NEAREST (0 << 13) -# define R300_TX_MIN_FILTER_ANISO_LINEAR (0 << 13) -# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (1 << 13) -# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (2 << 13) -# define R300_TX_MIN_FILTER_MASK ( (15 << 11) | (3 << 13) ) -# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) -# define R300_TX_MAX_ANISO_2_TO_1 (2 << 21) -# define R300_TX_MAX_ANISO_4_TO_1 (4 << 21) -# define R300_TX_MAX_ANISO_8_TO_1 (6 << 21) -# define R300_TX_MAX_ANISO_16_TO_1 (8 << 21) -# define R300_TX_MAX_ANISO_MASK (14 << 21) +# define R300_TX_MIN_FILTER_ANISO (3 << 11) +# define R300_TX_MIN_FILTER_MASK (3 << 11) +# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13) +# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) +# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) +# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) +# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) +# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21) +# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21) +# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21) +# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21) +# define R300_TX_MAX_ANISO_MASK (7 << 21) #define R300_TX_FILTER1_0 0x4440 # define R300_CHROMA_KEY_MODE_DISABLE 0 @@ -818,7 +1394,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_CHROMA_KEY_BLEND 2 # define R300_MC_ROUND_NORMAL (0<<2) # define R300_MC_ROUND_MPEG4 (1<<2) -# define R300_LOD_BIAS_MASK 0x1fff +# define R300_LOD_BIAS_SHIFT 3 +# define R300_LOD_BIAS_MASK 0x1ff8 # define R300_EDGE_ANISO_EDGE_DIAG (0<<13) # define R300_EDGE_ANISO_EDGE_ONLY (1<<13) # define R300_MC_COORD_TRUNCATE_DISABLE (0<<14) @@ -829,12 +1406,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_TRI_PERF_3_8 (3<<15) # define R300_ANISO_THRESHOLD_MASK (7<<17) +# define R500_MACRO_SWITCH (1<<22) +# define R500_BORDER_FIX (1<<31) + #define R300_TX_SIZE_0 0x4480 # define R300_TX_WIDTHMASK_SHIFT 0 # define R300_TX_WIDTHMASK_MASK (2047 << 0) # define R300_TX_HEIGHTMASK_SHIFT 11 # define R300_TX_HEIGHTMASK_MASK (2047 << 11) -# define R300_TX_UNK23 (1 << 23) +# define R300_TX_DEPTHMASK_SHIFT 22 +# define R300_TX_DEPTHMASK_MASK (0xf << 22) # define R300_TX_MAX_MIP_LEVEL_SHIFT 26 # define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) # define R300_TX_SIZE_PROJECTED (1<<30) @@ -845,7 +1426,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. They are given meanings as R, G, B and Alpha by the swizzle specification */ # define R300_TX_FORMAT_X8 0x0 +# define R500_TX_FORMAT_X1 0x0 // bit set in format 2 # define R300_TX_FORMAT_X16 0x1 +# define R500_TX_FORMAT_X1_REV 0x0 // bit set in format 2 # define R300_TX_FORMAT_Y4X4 0x2 # define R300_TX_FORMAT_Y8X8 0x3 # define R300_TX_FORMAT_Y16X16 0x4 @@ -866,9 +1449,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */ # define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */ # define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ + + /* These two values are wrong, but they're the only values that + * produce any even vaguely correct results. Can r300 only do 16-bit + * depth textures? + */ +# define R300_TX_FORMAT_X24_Y8 0x1e +# define R300_TX_FORMAT_X32 0x1e + /* 0x16 - some 16 bit green format.. ?? */ -# define R300_TX_FORMAT_UNK25 (1 << 25) /* no swizzle */ -# define R300_TX_FORMAT_CUBIC_MAP (1 << 26) +# define R300_TX_FORMAT_3D (1 << 25) +# define R300_TX_FORMAT_CUBIC_MAP (2 << 25) /* gap */ /* Floating point formats */ @@ -921,7 +1512,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_FORMAT_YUV_MODE 0x00800000 -#define R300_TX_PITCH_0 0x4500 /* obvious missing in gap */ +#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */ +# define R300_TX_PITCHMASK_SHIFT 0 +# define R300_TX_PITCHMASK_MASK (2047 << 0) +# define R500_TXFORMAT_MSB (1 << 14) +# define R500_TXWIDTH_BIT11 (1 << 15) +# define R500_TXHEIGHT_BIT11 (1 << 16) +# define R500_POW2FIX2FLT (1 << 17) +# define R500_SEL_FILTER4_TC0 (0 << 18) +# define R500_SEL_FILTER4_TC1 (1 << 18) +# define R500_SEL_FILTER4_TC2 (2 << 18) +# define R500_SEL_FILTER4_TC3 (3 << 18) + #define R300_TX_OFFSET_0 0x4540 /* BEGIN: Guess from R200 */ # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) @@ -929,15 +1531,50 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) # define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MICRO_TILE_LINEAR (0 << 3) # define R300_TXO_MICRO_TILE (1 << 3) +# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 /* END: Guess from R200 */ /* 32 bit chroma key */ #define R300_TX_CHROMA_KEY_0 0x4580 +#define R300_TX_CHROMA_KEY_1 0x4584 +#define R300_TX_CHROMA_KEY_2 0x4588 +#define R300_TX_CHROMA_KEY_3 0x458c +#define R300_TX_CHROMA_KEY_4 0x4590 +#define R300_TX_CHROMA_KEY_5 0x4594 +#define R300_TX_CHROMA_KEY_6 0x4598 +#define R300_TX_CHROMA_KEY_7 0x459c +#define R300_TX_CHROMA_KEY_8 0x45a0 +#define R300_TX_CHROMA_KEY_9 0x45a4 +#define R300_TX_CHROMA_KEY_10 0x45a8 +#define R300_TX_CHROMA_KEY_11 0x45ac +#define R300_TX_CHROMA_KEY_12 0x45b0 +#define R300_TX_CHROMA_KEY_13 0x45b4 +#define R300_TX_CHROMA_KEY_14 0x45b8 +#define R300_TX_CHROMA_KEY_15 0x45bc /* ff00ff00 == { 0, 1.0, 0, 1.0 } */ -#define R300_TX_BORDER_COLOR_0 0x45C0 + +/* Border Color */ +#define R300_TX_BORDER_COLOR_0 0x45c0 +#define R300_TX_BORDER_COLOR_1 0x45c4 +#define R300_TX_BORDER_COLOR_2 0x45c8 +#define R300_TX_BORDER_COLOR_3 0x45cc +#define R300_TX_BORDER_COLOR_4 0x45d0 +#define R300_TX_BORDER_COLOR_5 0x45d4 +#define R300_TX_BORDER_COLOR_6 0x45d8 +#define R300_TX_BORDER_COLOR_7 0x45dc +#define R300_TX_BORDER_COLOR_8 0x45e0 +#define R300_TX_BORDER_COLOR_9 0x45e4 +#define R300_TX_BORDER_COLOR_10 0x45e8 +#define R300_TX_BORDER_COLOR_11 0x45ec +#define R300_TX_BORDER_COLOR_12 0x45f0 +#define R300_TX_BORDER_COLOR_13 0x45f4 +#define R300_TX_BORDER_COLOR_14 0x45f8 +#define R300_TX_BORDER_COLOR_15 0x45fc + /* END: Texture specification */ @@ -960,23 +1597,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * offsets into the respective instruction streams, while *_END points to the * last instruction relative to this offset. */ -#define R300_PFS_CNTL_0 0x4600 +#define R300_US_CONFIG 0x4600 # define R300_PFS_CNTL_LAST_NODES_SHIFT 0 # define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) # define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) -#define R300_PFS_CNTL_1 0x4604 +#define R300_US_PIXSIZE 0x4604 /* There is an unshifted value here which has so far always been equal to the * index of the highest used temporary register. */ -#define R300_PFS_CNTL_2 0x4608 +#define R300_US_CODE_OFFSET 0x4608 # define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 # define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) # define R300_PFS_CNTL_ALU_END_SHIFT 6 # define R300_PFS_CNTL_ALU_END_MASK (63 << 6) -# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 12 -# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 12) /* GUESS */ +# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13 +# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) # define R300_PFS_CNTL_TEX_END_SHIFT 18 -# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* GUESS */ +# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* gap */ @@ -986,48 +1623,66 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * first node is stored in NODE_2, the second node is stored in NODE_3. * * Offsets are relative to the master offset from PFS_CNTL_2. - * LAST_NODE is set for the last node, and only for the last node. */ -#define R300_PFS_NODE_0 0x4610 -#define R300_PFS_NODE_1 0x4614 -#define R300_PFS_NODE_2 0x4618 -#define R300_PFS_NODE_3 0x461C -# define R300_PFS_NODE_ALU_OFFSET_SHIFT 0 -# define R300_PFS_NODE_ALU_OFFSET_MASK (63 << 0) -# define R300_PFS_NODE_ALU_END_SHIFT 6 -# define R300_PFS_NODE_ALU_END_MASK (63 << 6) -# define R300_PFS_NODE_TEX_OFFSET_SHIFT 12 -# define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12) -# define R300_PFS_NODE_TEX_END_SHIFT 17 -# define R300_PFS_NODE_TEX_END_MASK (31 << 17) -/*# define R300_PFS_NODE_LAST_NODE (1 << 22) */ -# define R300_PFS_NODE_OUTPUT_COLOR (1 << 22) -# define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23) +#define R300_US_CODE_ADDR_0 0x4610 +#define R300_US_CODE_ADDR_1 0x4614 +#define R300_US_CODE_ADDR_2 0x4618 +#define R300_US_CODE_ADDR_3 0x461C +# define R300_ALU_START_SHIFT 0 +# define R300_ALU_START_MASK (63 << 0) +# define R300_ALU_SIZE_SHIFT 6 +# define R300_ALU_SIZE_MASK (63 << 6) +# define R300_TEX_START_SHIFT 12 +# define R300_TEX_START_MASK (31 << 12) +# define R300_TEX_SIZE_SHIFT 17 +# define R300_TEX_SIZE_MASK (31 << 17) +# define R300_RGBA_OUT (1 << 22) +# define R300_W_OUT (1 << 23) /* TEX * As far as I can tell, texture instructions cannot write into output * registers directly. A subsequent ALU instruction is always necessary, * even if it's just MAD o0, r0, 1, 0 */ -#define R300_PFS_TEXI_0 0x4620 -# define R300_FPITX_SRC_SHIFT 0 -# define R300_FPITX_SRC_MASK (31 << 0) - /* GUESS */ -# define R300_FPITX_SRC_CONST (1 << 5) -# define R300_FPITX_DST_SHIFT 6 -# define R300_FPITX_DST_MASK (31 << 6) -# define R300_FPITX_IMAGE_SHIFT 11 - /* GUESS based on layout and native limits */ -# define R300_FPITX_IMAGE_MASK (15 << 11) -/* Unsure if these are opcodes, or some kind of bitfield, but this is how - * they were set when I checked - */ -# define R300_FPITX_OPCODE_SHIFT 15 -# define R300_FPITX_OP_TEX 1 -# define R300_FPITX_OP_KIL 2 -# define R300_FPITX_OP_TXP 3 -# define R300_FPITX_OP_TXB 4 -# define R300_FPITX_OPCODE_MASK (7 << 15) +#define R300_US_TEX_INST_0 0x4620 +# define R300_SRC_ADDR_SHIFT 0 +# define R300_SRC_ADDR_MASK (31 << 0) +# define R300_DST_ADDR_SHIFT 6 +# define R300_DST_ADDR_MASK (31 << 6) +# define R300_TEX_ID_SHIFT 11 +# define R300_TEX_ID_MASK (15 << 11) +# define R300_TEX_INST_SHIFT 15 +# define R300_TEX_OP_NOP 0 +# define R300_TEX_OP_LD 1 +# define R300_TEX_OP_KIL 2 +# define R300_TEX_OP_TXP 3 +# define R300_TEX_OP_TXB 4 +# define R300_TEX_INST_MASK (7 << 15) + +/* Output format from the unfied shader */ +#define R300_US_OUT_FMT 0x46A4 +# define R300_US_OUT_FMT_C4_8 (0 << 0) +# define R300_US_OUT_FMT_C4_10 (1 << 0) +# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R300_US_OUT_FMT_C_16 (3 << 0) +# define R300_US_OUT_FMT_C2_16 (4 << 0) +# define R300_US_OUT_FMT_C4_16 (5 << 0) +# define R300_US_OUT_FMT_C_16_MPEG (6 << 0) +# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0) +# define R300_US_OUT_FMT_C2_4 (8 << 0) +# define R300_US_OUT_FMT_C_3_3_2 (9 << 0) +# define R300_US_OUT_FMT_C_6_5_6 (10 << 0) +# define R300_US_OUT_FMT_C_11_11_10 (11 << 0) +# define R300_US_OUT_FMT_C_10_11_11 (12 << 0) +# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0) +/* reserved */ +# define R300_US_OUT_FMT_UNUSED (15 << 0) +# define R300_US_OUT_FMT_C_16_FP (16 << 0) +# define R300_US_OUT_FMT_C2_16_FP (17 << 0) +# define R300_US_OUT_FMT_C4_16_FP (18 << 0) +# define R300_US_OUT_FMT_C_32_FP (19 << 0) +# define R300_US_OUT_FMT_C2_32_FP (20 << 0) +# define R300_US_OUT_FMT_C4_32_FP (20 << 0) /* ALU * The ALU instructions register blocks are enumerated according to the order @@ -1093,172 +1748,256 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * - Set FPI0/FPI2_SPECIAL_LRP * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD */ -#define R300_PFS_INSTR1_0 0x46C0 -# define R300_FPI1_SRC0C_SHIFT 0 -# define R300_FPI1_SRC0C_MASK (31 << 0) -# define R300_FPI1_SRC0C_CONST (1 << 5) -# define R300_FPI1_SRC1C_SHIFT 6 -# define R300_FPI1_SRC1C_MASK (31 << 6) -# define R300_FPI1_SRC1C_CONST (1 << 11) -# define R300_FPI1_SRC2C_SHIFT 12 -# define R300_FPI1_SRC2C_MASK (31 << 12) -# define R300_FPI1_SRC2C_CONST (1 << 17) -# define R300_FPI1_SRC_MASK 0x0003ffff -# define R300_FPI1_DSTC_SHIFT 18 -# define R300_FPI1_DSTC_MASK (31 << 18) -# define R300_FPI1_DSTC_REG_MASK_SHIFT 23 -# define R300_FPI1_DSTC_REG_X (1 << 23) -# define R300_FPI1_DSTC_REG_Y (1 << 24) -# define R300_FPI1_DSTC_REG_Z (1 << 25) -# define R300_FPI1_DSTC_OUTPUT_MASK_SHIFT 26 -# define R300_FPI1_DSTC_OUTPUT_X (1 << 26) -# define R300_FPI1_DSTC_OUTPUT_Y (1 << 27) -# define R300_FPI1_DSTC_OUTPUT_Z (1 << 28) - -#define R300_PFS_INSTR3_0 0x47C0 -# define R300_FPI3_SRC0A_SHIFT 0 -# define R300_FPI3_SRC0A_MASK (31 << 0) -# define R300_FPI3_SRC0A_CONST (1 << 5) -# define R300_FPI3_SRC1A_SHIFT 6 -# define R300_FPI3_SRC1A_MASK (31 << 6) -# define R300_FPI3_SRC1A_CONST (1 << 11) -# define R300_FPI3_SRC2A_SHIFT 12 -# define R300_FPI3_SRC2A_MASK (31 << 12) -# define R300_FPI3_SRC2A_CONST (1 << 17) -# define R300_FPI3_SRC_MASK 0x0003ffff -# define R300_FPI3_DSTA_SHIFT 18 -# define R300_FPI3_DSTA_MASK (31 << 18) -# define R300_FPI3_DSTA_REG (1 << 23) -# define R300_FPI3_DSTA_OUTPUT (1 << 24) -# define R300_FPI3_DSTA_DEPTH (1 << 27) - -#define R300_PFS_INSTR0_0 0x48C0 -# define R300_FPI0_ARGC_SRC0C_XYZ 0 -# define R300_FPI0_ARGC_SRC0C_XXX 1 -# define R300_FPI0_ARGC_SRC0C_YYY 2 -# define R300_FPI0_ARGC_SRC0C_ZZZ 3 -# define R300_FPI0_ARGC_SRC1C_XYZ 4 -# define R300_FPI0_ARGC_SRC1C_XXX 5 -# define R300_FPI0_ARGC_SRC1C_YYY 6 -# define R300_FPI0_ARGC_SRC1C_ZZZ 7 -# define R300_FPI0_ARGC_SRC2C_XYZ 8 -# define R300_FPI0_ARGC_SRC2C_XXX 9 -# define R300_FPI0_ARGC_SRC2C_YYY 10 -# define R300_FPI0_ARGC_SRC2C_ZZZ 11 -# define R300_FPI0_ARGC_SRC0A 12 -# define R300_FPI0_ARGC_SRC1A 13 -# define R300_FPI0_ARGC_SRC2A 14 -# define R300_FPI0_ARGC_SRC1C_LRP 15 -# define R300_FPI0_ARGC_ZERO 20 -# define R300_FPI0_ARGC_ONE 21 - /* GUESS */ -# define R300_FPI0_ARGC_HALF 22 -# define R300_FPI0_ARGC_SRC0C_YZX 23 -# define R300_FPI0_ARGC_SRC1C_YZX 24 -# define R300_FPI0_ARGC_SRC2C_YZX 25 -# define R300_FPI0_ARGC_SRC0C_ZXY 26 -# define R300_FPI0_ARGC_SRC1C_ZXY 27 -# define R300_FPI0_ARGC_SRC2C_ZXY 28 -# define R300_FPI0_ARGC_SRC0CA_WZY 29 -# define R300_FPI0_ARGC_SRC1CA_WZY 30 -# define R300_FPI0_ARGC_SRC2CA_WZY 31 - -# define R300_FPI0_ARG0C_SHIFT 0 -# define R300_FPI0_ARG0C_MASK (31 << 0) -# define R300_FPI0_ARG0C_NEG (1 << 5) -# define R300_FPI0_ARG0C_ABS (1 << 6) -# define R300_FPI0_ARG1C_SHIFT 7 -# define R300_FPI0_ARG1C_MASK (31 << 7) -# define R300_FPI0_ARG1C_NEG (1 << 12) -# define R300_FPI0_ARG1C_ABS (1 << 13) -# define R300_FPI0_ARG2C_SHIFT 14 -# define R300_FPI0_ARG2C_MASK (31 << 14) -# define R300_FPI0_ARG2C_NEG (1 << 19) -# define R300_FPI0_ARG2C_ABS (1 << 20) -# define R300_FPI0_SPECIAL_LRP (1 << 21) -# define R300_FPI0_OUTC_MAD (0 << 23) -# define R300_FPI0_OUTC_DP3 (1 << 23) -# define R300_FPI0_OUTC_DP4 (2 << 23) -# define R300_FPI0_OUTC_MIN (4 << 23) -# define R300_FPI0_OUTC_MAX (5 << 23) -# define R300_FPI0_OUTC_CMPH (7 << 23) -# define R300_FPI0_OUTC_CMP (8 << 23) -# define R300_FPI0_OUTC_FRC (9 << 23) -# define R300_FPI0_OUTC_REPL_ALPHA (10 << 23) -# define R300_FPI0_OUTC_SAT (1 << 30) -# define R300_FPI0_INSERT_NOP (1 << 31) - -#define R300_PFS_INSTR2_0 0x49C0 -# define R300_FPI2_ARGA_SRC0C_X 0 -# define R300_FPI2_ARGA_SRC0C_Y 1 -# define R300_FPI2_ARGA_SRC0C_Z 2 -# define R300_FPI2_ARGA_SRC1C_X 3 -# define R300_FPI2_ARGA_SRC1C_Y 4 -# define R300_FPI2_ARGA_SRC1C_Z 5 -# define R300_FPI2_ARGA_SRC2C_X 6 -# define R300_FPI2_ARGA_SRC2C_Y 7 -# define R300_FPI2_ARGA_SRC2C_Z 8 -# define R300_FPI2_ARGA_SRC0A 9 -# define R300_FPI2_ARGA_SRC1A 10 -# define R300_FPI2_ARGA_SRC2A 11 -# define R300_FPI2_ARGA_SRC1A_LRP 15 -# define R300_FPI2_ARGA_ZERO 16 -# define R300_FPI2_ARGA_ONE 17 - /* GUESS */ -# define R300_FPI2_ARGA_HALF 18 -# define R300_FPI2_ARG0A_SHIFT 0 -# define R300_FPI2_ARG0A_MASK (31 << 0) -# define R300_FPI2_ARG0A_NEG (1 << 5) - /* GUESS */ -# define R300_FPI2_ARG0A_ABS (1 << 6) -# define R300_FPI2_ARG1A_SHIFT 7 -# define R300_FPI2_ARG1A_MASK (31 << 7) -# define R300_FPI2_ARG1A_NEG (1 << 12) - /* GUESS */ -# define R300_FPI2_ARG1A_ABS (1 << 13) -# define R300_FPI2_ARG2A_SHIFT 14 -# define R300_FPI2_ARG2A_MASK (31 << 14) -# define R300_FPI2_ARG2A_NEG (1 << 19) - /* GUESS */ -# define R300_FPI2_ARG2A_ABS (1 << 20) -# define R300_FPI2_SPECIAL_LRP (1 << 21) -# define R300_FPI2_OUTA_MAD (0 << 23) -# define R300_FPI2_OUTA_DP4 (1 << 23) -# define R300_FPI2_OUTA_MIN (2 << 23) -# define R300_FPI2_OUTA_MAX (3 << 23) -# define R300_FPI2_OUTA_CMP (6 << 23) -# define R300_FPI2_OUTA_FRC (7 << 23) -# define R300_FPI2_OUTA_EX2 (8 << 23) -# define R300_FPI2_OUTA_LG2 (9 << 23) -# define R300_FPI2_OUTA_RCP (10 << 23) -# define R300_FPI2_OUTA_RSQ (11 << 23) -# define R300_FPI2_OUTA_SAT (1 << 30) -# define R300_FPI2_UNKNOWN_31 (1 << 31) +#define R300_US_ALU_RGB_ADDR_0 0x46C0 +# define R300_ALU_SRC0C_SHIFT 0 +# define R300_ALU_SRC0C_MASK (31 << 0) +# define R300_ALU_SRC0C_CONST (1 << 5) +# define R300_ALU_SRC1C_SHIFT 6 +# define R300_ALU_SRC1C_MASK (31 << 6) +# define R300_ALU_SRC1C_CONST (1 << 11) +# define R300_ALU_SRC2C_SHIFT 12 +# define R300_ALU_SRC2C_MASK (31 << 12) +# define R300_ALU_SRC2C_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTC_SHIFT 18 +# define R300_ALU_DSTC_MASK (31 << 18) +# define R300_ALU_DSTC_REG_MASK_SHIFT 23 +# define R300_ALU_DSTC_REG_X (1 << 23) +# define R300_ALU_DSTC_REG_Y (1 << 24) +# define R300_ALU_DSTC_REG_Z (1 << 25) +# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26 +# define R300_ALU_DSTC_OUTPUT_X (1 << 26) +# define R300_ALU_DSTC_OUTPUT_Y (1 << 27) +# define R300_ALU_DSTC_OUTPUT_Z (1 << 28) + +#define R300_US_ALU_ALPHA_ADDR_0 0x47C0 +# define R300_ALU_SRC0A_SHIFT 0 +# define R300_ALU_SRC0A_MASK (31 << 0) +# define R300_ALU_SRC0A_CONST (1 << 5) +# define R300_ALU_SRC1A_SHIFT 6 +# define R300_ALU_SRC1A_MASK (31 << 6) +# define R300_ALU_SRC1A_CONST (1 << 11) +# define R300_ALU_SRC2A_SHIFT 12 +# define R300_ALU_SRC2A_MASK (31 << 12) +# define R300_ALU_SRC2A_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTA_SHIFT 18 +# define R300_ALU_DSTA_MASK (31 << 18) +# define R300_ALU_DSTA_REG (1 << 23) +# define R300_ALU_DSTA_OUTPUT (1 << 24) +# define R300_ALU_DSTA_DEPTH (1 << 27) + +#define R300_US_ALU_RGB_INST_0 0x48C0 +# define R300_ALU_ARGC_SRC0C_XYZ 0 +# define R300_ALU_ARGC_SRC0C_XXX 1 +# define R300_ALU_ARGC_SRC0C_YYY 2 +# define R300_ALU_ARGC_SRC0C_ZZZ 3 +# define R300_ALU_ARGC_SRC1C_XYZ 4 +# define R300_ALU_ARGC_SRC1C_XXX 5 +# define R300_ALU_ARGC_SRC1C_YYY 6 +# define R300_ALU_ARGC_SRC1C_ZZZ 7 +# define R300_ALU_ARGC_SRC2C_XYZ 8 +# define R300_ALU_ARGC_SRC2C_XXX 9 +# define R300_ALU_ARGC_SRC2C_YYY 10 +# define R300_ALU_ARGC_SRC2C_ZZZ 11 +# define R300_ALU_ARGC_SRC0A 12 +# define R300_ALU_ARGC_SRC1A 13 +# define R300_ALU_ARGC_SRC2A 14 +# define R300_ALU_ARGC_SRCP_XYZ 15 +# define R300_ALU_ARGC_SRCP_XXX 16 +# define R300_ALU_ARGC_SRCP_YYY 17 +# define R300_ALU_ARGC_SRCP_ZZZ 18 +# define R300_ALU_ARGC_SRCP_WWW 19 +# define R300_ALU_ARGC_ZERO 20 +# define R300_ALU_ARGC_ONE 21 +# define R300_ALU_ARGC_HALF 22 +# define R300_ALU_ARGC_SRC0C_YZX 23 +# define R300_ALU_ARGC_SRC1C_YZX 24 +# define R300_ALU_ARGC_SRC2C_YZX 25 +# define R300_ALU_ARGC_SRC0C_ZXY 26 +# define R300_ALU_ARGC_SRC1C_ZXY 27 +# define R300_ALU_ARGC_SRC2C_ZXY 28 +# define R300_ALU_ARGC_SRC0CA_WZY 29 +# define R300_ALU_ARGC_SRC1CA_WZY 30 +# define R300_ALU_ARGC_SRC2CA_WZY 31 + +# define R300_ALU_ARG0C_SHIFT 0 +# define R300_ALU_ARG0C_MASK (31 << 0) +# define R300_ALU_ARG0C_NOP (0 << 5) +# define R300_ALU_ARG0C_NEG (1 << 5) +# define R300_ALU_ARG0C_ABS (2 << 5) +# define R300_ALU_ARG0C_NAB (3 << 5) +# define R300_ALU_ARG1C_SHIFT 7 +# define R300_ALU_ARG1C_MASK (31 << 7) +# define R300_ALU_ARG1C_NOP (0 << 12) +# define R300_ALU_ARG1C_NEG (1 << 12) +# define R300_ALU_ARG1C_ABS (2 << 12) +# define R300_ALU_ARG1C_NAB (3 << 12) +# define R300_ALU_ARG2C_SHIFT 14 +# define R300_ALU_ARG2C_MASK (31 << 14) +# define R300_ALU_ARG2C_NOP (0 << 19) +# define R300_ALU_ARG2C_NEG (1 << 19) +# define R300_ALU_ARG2C_ABS (2 << 19) +# define R300_ALU_ARG2C_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTC_MAD (0 << 23) +# define R300_ALU_OUTC_DP3 (1 << 23) +# define R300_ALU_OUTC_DP4 (2 << 23) +# define R300_ALU_OUTC_D2A (3 << 23) +# define R300_ALU_OUTC_MIN (4 << 23) +# define R300_ALU_OUTC_MAX (5 << 23) +# define R300_ALU_OUTC_CMPH (7 << 23) +# define R300_ALU_OUTC_CMP (8 << 23) +# define R300_ALU_OUTC_FRC (9 << 23) +# define R300_ALU_OUTC_REPL_ALPHA (10 << 23) + +# define R300_ALU_OUTC_MOD_NOP (0 << 27) +# define R300_ALU_OUTC_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTC_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTC_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTC_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTC_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTC_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTC_CLAMP (1 << 30) +# define R300_ALU_INSERT_NOP (1 << 31) + +#define R300_US_ALU_ALPHA_INST_0 0x49C0 +# define R300_ALU_ARGA_SRC0C_X 0 +# define R300_ALU_ARGA_SRC0C_Y 1 +# define R300_ALU_ARGA_SRC0C_Z 2 +# define R300_ALU_ARGA_SRC1C_X 3 +# define R300_ALU_ARGA_SRC1C_Y 4 +# define R300_ALU_ARGA_SRC1C_Z 5 +# define R300_ALU_ARGA_SRC2C_X 6 +# define R300_ALU_ARGA_SRC2C_Y 7 +# define R300_ALU_ARGA_SRC2C_Z 8 +# define R300_ALU_ARGA_SRC0A 9 +# define R300_ALU_ARGA_SRC1A 10 +# define R300_ALU_ARGA_SRC2A 11 +# define R300_ALU_ARGA_SRCP_X 12 +# define R300_ALU_ARGA_SRCP_Y 13 +# define R300_ALU_ARGA_SRCP_Z 14 +# define R300_ALU_ARGA_SRCP_W 15 + +# define R300_ALU_ARGA_ZERO 16 +# define R300_ALU_ARGA_ONE 17 +# define R300_ALU_ARGA_HALF 18 +# define R300_ALU_ARG0A_SHIFT 0 +# define R300_ALU_ARG0A_MASK (31 << 0) +# define R300_ALU_ARG0A_NOP (0 << 5) +# define R300_ALU_ARG0A_NEG (1 << 5) +# define R300_ALU_ARG0A_ABS (2 << 5) +# define R300_ALU_ARG0A_NAB (3 << 5) +# define R300_ALU_ARG1A_SHIFT 7 +# define R300_ALU_ARG1A_MASK (31 << 7) +# define R300_ALU_ARG1A_NOP (0 << 12) +# define R300_ALU_ARG1A_NEG (1 << 12) +# define R300_ALU_ARG1A_ABS (2 << 12) +# define R300_ALU_ARG1A_NAB (3 << 12) +# define R300_ALU_ARG2A_SHIFT 14 +# define R300_ALU_ARG2A_MASK (31 << 14) +# define R300_ALU_ARG2A_NOP (0 << 19) +# define R300_ALU_ARG2A_NEG (1 << 19) +# define R300_ALU_ARG2A_ABS (2 << 19) +# define R300_ALU_ARG2A_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTA_MAD (0 << 23) +# define R300_ALU_OUTA_DP4 (1 << 23) +# define R300_ALU_OUTA_MIN (2 << 23) +# define R300_ALU_OUTA_MAX (3 << 23) +# define R300_ALU_OUTA_CND (5 << 23) +# define R300_ALU_OUTA_CMP (6 << 23) +# define R300_ALU_OUTA_FRC (7 << 23) +# define R300_ALU_OUTA_EX2 (8 << 23) +# define R300_ALU_OUTA_LG2 (9 << 23) +# define R300_ALU_OUTA_RCP (10 << 23) +# define R300_ALU_OUTA_RSQ (11 << 23) + +# define R300_ALU_OUTA_MOD_NOP (0 << 27) +# define R300_ALU_OUTA_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTA_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTA_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTA_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTA_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTA_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTA_CLAMP (1 << 30) /* END: Fragment program instruction set */ -/* Fog state and color */ -#define R300_RE_FOG_STATE 0x4BC0 -# define R300_FOG_ENABLE (1 << 0) -# define R300_FOG_MODE_LINEAR (0 << 1) -# define R300_FOG_MODE_EXP (1 << 1) -# define R300_FOG_MODE_EXP2 (2 << 1) -# define R300_FOG_MODE_MASK (3 << 1) -#define R300_FOG_COLOR_R 0x4BC8 -#define R300_FOG_COLOR_G 0x4BCC -#define R300_FOG_COLOR_B 0x4BD0 - -#define R300_PP_ALPHA_TEST 0x4BD4 -# define R300_REF_ALPHA_MASK 0x000000ff -# define R300_ALPHA_TEST_FAIL (0 << 8) -# define R300_ALPHA_TEST_LESS (1 << 8) -# define R300_ALPHA_TEST_LEQUAL (3 << 8) -# define R300_ALPHA_TEST_EQUAL (2 << 8) -# define R300_ALPHA_TEST_GEQUAL (6 << 8) -# define R300_ALPHA_TEST_GREATER (4 << 8) -# define R300_ALPHA_TEST_NEQUAL (5 << 8) -# define R300_ALPHA_TEST_PASS (7 << 8) -# define R300_ALPHA_TEST_OP_MASK (7 << 8) -# define R300_ALPHA_TEST_ENABLE (1 << 11) +/* Fog: Fog Blending Enable */ +#define R300_FG_FOG_BLEND 0x4bc0 +# define R300_FG_FOG_BLEND_DISABLE (0 << 0) +# define R300_FG_FOG_BLEND_ENABLE (1 << 0) +# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1) +# define R300_FG_FOG_BLEND_FN_EXP (1 << 1) +# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1) +# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1) +# define R300_FG_FOG_BLEND_FN_MASK (3 << 1) + +/* Fog: Red Component of Fog Color */ +#define R300_FG_FOG_COLOR_R 0x4bc8 +/* Fog: Green Component of Fog Color */ +#define R300_FG_FOG_COLOR_G 0x4bcc +/* Fog: Blue Component of Fog Color */ +#define R300_FG_FOG_COLOR_B 0x4bd0 +# define R300_FG_FOG_COLOR_MASK 0x000003ff + +/* Fog: Constant Factor for Fog Blending */ +#define R300_FG_FOG_FACTOR 0x4bc4 +# define FG_FOG_FACTOR_MASK 0x000003ff + +/* Fog: Alpha function */ +#define R300_FG_ALPHA_FUNC 0x4bd4 +# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff +# define R300_FG_ALPHA_FUNC_NEVER (0 << 8) +# define R300_FG_ALPHA_FUNC_LESS (1 << 8) +# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8) +# define R300_FG_ALPHA_FUNC_LE (3 << 8) +# define R300_FG_ALPHA_FUNC_GREATER (4 << 8) +# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8) +# define R300_FG_ALPHA_FUNC_GE (6 << 8) +# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8) +# define R300_ALPHA_TEST_OP_MASK (7 << 8) +# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11) +# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11) + +# define R500_FG_ALPHA_FUNC_10BIT (0 << 12) +# define R500_FG_ALPHA_FUNC_8BIT (1 << 12) + +# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16) +# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16) +# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17) +# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17) + +# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20) +# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20) + +# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24) +# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */ +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25) +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25) + +# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28) +# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28) + + +/* Fog: Where does the depth come from? */ +#define R300_FG_DEPTH_SRC 0x4bd8 +# define R300_FG_DEPTH_SRC_SCAN (0 << 0) +# define R300_FG_DEPTH_SRC_SHADER (1 << 0) + +/* Fog: Alpha Compare Value */ +#define R500_FG_ALPHA_VALUE 0x4be0 +# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff /* gap */ @@ -1267,12 +2006,33 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_PFS_PARAM_0_Y 0x4C04 #define R300_PFS_PARAM_0_Z 0x4C08 #define R300_PFS_PARAM_0_W 0x4C0C -/* GUESS: PARAM_31 is last, based on native limits reported by fglrx */ +/* last consts */ #define R300_PFS_PARAM_31_X 0x4DF0 #define R300_PFS_PARAM_31_Y 0x4DF4 #define R300_PFS_PARAM_31_Z 0x4DF8 #define R300_PFS_PARAM_31_W 0x4DFC +/* Unpipelined. */ +#define R300_RB3D_CCTL 0x4e00 +# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5) +# define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7) +# define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7) +# define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9) +# define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9) +# define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10) +# define R300_RB3D_CCTL_CMASK_ENABLE (1 << 10) +/* reserved */ +# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE (0 << 12) +# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE (1 << 12) +# define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE (0 << 13) +# define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE (1 << 13) +# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE (0 << 14) +# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE (1 << 14) + + /* Notes: * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in * the application @@ -1284,9 +2044,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_RB3D_CBLEND 0x4E04 #define R300_RB3D_ABLEND 0x4E08 /* the following only appear in CBLEND */ -# define R300_BLEND_ENABLE (1 << 0) -# define R300_BLEND_UNKNOWN (3 << 1) -# define R300_BLEND_NO_SEPARATE (1 << 3) +# define R300_ALPHA_BLEND_ENABLE (1 << 0) +# define R300_SEPARATE_ALPHA_ENABLE (1 << 1) +# define R300_READ_ENABLE (1 << 2) +# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) + /* the following are shared between CBLEND and ABLEND */ # define R300_FCN_MASK (3 << 12) # define R300_COMB_FCN_ADD_CLAMP (0 << 12) @@ -1315,67 +2083,213 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_BLEND_MASK (63) # define R300_SRC_BLEND_SHIFT (16) # define R300_DST_BLEND_SHIFT (24) + +/* Constant color used by the blender. Pipelined through the blender. + * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE, + * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead. + */ #define R300_RB3D_BLEND_COLOR 0x4E10 -#define R300_RB3D_COLORMASK 0x4E0C -# define R300_COLORMASK0_B (1<<0) -# define R300_COLORMASK0_G (1<<1) -# define R300_COLORMASK0_R (1<<2) -# define R300_COLORMASK0_A (1<<3) -/* gap */ -#define R300_RB3D_COLOROFFSET0 0x4E28 -# define R300_COLOROFFSET_MASK 0xFFFFFFF0 /* GUESS */ -#define R300_RB3D_COLOROFFSET1 0x4E2C /* GUESS */ -#define R300_RB3D_COLOROFFSET2 0x4E30 /* GUESS */ -#define R300_RB3D_COLOROFFSET3 0x4E34 /* GUESS */ +/* 3D Color Channel Mask. If all the channels used in the current color format + * are disabled, then the cb will discard all the incoming quads. Pipelined + * through the blender. + */ +#define RB3D_COLOR_CHANNEL_MASK 0x4E0C +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 (1 << 0) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK0 (1 << 2) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1 (1 << 4) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK1 (1 << 6) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2 (1 << 8) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK2 (1 << 10) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3 (1 << 12) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK3 (1 << 14) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15) + +/* Clear color that is used when the color mask is set to 00. Unpipelined. + * Program this register with a 32-bit value in ARGB8888 or ARGB2101010 + * formats, ignoring the fields. + */ +#define RB3D_COLOR_CLEAR_VALUE 0x4e14 /* gap */ -/* Bit 16: Larger tiles +/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */ +#define RB3D_CLRCMP_CLR 0x4e20 + +/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */ +#define RB3D_CLRCMP_MSK 0x4e24 + +/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */ +#define R300_RB3D_COLOROFFSET0 0x4E28 +# define R300_COLOROFFSET_MASK 0xFFFFFFE0 +/* Color Buffer Address Offset of multibuffer 1. Unpipelined. */ +#define R300_RB3D_COLOROFFSET1 0x4E2C +/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */ +#define R300_RB3D_COLOROFFSET2 0x4E30 +/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */ +#define R300_RB3D_COLOROFFSET3 0x4E34 + +/* Color buffer format and tiling control for all the multibuffers and the + * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any + * of the registers are changed. + * + * Bit 16: Larger tiles * Bit 17: 4x2 tiles * Bit 18: Extremely weird tile like, but some pixels duplicated? */ #define R300_RB3D_COLORPITCH0 0x4E38 -# define R300_COLORPITCH_MASK 0x00001FF8 /* GUESS */ -# define R300_COLOR_TILE_ENABLE (1 << 16) /* GUESS */ -# define R300_COLOR_MICROTILE_ENABLE (1 << 17) /* GUESS */ -# define R300_COLOR_ENDIAN_NO_SWAP (0 << 18) /* GUESS */ -# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */ -# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */ -# define R300_COLOR_FORMAT_RGB565 (2 << 22) -# define R300_COLOR_FORMAT_ARGB8888 (3 << 22) -#define R300_RB3D_COLORPITCH1 0x4E3C /* GUESS */ -#define R300_RB3D_COLORPITCH2 0x4E40 /* GUESS */ -#define R300_RB3D_COLORPITCH3 0x4E44 /* GUESS */ +# define R300_COLORPITCH_MASK 0x00003FFE +# define R300_COLOR_TILE_DISABLE (0 << 16) +# define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_MICROTILE_DISABLE (0 << 17) +# define R300_COLOR_MICROTILE_ENABLE (1 << 17) +# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) +# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) +# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) +# define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19) +# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21) +# define R500_COLOR_FORMAT_UV1010 (1 << 21) +# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */ +# define R300_COLOR_FORMAT_ARGB1555 (3 << 21) +# define R300_COLOR_FORMAT_RGB565 (4 << 21) +# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21) +# define R300_COLOR_FORMAT_ARGB8888 (6 << 21) +# define R300_COLOR_FORMAT_ARGB32323232 (7 << 21) +/* reserved */ +# define R300_COLOR_FORMAT_I8 (9 << 21) +# define R300_COLOR_FORMAT_ARGB16161616 (10 << 21) +# define R300_COLOR_FORMAT_VYUY (11 << 21) +# define R300_COLOR_FORMAT_YVYU (12 << 21) +# define R300_COLOR_FORMAT_UV88 (13 << 21) +# define R500_COLOR_FORMAT_I10 (14 << 21) +# define R300_COLOR_FORMAT_ARGB4444 (15 << 21) +#define R300_RB3D_COLORPITCH1 0x4E3C +#define R300_RB3D_COLORPITCH2 0x4E40 +#define R300_RB3D_COLORPITCH3 0x4E44 /* gap */ -/* Guess by Vladimir. +/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then + * a flush or free will not occur upon a write to this register, but a sync + * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE + * are zero but DC_FINISH is one, then a sync will be sent immediately -- the + * cb will not wait for all the previous operations to complete before sending + * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to + * zero. + * * Set to 0A before 3D operations, set to 02 afterwards. */ -#define R300_RB3D_DSTCACHE_CTLSTAT 0x4E4C -# define R300_RB3D_DSTCACHE_UNKNOWN_02 0x00000002 -# define R300_RB3D_DSTCACHE_UNKNOWN_0A 0x0000000A +#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4) + +#define R300_RB3D_DITHER_CTL 0x4E50 +# define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0) +# define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND (1 << 0) +# define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT (2 << 0) +/* reserved */ +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE (0 << 2) +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND (1 << 2) +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2) +/* reserved */ + +/* Resolve buffer destination address. The cache must be empty before changing + * this register if the cb is in resolve mode. Unpipelined + */ +#define R300_RB3D_AARESOLVE_OFFSET 0x4e80 +# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5 +# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */ + +/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before + * changing this register if the cb is in resolve mode. Unpipelined + */ +#define R300_RB3D_AARESOLVE_PITCH 0x4e84 +# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1 +# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */ + +/* Resolve Buffer Control. Unpipelined */ +#define R300_RB3D_AARESOLVE_CTL 0x4e88 +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) + + +/* Discard src pixels less than or equal to threshold. */ +#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 +/* Discard src pixels greater than or equal to threshold. */ +#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000 + +/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. */ +#define R300_RB3D_ROPCNTL 0x4e18 +# define R300_RB3D_ROPCNTL_ROP_ENABLE 0x00000004 +# define R300_RB3D_ROPCNTL_ROP_MASK (15 << 8) +# define R300_RB3D_ROPCNTL_ROP_SHIFT 8 + +/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */ +#define R300_RB3D_CLRCMP_FLIPE 0x4e1c + +/* Sets the fifo sizes */ +#define R500_RB3D_FIFO_SIZE 0x4ef4 +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0) + +/* Constant color used by the blender. Pipelined through the blender. */ +#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8 +# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16 + +/* Constant color used by the blender. Pipelined through the blender. */ +#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16 /* gap */ /* There seems to be no "write only" setting, so use Z-test = ALWAYS * for this. * Bit (1<<8) is the "test" bit. so plain write is 6 - vd */ -#define R300_RB3D_ZSTENCIL_CNTL_0 0x4F00 -# define R300_RB3D_Z_DISABLED_1 0x00000010 -# define R300_RB3D_Z_DISABLED_2 0x00000014 -# define R300_RB3D_Z_TEST 0x00000012 -# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016 -# define R300_RB3D_Z_WRITE_ONLY 0x00000006 - -# define R300_RB3D_Z_TEST 0x00000012 -# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016 -# define R300_RB3D_Z_WRITE_ONLY 0x00000006 -# define R300_RB3D_STENCIL_ENABLE 0x00000001 - -#define R300_RB3D_ZSTENCIL_CNTL_1 0x4F04 +#define R300_ZB_CNTL 0x4F00 +# define R300_STENCIL_ENABLE (1 << 0) +# define R300_Z_ENABLE (1 << 1) +# define R300_Z_WRITE_ENABLE (1 << 2) +# define R300_Z_SIGNED_COMPARE (1 << 3) +# define R300_STENCIL_FRONT_BACK (1 << 4) + +#define R300_ZB_ZSTENCILCNTL 0x4f04 /* functions */ # define R300_ZS_NEVER 0 # define R300_ZS_LESS 1 @@ -1395,162 +2309,344 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ZS_INVERT 5 # define R300_ZS_INCR_WRAP 6 # define R300_ZS_DECR_WRAP 7 +# define R300_Z_FUNC_SHIFT 0 /* front and back refer to operations done for front and back faces, i.e. separate stencil function support */ -# define R300_RB3D_ZS1_DEPTH_FUNC_SHIFT 0 -# define R300_RB3D_ZS1_FRONT_FUNC_SHIFT 3 -# define R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT 6 -# define R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT 9 -# define R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT 12 -# define R300_RB3D_ZS1_BACK_FUNC_SHIFT 15 -# define R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT 18 -# define R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT 21 -# define R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT 24 - -#define R300_RB3D_ZSTENCIL_CNTL_2 0x4F08 -# define R300_RB3D_ZS2_STENCIL_REF_SHIFT 0 -# define R300_RB3D_ZS2_STENCIL_MASK 0xFF -# define R300_RB3D_ZS2_STENCIL_MASK_SHIFT 8 -# define R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT 16 +# define R300_S_FRONT_FUNC_SHIFT 3 +# define R300_S_FRONT_SFAIL_OP_SHIFT 6 +# define R300_S_FRONT_ZPASS_OP_SHIFT 9 +# define R300_S_FRONT_ZFAIL_OP_SHIFT 12 +# define R300_S_BACK_FUNC_SHIFT 15 +# define R300_S_BACK_SFAIL_OP_SHIFT 18 +# define R300_S_BACK_ZPASS_OP_SHIFT 21 +# define R300_S_BACK_ZFAIL_OP_SHIFT 24 + +#define R300_ZB_STENCILREFMASK 0x4f08 +# define R300_STENCILREF_SHIFT 0 +# define R300_STENCILREF_MASK 0x000000ff +# define R300_STENCILMASK_SHIFT 8 +# define R300_STENCILMASK_MASK 0x0000ff00 +# define R300_STENCILWRITEMASK_SHIFT 16 +# define R300_STENCILWRITEMASK_MASK 0x00ff0000 /* gap */ -#define R300_RB3D_ZSTENCIL_FORMAT 0x4F10 -# define R300_DEPTH_FORMAT_16BIT_INT_Z (0 << 0) -# define R300_DEPTH_FORMAT_24BIT_INT_Z (2 << 0) - /* 16 bit format or some aditional bit ? */ -# define R300_DEPTH_FORMAT_UNK32 (32 << 0) +#define R300_ZB_FORMAT 0x4f10 +# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0) +# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0) +# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0) +/* reserved up to (15 << 0) */ +# define R300_INVERT_13E3_LEADING_ONES (0 << 4) +# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4) -#define R300_RB3D_EARLY_Z 0x4F14 -# define R300_EARLY_Z_DISABLE (0 << 0) -# define R300_EARLY_Z_ENABLE (1 << 0) +#define R300_ZB_ZTOP 0x4F14 +# define R300_ZTOP_DISABLE (0 << 0) +# define R300_ZTOP_ENABLE (1 << 0) /* gap */ -#define R300_RB3D_ZCACHE_CTLSTAT 0x4F18 /* GUESS */ -# define R300_RB3D_ZCACHE_UNKNOWN_01 0x1 -# define R300_RB3D_ZCACHE_UNKNOWN_03 0x3 +#define R300_ZB_ZCACHE_CTLSTAT 0x4f18 +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31) + +#define R300_ZB_BW_CNTL 0x4f1c +# define R300_HIZ_DISABLE (0 << 0) +# define R300_HIZ_ENABLE (1 << 0) +# define R300_HIZ_MIN (0 << 1) +# define R300_HIZ_MAX (1 << 1) +# define R300_FAST_FILL_DISABLE (0 << 2) +# define R300_FAST_FILL_ENABLE (1 << 2) +# define R300_RD_COMP_DISABLE (0 << 3) +# define R300_RD_COMP_ENABLE (1 << 3) +# define R300_WR_COMP_DISABLE (0 << 4) +# define R300_WR_COMP_ENABLE (1 << 4) +# define R300_ZB_CB_CLEAR_RMW (0 << 5) +# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5) +# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6) +# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6) + +# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7) +# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7) +# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8) +# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8) + +# define R500_BMASK_ENABLE (0 << 10) +# define R500_BMASK_DISABLE (1 << 10) +# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11) +# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11) +# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12) +# define R500_HIZ_FP_EXP_BITS_1 (1 << 12) +# define R500_HIZ_FP_EXP_BITS_2 (2 << 12) +# define R500_HIZ_FP_EXP_BITS_3 (3 << 12) +# define R500_HIZ_FP_EXP_BITS_4 (4 << 12) +# define R500_HIZ_FP_EXP_BITS_5 (5 << 12) +# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15) +# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15) +# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16) +# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16) +# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17) +# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17) +# define R500_PEQ_PACKING_DISABLE (0 << 18) +# define R500_PEQ_PACKING_ENABLE (1 << 18) +# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18) +# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18) + /* gap */ -#define R300_RB3D_DEPTHOFFSET 0x4F20 -#define R300_RB3D_DEPTHPITCH 0x4F24 -# define R300_DEPTHPITCH_MASK 0x00001FF8 /* GUESS */ -# define R300_DEPTH_TILE_ENABLE (1 << 16) /* GUESS */ -# define R300_DEPTH_MICROTILE_ENABLE (1 << 17) /* GUESS */ -# define R300_DEPTH_ENDIAN_NO_SWAP (0 << 18) /* GUESS */ -# define R300_DEPTH_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */ -# define R300_DEPTH_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */ - -/* BEGIN: Vertex program instruction set */ - -/* Every instruction is four dwords long: - * DWORD 0: output and opcode - * DWORD 1: first argument - * DWORD 2: second argument - * DWORD 3: third argument - * - * Notes: - * - ABS r, a is implemented as MAX r, a, -a - * - MOV is implemented as ADD to zero - * - XPD is implemented as MUL + MAD - * - FLR is implemented as FRC + ADD - * - apparently, fglrx tries to schedule instructions so that there is at - * least one instruction between the write to a temporary and the first - * read from said temporary; however, violations of this scheduling are - * allowed - * - register indices seem to be unrelated with OpenGL aliasing to - * conventional state - * - only one attribute and one parameter can be loaded at a time; however, - * the same attribute/parameter can be used for more than one argument - * - the second software argument for POW is the third hardware argument - * (no idea why) - * - MAD with only temporaries as input seems to use VPI_OUT_SELECT_MAD_2 - * - * There is some magic surrounding LIT: - * The single argument is replicated across all three inputs, but swizzled: - * First argument: xyzy - * Second argument: xyzx - * Third argument: xyzw - * Whenever the result is used later in the fragment program, fglrx forces - * x and w to be 1.0 in the input selection; I don't know whether this is - * strictly necessary +/* Z Buffer Address Offset. + * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles. */ -#define R300_VPI_OUT_OP_DOT (1 << 0) -#define R300_VPI_OUT_OP_MUL (2 << 0) -#define R300_VPI_OUT_OP_ADD (3 << 0) -#define R300_VPI_OUT_OP_MAD (4 << 0) -#define R300_VPI_OUT_OP_DST (5 << 0) -#define R300_VPI_OUT_OP_FRC (6 << 0) -#define R300_VPI_OUT_OP_MAX (7 << 0) -#define R300_VPI_OUT_OP_MIN (8 << 0) -#define R300_VPI_OUT_OP_SGE (9 << 0) -#define R300_VPI_OUT_OP_SLT (10 << 0) - /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, vector(scalar, vector) */ -#define R300_VPI_OUT_OP_UNK12 (12 << 0) -#define R300_VPI_OUT_OP_ARL (13 << 0) -#define R300_VPI_OUT_OP_EXP (65 << 0) -#define R300_VPI_OUT_OP_LOG (66 << 0) - /* Used in fog computations, scalar(scalar) */ -#define R300_VPI_OUT_OP_UNK67 (67 << 0) -#define R300_VPI_OUT_OP_LIT (68 << 0) -#define R300_VPI_OUT_OP_POW (69 << 0) -#define R300_VPI_OUT_OP_RCP (70 << 0) -#define R300_VPI_OUT_OP_RSQ (72 << 0) - /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, scalar(scalar) */ -#define R300_VPI_OUT_OP_UNK73 (73 << 0) -#define R300_VPI_OUT_OP_EX2 (75 << 0) -#define R300_VPI_OUT_OP_LG2 (76 << 0) -#define R300_VPI_OUT_OP_MAD_2 (128 << 0) - /* all temps, vector(scalar, vector, vector) */ -#define R300_VPI_OUT_OP_UNK129 (129 << 0) - -#define R300_VPI_OUT_REG_CLASS_TEMPORARY (0 << 8) -#define R300_VPI_OUT_REG_CLASS_ADDR (1 << 8) -#define R300_VPI_OUT_REG_CLASS_RESULT (2 << 8) -#define R300_VPI_OUT_REG_CLASS_MASK (31 << 8) - -#define R300_VPI_OUT_REG_INDEX_SHIFT 13 - /* GUESS based on fglrx native limits */ -#define R300_VPI_OUT_REG_INDEX_MASK (31 << 13) - -#define R300_VPI_OUT_WRITE_X (1 << 20) -#define R300_VPI_OUT_WRITE_Y (1 << 21) -#define R300_VPI_OUT_WRITE_Z (1 << 22) -#define R300_VPI_OUT_WRITE_W (1 << 23) - -#define R300_VPI_IN_REG_CLASS_TEMPORARY (0 << 0) -#define R300_VPI_IN_REG_CLASS_ATTRIBUTE (1 << 0) -#define R300_VPI_IN_REG_CLASS_PARAMETER (2 << 0) -#define R300_VPI_IN_REG_CLASS_NONE (9 << 0) -#define R300_VPI_IN_REG_CLASS_MASK (31 << 0) - -#define R300_VPI_IN_REG_INDEX_SHIFT 5 - /* GUESS based on fglrx native limits */ -#define R300_VPI_IN_REG_INDEX_MASK (255 << 5) - -/* The R300 can select components from the input register arbitrarily. - * Use the following constants, shifted by the component shift you - * want to select +#define R300_ZB_DEPTHOFFSET 0x4f20 + +/* Z Buffer Pitch and Endian Control */ +#define R300_ZB_DEPTHPITCH 0x4f24 +# define R300_DEPTHPITCH_MASK 0x00003FFC +# define R300_DEPTHMACROTILE_DISABLE (0 << 16) +# define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMICROTILE_LINEAR (0 << 17) +# define R300_DEPTHMICROTILE_TILED (1 << 17) +# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHENDIAN_NO_SWAP (0 << 18) +# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) +# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) +# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18) + +/* Z Buffer Clear Value */ +#define R300_ZB_DEPTHCLEARVALUE 0x4f28 + +/* Hierarchical Z Memory Offset */ +#define R300_ZB_HIZ_OFFSET 0x4f44 + +/* Hierarchical Z Write Index */ +#define R300_ZB_HIZ_WRINDEX 0x4f48 + +/* Hierarchical Z Data */ +#define R300_ZB_HIZ_DWORD 0x4f4c + +/* Hierarchical Z Read Index */ +#define R300_ZB_HIZ_RDINDEX 0x4f50 + +/* Hierarchical Z Pitch */ +#define R300_ZB_HIZ_PITCH 0x4f54 + +/* Z Buffer Z Pass Counter Data */ +#define R300_ZB_ZPASS_DATA 0x4f58 + +/* Z Buffer Z Pass Counter Address */ +#define R300_ZB_ZPASS_ADDR 0x4f5c + +/* Depth buffer X and Y coordinate offset */ +#define R300_ZB_DEPTHXY_OFFSET 0x4f60 +# define R300_DEPTHX_OFFSET_SHIFT 1 +# define R300_DEPTHX_OFFSET_MASK 0x000007FE +# define R300_DEPTHY_OFFSET_SHIFT 17 +# define R300_DEPTHY_OFFSET_MASK 0x07FE0000 + +/* Sets the fifo sizes */ +#define R500_ZB_FIFO_SIZE 0x4fd0 +# define R500_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_OP_FIFO_SIZE_EIGTHS (4 << 0) + +/* Stencil Reference Value and Mask for backfacing quads */ +/* R300_ZB_STENCILREFMASK handles front face */ +#define R500_ZB_STENCILREFMASK_BF 0x4fd4 +# define R500_STENCILREF_SHIFT 0 +# define R500_STENCILREF_MASK 0x000000ff +# define R500_STENCILMASK_SHIFT 8 +# define R500_STENCILMASK_MASK 0x0000ff00 +# define R500_STENCILWRITEMASK_SHIFT 16 +# define R500_STENCILWRITEMASK_MASK 0x00ff0000 + +/** + * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION + * + * The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector + * Engine instruction or a Math Engine instruction. */ -#define R300_VPI_IN_SELECT_X 0 -#define R300_VPI_IN_SELECT_Y 1 -#define R300_VPI_IN_SELECT_Z 2 -#define R300_VPI_IN_SELECT_W 3 -#define R300_VPI_IN_SELECT_ZERO 4 -#define R300_VPI_IN_SELECT_ONE 5 -#define R300_VPI_IN_SELECT_MASK 7 - -#define R300_VPI_IN_X_SHIFT 13 -#define R300_VPI_IN_Y_SHIFT 16 -#define R300_VPI_IN_Z_SHIFT 19 -#define R300_VPI_IN_W_SHIFT 22 - -#define R300_VPI_IN_NEG_X (1 << 25) -#define R300_VPI_IN_NEG_Y (1 << 26) -#define R300_VPI_IN_NEG_Z (1 << 27) -#define R300_VPI_IN_NEG_W (1 << 28) -/* END: Vertex program instruction set */ + +/*\{*/ + +enum { + /* R3XX */ + VECTOR_NO_OP = 0, + VE_DOT_PRODUCT = 1, + VE_MULTIPLY = 2, + VE_ADD = 3, + VE_MULTIPLY_ADD = 4, + VE_DISTANCE_VECTOR = 5, + VE_FRACTION = 6, + VE_MAXIMUM = 7, + VE_MINIMUM = 8, + VE_SET_GREATER_THAN_EQUAL = 9, + VE_SET_LESS_THAN = 10, + VE_MULTIPLYX2_ADD = 11, + VE_MULTIPLY_CLAMP = 12, + VE_FLT2FIX_DX = 13, + VE_FLT2FIX_DX_RND = 14, + /* R5XX */ + VE_PRED_SET_EQ_PUSH = 15, + VE_PRED_SET_GT_PUSH = 16, + VE_PRED_SET_GTE_PUSH = 17, + VE_PRED_SET_NEQ_PUSH = 18, + VE_COND_WRITE_EQ = 19, + VE_COND_WRITE_GT = 20, + VE_COND_WRITE_GTE = 21, + VE_COND_WRITE_NEQ = 22, + VE_COND_MUX_EQ = 23, + VE_COND_MUX_GT = 24, + VE_COND_MUX_GTE = 25, + VE_SET_GREATER_THAN = 26, + VE_SET_EQUAL = 27, + VE_SET_NOT_EQUAL = 28, +}; + +enum { + /* R3XX */ + MATH_NO_OP = 0, + ME_EXP_BASE2_DX = 1, + ME_LOG_BASE2_DX = 2, + ME_EXP_BASEE_FF = 3, + ME_LIGHT_COEFF_DX = 4, + ME_POWER_FUNC_FF = 5, + ME_RECIP_DX = 6, + ME_RECIP_FF = 7, + ME_RECIP_SQRT_DX = 8, + ME_RECIP_SQRT_FF = 9, + ME_MULTIPLY = 10, + ME_EXP_BASE2_FULL_DX = 11, + ME_LOG_BASE2_FULL_DX = 12, + ME_POWER_FUNC_FF_CLAMP_B = 13, + ME_POWER_FUNC_FF_CLAMP_B1 = 14, + ME_POWER_FUNC_FF_CLAMP_01 = 15, + ME_SIN = 16, + ME_COS = 17, + /* R5XX */ + ME_LOG_BASE2_IEEE = 18, + ME_RECIP_IEEE = 19, + ME_RECIP_SQRT_IEEE = 20, + ME_PRED_SET_EQ = 21, + ME_PRED_SET_GT = 22, + ME_PRED_SET_GTE = 23, + ME_PRED_SET_NEQ = 24, + ME_PRED_SET_CLR = 25, + ME_PRED_SET_INV = 26, + ME_PRED_SET_POP = 27, + ME_PRED_SET_RESTORE = 28, +}; + +enum { + /* R3XX */ + PVS_MACRO_OP_2CLK_MADD = 0, + PVS_MACRO_OP_2CLK_M2X_ADD = 1, +}; + +enum { + PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */ + PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */ + PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */ + PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */ +}; + +enum { + PVS_DST_REG_TEMPORARY = 0, /* Intermediate Storage */ + PVS_DST_REG_A0 = 1, /* Address Register Storage */ + PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */ + PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */ + PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */ + PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */ +}; + +enum { + PVS_SRC_SELECT_X = 0, /* Select X Component */ + PVS_SRC_SELECT_Y = 1, /* Select Y Component */ + PVS_SRC_SELECT_Z = 2, /* Select Z Component */ + PVS_SRC_SELECT_W = 3, /* Select W Component */ + PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */ + PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */ +}; + +/* PVS Opcode & Destination Operand Description */ + +enum { + PVS_DST_OPCODE_MASK = 0x3f, + PVS_DST_OPCODE_SHIFT = 0, + PVS_DST_MATH_INST_MASK = 0x1, + PVS_DST_MATH_INST_SHIFT = 6, + PVS_DST_MACRO_INST_MASK = 0x1, + PVS_DST_MACRO_INST_SHIFT = 7, + PVS_DST_REG_TYPE_MASK = 0xf, + PVS_DST_REG_TYPE_SHIFT = 8, + PVS_DST_ADDR_MODE_1_MASK = 0x1, + PVS_DST_ADDR_MODE_1_SHIFT = 12, + PVS_DST_OFFSET_MASK = 0x7f, + PVS_DST_OFFSET_SHIFT = 13, + PVS_DST_WE_X_MASK = 0x1, + PVS_DST_WE_X_SHIFT = 20, + PVS_DST_WE_Y_MASK = 0x1, + PVS_DST_WE_Y_SHIFT = 21, + PVS_DST_WE_Z_MASK = 0x1, + PVS_DST_WE_Z_SHIFT = 22, + PVS_DST_WE_W_MASK = 0x1, + PVS_DST_WE_W_SHIFT = 23, + PVS_DST_VE_SAT_MASK = 0x1, + PVS_DST_VE_SAT_SHIFT = 24, + PVS_DST_ME_SAT_MASK = 0x1, + PVS_DST_ME_SAT_SHIFT = 25, + PVS_DST_PRED_ENABLE_MASK = 0x1, + PVS_DST_PRED_ENABLE_SHIFT = 26, + PVS_DST_PRED_SENSE_MASK = 0x1, + PVS_DST_PRED_SENSE_SHIFT = 27, + PVS_DST_DUAL_MATH_OP_MASK = 0x3, + PVS_DST_DUAL_MATH_OP_SHIFT = 27, + PVS_DST_ADDR_SEL_MASK = 0x3, + PVS_DST_ADDR_SEL_SHIFT = 29, + PVS_DST_ADDR_MODE_0_MASK = 0x1, + PVS_DST_ADDR_MODE_0_SHIFT = 31, +}; + +/* PVS Source Operand Description */ + +enum { + PVS_SRC_REG_TYPE_MASK = 0x3, + PVS_SRC_REG_TYPE_SHIFT = 0, + SPARE_0_MASK = 0x1, + SPARE_0_SHIFT = 2, + PVS_SRC_ABS_XYZW_MASK = 0x1, + PVS_SRC_ABS_XYZW_SHIFT = 3, + PVS_SRC_ADDR_MODE_0_MASK = 0x1, + PVS_SRC_ADDR_MODE_0_SHIFT = 4, + PVS_SRC_OFFSET_MASK = 0xff, + PVS_SRC_OFFSET_SHIFT = 5, + PVS_SRC_SWIZZLE_X_MASK = 0x7, + PVS_SRC_SWIZZLE_X_SHIFT = 13, + PVS_SRC_SWIZZLE_Y_MASK = 0x7, + PVS_SRC_SWIZZLE_Y_SHIFT = 16, + PVS_SRC_SWIZZLE_Z_MASK = 0x7, + PVS_SRC_SWIZZLE_Z_SHIFT = 19, + PVS_SRC_SWIZZLE_W_MASK = 0x7, + PVS_SRC_SWIZZLE_W_SHIFT = 22, + PVS_SRC_MODIFIER_X_MASK = 0x1, + PVS_SRC_MODIFIER_X_SHIFT = 25, + PVS_SRC_MODIFIER_Y_MASK = 0x1, + PVS_SRC_MODIFIER_Y_SHIFT = 26, + PVS_SRC_MODIFIER_Z_MASK = 0x1, + PVS_SRC_MODIFIER_Z_SHIFT = 27, + PVS_SRC_MODIFIER_W_MASK = 0x1, + PVS_SRC_MODIFIER_W_SHIFT = 28, + PVS_SRC_ADDR_SEL_MASK = 0x3, + PVS_SRC_ADDR_SEL_SHIFT = 29, + PVS_SRC_ADDR_MODE_1_MASK = 0x0, + PVS_SRC_ADDR_MODE_1_SHIFT = 32, +}; + +/*\}*/ /* BEGIN: Packet 3 commands */ @@ -1583,13 +2679,511 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_PRIM_NUM_VERTICES_SHIFT 16 #define R300_PRIM_NUM_VERTICES_MASK 0xffff + + +/* + * The R500 unified shader (US) registers come in banks of 512 each, one + * for each instruction slot in the shader. You can't touch them directly. + * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive + * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the + * instruction is fully specified. + */ +#define R500_US_ALU_ALPHA_INST_0 0xa800 +# define R500_ALPHA_OP_MAD 0 +# define R500_ALPHA_OP_DP 1 +# define R500_ALPHA_OP_MIN 2 +# define R500_ALPHA_OP_MAX 3 +/* #define R500_ALPHA_OP_RESERVED 4 */ +# define R500_ALPHA_OP_CND 5 +# define R500_ALPHA_OP_CMP 6 +# define R500_ALPHA_OP_FRC 7 +# define R500_ALPHA_OP_EX2 8 +# define R500_ALPHA_OP_LN2 9 +# define R500_ALPHA_OP_RCP 10 +# define R500_ALPHA_OP_RSQ 11 +# define R500_ALPHA_OP_SIN 12 +# define R500_ALPHA_OP_COS 13 +# define R500_ALPHA_OP_MDH 14 +# define R500_ALPHA_OP_MDV 15 +# define R500_ALPHA_ADDRD(x) (x << 4) +# define R500_ALPHA_ADDRD_REL (1 << 11) +# define R500_ALPHA_SEL_A_SHIFT 12 +# define R500_ALPHA_SEL_A_SRC0 (0 << 12) +# define R500_ALPHA_SEL_A_SRC1 (1 << 12) +# define R500_ALPHA_SEL_A_SRC2 (2 << 12) +# define R500_ALPHA_SEL_A_SRCP (3 << 12) +# define R500_ALPHA_SWIZ_A_R (0 << 14) +# define R500_ALPHA_SWIZ_A_G (1 << 14) +# define R500_ALPHA_SWIZ_A_B (2 << 14) +# define R500_ALPHA_SWIZ_A_A (3 << 14) +# define R500_ALPHA_SWIZ_A_0 (4 << 14) +# define R500_ALPHA_SWIZ_A_HALF (5 << 14) +# define R500_ALPHA_SWIZ_A_1 (6 << 14) +/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */ +# define R500_ALPHA_MOD_A_NOP (0 << 17) +# define R500_ALPHA_MOD_A_NEG (1 << 17) +# define R500_ALPHA_MOD_A_ABS (2 << 17) +# define R500_ALPHA_MOD_A_NAB (3 << 17) +# define R500_ALPHA_SEL_B_SHIFT 19 +# define R500_ALPHA_SEL_B_SRC0 (0 << 19) +# define R500_ALPHA_SEL_B_SRC1 (1 << 19) +# define R500_ALPHA_SEL_B_SRC2 (2 << 19) +# define R500_ALPHA_SEL_B_SRCP (3 << 19) +# define R500_ALPHA_SWIZ_B_R (0 << 21) +# define R500_ALPHA_SWIZ_B_G (1 << 21) +# define R500_ALPHA_SWIZ_B_B (2 << 21) +# define R500_ALPHA_SWIZ_B_A (3 << 21) +# define R500_ALPHA_SWIZ_B_0 (4 << 21) +# define R500_ALPHA_SWIZ_B_HALF (5 << 21) +# define R500_ALPHA_SWIZ_B_1 (6 << 21) +/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALPHA_MOD_B_NOP (0 << 24) +# define R500_ALPHA_MOD_B_NEG (1 << 24) +# define R500_ALPHA_MOD_B_ABS (2 << 24) +# define R500_ALPHA_MOD_B_NAB (3 << 24) +# define R500_ALPHA_OMOD_IDENTITY (0 << 26) +# define R500_ALPHA_OMOD_MUL_2 (1 << 26) +# define R500_ALPHA_OMOD_MUL_4 (2 << 26) +# define R500_ALPHA_OMOD_MUL_8 (3 << 26) +# define R500_ALPHA_OMOD_DIV_2 (4 << 26) +# define R500_ALPHA_OMOD_DIV_4 (5 << 26) +# define R500_ALPHA_OMOD_DIV_8 (6 << 26) +# define R500_ALPHA_OMOD_DISABLE (7 << 26) +# define R500_ALPHA_TARGET(x) (x << 29) +# define R500_ALPHA_W_OMASK (1 << 31) +#define R500_US_ALU_ALPHA_ADDR_0 0x9800 +# define R500_ALPHA_ADDR0(x) (x << 0) +# define R500_ALPHA_ADDR0_CONST (1 << 8) +# define R500_ALPHA_ADDR0_REL (1 << 9) +# define R500_ALPHA_ADDR1(x) (x << 10) +# define R500_ALPHA_ADDR1_CONST (1 << 18) +# define R500_ALPHA_ADDR1_REL (1 << 19) +# define R500_ALPHA_ADDR2(x) (x << 20) +# define R500_ALPHA_ADDR2_CONST (1 << 28) +# define R500_ALPHA_ADDR2_REL (1 << 29) +# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) +# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30) +# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30) +# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30) +#define R500_US_ALU_RGBA_INST_0 0xb000 +# define R500_ALU_RGBA_OP_MAD (0 << 0) +# define R500_ALU_RGBA_OP_DP3 (1 << 0) +# define R500_ALU_RGBA_OP_DP4 (2 << 0) +# define R500_ALU_RGBA_OP_D2A (3 << 0) +# define R500_ALU_RGBA_OP_MIN (4 << 0) +# define R500_ALU_RGBA_OP_MAX (5 << 0) +/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */ +# define R500_ALU_RGBA_OP_CND (7 << 0) +# define R500_ALU_RGBA_OP_CMP (8 << 0) +# define R500_ALU_RGBA_OP_FRC (9 << 0) +# define R500_ALU_RGBA_OP_SOP (10 << 0) +# define R500_ALU_RGBA_OP_MDH (11 << 0) +# define R500_ALU_RGBA_OP_MDV (12 << 0) +# define R500_ALU_RGBA_ADDRD(x) (x << 4) +# define R500_ALU_RGBA_ADDRD_REL (1 << 11) +# define R500_ALU_RGBA_SEL_C_SHIFT 12 +# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) +# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12) +# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12) +# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12) +# define R500_ALU_RGBA_R_SWIZ_R (0 << 14) +# define R500_ALU_RGBA_R_SWIZ_G (1 << 14) +# define R500_ALU_RGBA_R_SWIZ_B (2 << 14) +# define R500_ALU_RGBA_R_SWIZ_A (3 << 14) +# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14) +# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14) +# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14) +/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */ +# define R500_ALU_RGBA_G_SWIZ_R (0 << 17) +# define R500_ALU_RGBA_G_SWIZ_G (1 << 17) +# define R500_ALU_RGBA_G_SWIZ_B (2 << 17) +# define R500_ALU_RGBA_G_SWIZ_A (3 << 17) +# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17) +# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17) +# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17) +/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */ +# define R500_ALU_RGBA_B_SWIZ_R (0 << 20) +# define R500_ALU_RGBA_B_SWIZ_G (1 << 20) +# define R500_ALU_RGBA_B_SWIZ_B (2 << 20) +# define R500_ALU_RGBA_B_SWIZ_A (3 << 20) +# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20) +# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20) +# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20) +/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */ +# define R500_ALU_RGBA_MOD_C_NOP (0 << 23) +# define R500_ALU_RGBA_MOD_C_NEG (1 << 23) +# define R500_ALU_RGBA_MOD_C_ABS (2 << 23) +# define R500_ALU_RGBA_MOD_C_NAB (3 << 23) +# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25 +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25) +# define R500_ALU_RGBA_A_SWIZ_R (0 << 27) +# define R500_ALU_RGBA_A_SWIZ_G (1 << 27) +# define R500_ALU_RGBA_A_SWIZ_B (2 << 27) +# define R500_ALU_RGBA_A_SWIZ_A (3 << 27) +# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27) +# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27) +# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27) +/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */ +# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30) +#define R500_US_ALU_RGB_INST_0 0xa000 +# define R500_ALU_RGB_SEL_A_SHIFT 0 +# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0) +# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0) +# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0) +# define R500_ALU_RGB_SEL_A_SRCP (3 << 0) +# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2) +# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2) +# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2) +# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2) +# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2) +# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2) +# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2) +/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */ +# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5) +# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5) +# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5) +# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5) +# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5) +# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5) +# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5) +/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */ +# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8) +# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8) +# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8) +# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8) +# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8) +# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8) +# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8) +/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */ +# define R500_ALU_RGB_MOD_A_NOP (0 << 11) +# define R500_ALU_RGB_MOD_A_NEG (1 << 11) +# define R500_ALU_RGB_MOD_A_ABS (2 << 11) +# define R500_ALU_RGB_MOD_A_NAB (3 << 11) +# define R500_ALU_RGB_SEL_B_SHIFT 13 +# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13) +# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13) +# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13) +# define R500_ALU_RGB_SEL_B_SRCP (3 << 13) +# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15) +# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15) +# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15) +# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15) +# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15) +# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15) +# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15) +/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */ +# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18) +# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18) +# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18) +# define R500_ALU_RGB_G_SWIZ_B_A (3 << 18) +# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18) +# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18) +# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18) +/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */ +# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21) +# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21) +# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21) +# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21) +# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21) +# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21) +# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21) +/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALU_RGB_MOD_B_NOP (0 << 24) +# define R500_ALU_RGB_MOD_B_NEG (1 << 24) +# define R500_ALU_RGB_MOD_B_ABS (2 << 24) +# define R500_ALU_RGB_MOD_B_NAB (3 << 24) +# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26) +# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26) +# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26) +# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26) +# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26) +# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) +# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) +# define R500_ALU_RGB_OMOD_DISABLE (7 << 26) +# define R500_ALU_RGB_TARGET(x) (x << 29) +# define R500_ALU_RGB_WMASK (1 << 31) +#define R500_US_ALU_RGB_ADDR_0 0x9000 +# define R500_RGB_ADDR0(x) (x << 0) +# define R500_RGB_ADDR0_CONST (1 << 8) +# define R500_RGB_ADDR0_REL (1 << 9) +# define R500_RGB_ADDR1(x) (x << 10) +# define R500_RGB_ADDR1_CONST (1 << 18) +# define R500_RGB_ADDR1_REL (1 << 19) +# define R500_RGB_ADDR2(x) (x << 20) +# define R500_RGB_ADDR2_CONST (1 << 28) +# define R500_RGB_ADDR2_REL (1 << 29) +# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) +# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30) +# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30) +# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30) +#define R500_US_CMN_INST_0 0xb800 +# define R500_INST_TYPE_MASK (3 << 0) +# define R500_INST_TYPE_ALU (0 << 0) +# define R500_INST_TYPE_OUT (1 << 0) +# define R500_INST_TYPE_FC (2 << 0) +# define R500_INST_TYPE_TEX (3 << 0) +# define R500_INST_TEX_SEM_WAIT (1 << 2) +# define R500_INST_RGB_PRED_SEL_NONE (0 << 3) +# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3) +# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3) +# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3) +# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3) +# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3) +# define R500_INST_RGB_PRED_INV (1 << 6) +# define R500_INST_WRITE_INACTIVE (1 << 7) +# define R500_INST_LAST (1 << 8) +# define R500_INST_NOP (1 << 9) +# define R500_INST_ALU_WAIT (1 << 10) +# define R500_INST_RGB_WMASK_R (1 << 11) +# define R500_INST_RGB_WMASK_G (1 << 12) +# define R500_INST_RGB_WMASK_B (1 << 13) +# define R500_INST_ALPHA_WMASK (1 << 14) +# define R500_INST_RGB_OMASK_R (1 << 15) +# define R500_INST_RGB_OMASK_G (1 << 16) +# define R500_INST_RGB_OMASK_B (1 << 17) +# define R500_INST_ALPHA_OMASK (1 << 18) +# define R500_INST_RGB_CLAMP (1 << 19) +# define R500_INST_ALPHA_CLAMP (1 << 20) +# define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALPHA_PRED_INV (1 << 22) +# define R500_INST_ALU_RESULT_OP_EQ (0 << 23) +# define R500_INST_ALU_RESULT_OP_LT (1 << 23) +# define R500_INST_ALU_RESULT_OP_GE (2 << 23) +# define R500_INST_ALU_RESULT_OP_NE (3 << 23) +# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25) +# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25) +# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25) +# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25) +# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25) +# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25) +/* XXX next four are kind of guessed */ +# define R500_INST_STAT_WE_R (1 << 28) +# define R500_INST_STAT_WE_G (1 << 29) +# define R500_INST_STAT_WE_B (1 << 30) +# define R500_INST_STAT_WE_A (1 << 31) + +/* note that these are 8 bit lengths, despite the offsets, at least for R500 */ +#define R500_US_CODE_ADDR 0x4630 +# define R500_US_CODE_START_ADDR(x) (x << 0) +# define R500_US_CODE_END_ADDR(x) (x << 16) +#define R500_US_CODE_OFFSET 0x4638 +# define R500_US_CODE_OFFSET_ADDR(x) (x << 0) +#define R500_US_CODE_RANGE 0x4634 +# define R500_US_CODE_RANGE_ADDR(x) (x << 0) +# define R500_US_CODE_RANGE_SIZE(x) (x << 16) +#define R500_US_CONFIG 0x4600 +# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) +#define R500_US_FC_ADDR_0 0xa000 +# define R500_FC_BOOL_ADDR(x) (x << 0) +# define R500_FC_INT_ADDR(x) (x << 8) +# define R500_FC_JUMP_ADDR(x) (x << 16) +# define R500_FC_JUMP_GLOBAL (1 << 31) +#define R500_US_FC_BOOL_CONST 0x4620 +# define R500_FC_KBOOL(x) (x) +#define R500_US_FC_CTRL 0x4624 +# define R500_FC_TEST_EN (1 << 30) +# define R500_FC_FULL_FC_EN (1 << 31) +#define R500_US_FC_INST_0 0x9800 +# define R500_FC_OP_JUMP (0 << 0) +# define R500_FC_OP_LOOP (1 << 0) +# define R500_FC_OP_ENDLOOP (2 << 0) +# define R500_FC_OP_REP (3 << 0) +# define R500_FC_OP_ENDREP (4 << 0) +# define R500_FC_OP_BREAKLOOP (5 << 0) +# define R500_FC_OP_BREAKREP (6 << 0) +# define R500_FC_OP_CONTINUE (7 << 0) +# define R500_FC_B_ELSE (1 << 4) +# define R500_FC_JUMP_ANY (1 << 5) +# define R500_FC_A_OP_NONE (0 << 6) +# define R500_FC_A_OP_POP (1 << 6) +# define R500_FC_A_OP_PUSH (2 << 6) +# define R500_FC_JUMP_FUNC(x) (x << 8) +# define R500_FC_B_POP_CNT(x) (x << 16) +# define R500_FC_B_OP0_NONE (0 << 24) +# define R500_FC_B_OP0_DECR (1 << 24) +# define R500_FC_B_OP0_INCR (2 << 24) +# define R500_FC_B_OP1_DECR (0 << 26) +# define R500_FC_B_OP1_NONE (1 << 26) +# define R500_FC_B_OP1_INCR (2 << 26) +# define R500_FC_IGNORE_UNCOVERED (1 << 28) +#define R500_US_FC_INT_CONST_0 0x4c00 +# define R500_FC_INT_CONST_KR(x) (x << 0) +# define R500_FC_INT_CONST_KG(x) (x << 8) +# define R500_FC_INT_CONST_KB(x) (x << 16) +/* _0 through _15 */ +#define R500_US_FORMAT0_0 0x4640 +# define R500_FORMAT_TXWIDTH(x) (x << 0) +# define R500_FORMAT_TXHEIGHT(x) (x << 11) +# define R500_FORMAT_TXDEPTH(x) (x << 22) +/* _0 through _3 */ +#define R500_US_OUT_FMT_0 0x46a4 +# define R500_OUT_FMT_C4_8 (0 << 0) +# define R500_OUT_FMT_C4_10 (1 << 0) +# define R500_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R500_OUT_FMT_C_16 (3 << 0) +# define R500_OUT_FMT_C2_16 (4 << 0) +# define R500_OUT_FMT_C4_16 (5 << 0) +# define R500_OUT_FMT_C_16_MPEG (6 << 0) +# define R500_OUT_FMT_C2_16_MPEG (7 << 0) +# define R500_OUT_FMT_C2_4 (8 << 0) +# define R500_OUT_FMT_C_3_3_2 (9 << 0) +# define R500_OUT_FMT_C_6_5_6 (10 << 0) +# define R500_OUT_FMT_C_11_11_10 (11 << 0) +# define R500_OUT_FMT_C_10_11_11 (12 << 0) +# define R500_OUT_FMT_C_2_10_10_10 (13 << 0) +/* #define R500_OUT_FMT_RESERVED (14 << 0) */ +# define R500_OUT_FMT_UNUSED (15 << 0) +# define R500_OUT_FMT_C_16_FP (16 << 0) +# define R500_OUT_FMT_C2_16_FP (17 << 0) +# define R500_OUT_FMT_C4_16_FP (18 << 0) +# define R500_OUT_FMT_C_32_FP (19 << 0) +# define R500_OUT_FMT_C2_32_FP (20 << 0) +# define R500_OUT_FMT_C4_32_FP (21 << 0) +# define R500_C0_SEL_A (0 << 8) +# define R500_C0_SEL_R (1 << 8) +# define R500_C0_SEL_G (2 << 8) +# define R500_C0_SEL_B (3 << 8) +# define R500_C1_SEL_A (0 << 10) +# define R500_C1_SEL_R (1 << 10) +# define R500_C1_SEL_G (2 << 10) +# define R500_C1_SEL_B (3 << 10) +# define R500_C2_SEL_A (0 << 12) +# define R500_C2_SEL_R (1 << 12) +# define R500_C2_SEL_G (2 << 12) +# define R500_C2_SEL_B (3 << 12) +# define R500_C3_SEL_A (0 << 14) +# define R500_C3_SEL_R (1 << 14) +# define R500_C3_SEL_G (2 << 14) +# define R500_C3_SEL_B (3 << 14) +# define R500_OUT_SIGN(x) (x << 16) +# define R500_ROUND_ADJ (1 << 20) +#define R500_US_PIXSIZE 0x4604 +# define R500_PIX_SIZE(x) (x) +#define R500_US_TEX_ADDR_0 0x9800 +# define R500_TEX_SRC_ADDR(x) (x << 0) +# define R500_TEX_SRC_ADDR_REL (1 << 7) +# define R500_TEX_SRC_S_SWIZ_R (0 << 8) +# define R500_TEX_SRC_S_SWIZ_G (1 << 8) +# define R500_TEX_SRC_S_SWIZ_B (2 << 8) +# define R500_TEX_SRC_S_SWIZ_A (3 << 8) +# define R500_TEX_SRC_T_SWIZ_R (0 << 10) +# define R500_TEX_SRC_T_SWIZ_G (1 << 10) +# define R500_TEX_SRC_T_SWIZ_B (2 << 10) +# define R500_TEX_SRC_T_SWIZ_A (3 << 10) +# define R500_TEX_SRC_R_SWIZ_R (0 << 12) +# define R500_TEX_SRC_R_SWIZ_G (1 << 12) +# define R500_TEX_SRC_R_SWIZ_B (2 << 12) +# define R500_TEX_SRC_R_SWIZ_A (3 << 12) +# define R500_TEX_SRC_Q_SWIZ_R (0 << 14) +# define R500_TEX_SRC_Q_SWIZ_G (1 << 14) +# define R500_TEX_SRC_Q_SWIZ_B (2 << 14) +# define R500_TEX_SRC_Q_SWIZ_A (3 << 14) +# define R500_TEX_DST_ADDR(x) (x << 16) +# define R500_TEX_DST_ADDR_REL (1 << 23) +# define R500_TEX_DST_R_SWIZ_R (0 << 24) +# define R500_TEX_DST_R_SWIZ_G (1 << 24) +# define R500_TEX_DST_R_SWIZ_B (2 << 24) +# define R500_TEX_DST_R_SWIZ_A (3 << 24) +# define R500_TEX_DST_G_SWIZ_R (0 << 26) +# define R500_TEX_DST_G_SWIZ_G (1 << 26) +# define R500_TEX_DST_G_SWIZ_B (2 << 26) +# define R500_TEX_DST_G_SWIZ_A (3 << 26) +# define R500_TEX_DST_B_SWIZ_R (0 << 28) +# define R500_TEX_DST_B_SWIZ_G (1 << 28) +# define R500_TEX_DST_B_SWIZ_B (2 << 28) +# define R500_TEX_DST_B_SWIZ_A (3 << 28) +# define R500_TEX_DST_A_SWIZ_R (0 << 30) +# define R500_TEX_DST_A_SWIZ_G (1 << 30) +# define R500_TEX_DST_A_SWIZ_B (2 << 30) +# define R500_TEX_DST_A_SWIZ_A (3 << 30) +#define R500_US_TEX_ADDR_DXDY_0 0xa000 +# define R500_DX_ADDR(x) (x << 0) +# define R500_DX_ADDR_REL (1 << 7) +# define R500_DX_S_SWIZ_R (0 << 8) +# define R500_DX_S_SWIZ_G (1 << 8) +# define R500_DX_S_SWIZ_B (2 << 8) +# define R500_DX_S_SWIZ_A (3 << 8) +# define R500_DX_T_SWIZ_R (0 << 10) +# define R500_DX_T_SWIZ_G (1 << 10) +# define R500_DX_T_SWIZ_B (2 << 10) +# define R500_DX_T_SWIZ_A (3 << 10) +# define R500_DX_R_SWIZ_R (0 << 12) +# define R500_DX_R_SWIZ_G (1 << 12) +# define R500_DX_R_SWIZ_B (2 << 12) +# define R500_DX_R_SWIZ_A (3 << 12) +# define R500_DX_Q_SWIZ_R (0 << 14) +# define R500_DX_Q_SWIZ_G (1 << 14) +# define R500_DX_Q_SWIZ_B (2 << 14) +# define R500_DX_Q_SWIZ_A (3 << 14) +# define R500_DY_ADDR(x) (x << 16) +# define R500_DY_ADDR_REL (1 << 17) +# define R500_DY_S_SWIZ_R (0 << 24) +# define R500_DY_S_SWIZ_G (1 << 24) +# define R500_DY_S_SWIZ_B (2 << 24) +# define R500_DY_S_SWIZ_A (3 << 24) +# define R500_DY_T_SWIZ_R (0 << 26) +# define R500_DY_T_SWIZ_G (1 << 26) +# define R500_DY_T_SWIZ_B (2 << 26) +# define R500_DY_T_SWIZ_A (3 << 26) +# define R500_DY_R_SWIZ_R (0 << 28) +# define R500_DY_R_SWIZ_G (1 << 28) +# define R500_DY_R_SWIZ_B (2 << 28) +# define R500_DY_R_SWIZ_A (3 << 28) +# define R500_DY_Q_SWIZ_R (0 << 30) +# define R500_DY_Q_SWIZ_G (1 << 30) +# define R500_DY_Q_SWIZ_B (2 << 30) +# define R500_DY_Q_SWIZ_A (3 << 30) +#define R500_US_TEX_INST_0 0x9000 +# define R500_TEX_ID(x) (x << 16) +# define R500_TEX_INST_NOP (0 << 22) +# define R500_TEX_INST_LD (1 << 22) +# define R500_TEX_INST_TEXKILL (2 << 22) +# define R500_TEX_INST_PROJ (3 << 22) +# define R500_TEX_INST_LODBIAS (4 << 22) +# define R500_TEX_INST_LOD (5 << 22) +# define R500_TEX_INST_DXDY (6 << 22) +# define R500_TEX_SEM_ACQUIRE (1 << 25) +# define R500_TEX_IGNORE_UNCOVERED (1 << 26) +# define R500_TEX_UNSCALED (1 << 27) +#define R300_US_W_FMT 0x46b4 +# define R300_W_FMT_W0 (0 << 0) +# define R300_W_FMT_W24 (1 << 0) +# define R300_W_FMT_W24FP (2 << 0) +# define R300_W_SRC_US (0 << 2) +# define R300_W_SRC_RAS (1 << 2) + + /* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. * Two parameter dwords: - * 0. The first parameter appears to be always 0 - * 1. The second parameter is a standard primitive emission dword. + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. */ #define R300_PACKET3_3D_DRAW_VBUF 0x00002800 +/* Draw a primitive from immediate vertices in this packet + * Up to 16382 dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + * 2 to end: Up to 16380 dwords of vertex data. + */ +#define R300_PACKET3_3D_DRAW_IMMD 0x00002900 + +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and + * immediate vertices in this packet + * Up to 16382 dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + * 2 to end: Up to 16380 dwords of vertex data. + */ +#define R300_PACKET3_3D_DRAW_INDX 0x00002A00 + + /* Specify the full set of vertex arrays as (address, stride). * The first parameter is the number of vertex arrays specified. * The rest of the command is a variable length list of blocks, where @@ -1610,8 +3204,29 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_EB_UNK1_SHIFT 24 # define R300_EB_UNK1 (0x80<<24) # define R300_EB_UNK2 0x0810 + +/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400 +/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_IMMD_2 0x00003500 +/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */ #define R300_PACKET3_3D_DRAW_INDX_2 0x00003600 +/* Clears a portion of hierachical Z RAM + * 3 dword parameters + * 0. START + * 1. COUNT: 13:0 (max is 0x3FFF) + * 2. CLEAR_VALUE: Value to write into HIZ RAM. + */ +#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700 + +/* Draws a set of primitives using vertex buffers pointed by the state data. + * At least 2 Parameters: + * 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword. + * 2 to end: Data or indices (see other 3D_DRAW_* packets for details) + */ +#define R300_PACKET3_3D_DRAW_128 0x00003900 + /* END: Packet 3 commands */ @@ -1633,3 +3248,5 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #endif /* _R300_REG_H */ /* *INDENT-ON* */ + +/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */ diff --git a/r300/r300_render.c b/r300/r300_render.c index cc13e9a..0a199e6 100644 --- a/r300/r300_render.c +++ b/r300/r300_render.c @@ -45,6 +45,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * obviously this does work... Further investigation is needed. * * \author Nicolai Haehnle <prefect_@gmx.net> + * + * \todo Add immediate implementation back? Perhaps this is useful if there are + * no bugs... */ #include "glheader.h" @@ -71,12 +74,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_reg.h" #include "r300_tex.h" #include "r300_emit.h" +#include "r300_fragprog.h" extern int future_hw_tcl_on; /** * \brief Convert a OpenGL primitive type into a R300 primitive type. */ -static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim) +int r300PrimitiveType(r300ContextPtr rmesa, int prim) { switch (prim & PRIM_MODE_MASK) { case GL_POINTS: @@ -116,7 +120,7 @@ static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim) } } -static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) +int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) { int verts_off = 0; @@ -168,16 +172,13 @@ static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) return num_verts - verts_off; } -static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, - int elt_size) +static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) { r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_dma_region *rvb = &rmesa->state.elt_dma; void *out; - assert(elt_size == 2 || elt_size == 4); - - if (r300IsGartMemory(rmesa, elts, n_elts * elt_size)) { + if (r300IsGartMemory(rmesa, elts, n_elts * 4)) { rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; rvb->start = ((char *)elts) - rvb->address; rvb->aos_offset = @@ -189,66 +190,27 @@ static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, _mesa_exit(-1); } - r300AllocDmaRegion(rmesa, rvb, n_elts * elt_size, elt_size); + r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4); rvb->aos_offset = GET_START(rvb); out = rvb->address + rvb->start; - memcpy(out, elts, n_elts * elt_size); + memcpy(out, elts, n_elts * 4); } static void r300FireEB(r300ContextPtr rmesa, unsigned long addr, - int vertex_count, int type, int elt_size) + int vertex_count, int type) { int cmd_reserved = 0; int cmd_written = 0; drm_radeon_cmd_header_t *cmd = NULL; - unsigned long t_addr; - unsigned long magic_1, magic_2; - - assert(elt_size == 2 || elt_size == 4); - - if (addr & (elt_size - 1)) { - WARN_ONCE("Badly aligned buffer\n"); - return; - } - - magic_1 = (addr % 32) / 4; - t_addr = addr & ~0x1d; - magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1; - start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); - if (elt_size == 4) { - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - (vertex_count << 16) | type | - R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - (vertex_count << 16) | type); - } + start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0); + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); -#ifdef OPTIMIZE_ELTS - if (elt_size == 4) { - e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); - e32(addr); - } else { - e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2); - e32(t_addr); - } -#else + start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2); e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); e32(addr); -#endif - - if (elt_size == 4) { - e32(vertex_count); - } else { -#ifdef OPTIMIZE_ELTS - e32(magic_2); -#else - e32((vertex_count + 1) / 2); -#endif - } + e32(vertex_count); } static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) @@ -263,26 +225,23 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, offset); - start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1); + start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1); e32(nr); + for (i = 0; i + 1 < nr; i += 2) { - e32((rmesa->state.aos[i].aos_size << 0) - | (rmesa->state.aos[i].aos_stride << 8) - | (rmesa->state.aos[i + 1].aos_size << 16) - | (rmesa->state.aos[i + 1].aos_stride << 24) - ); - e32(rmesa->state.aos[i].aos_offset + - offset * 4 * rmesa->state.aos[i].aos_stride); - e32(rmesa->state.aos[i + 1].aos_offset + - offset * 4 * rmesa->state.aos[i + 1].aos_stride); + e32((rmesa->state.aos[i].aos_size << 0) | + (rmesa->state.aos[i].aos_stride << 8) | + (rmesa->state.aos[i + 1].aos_size << 16) | + (rmesa->state.aos[i + 1].aos_stride << 24)); + + e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride); + e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride); } if (nr & 1) { - e32((rmesa->state.aos[nr - 1].aos_size << 0) - | (rmesa->state.aos[nr - 1].aos_stride << 8) - ); - e32(rmesa->state.aos[nr - 1].aos_offset + - offset * 4 * rmesa->state.aos[nr - 1].aos_stride); + e32((rmesa->state.aos[nr - 1].aos_size << 0) | + (rmesa->state.aos[nr - 1].aos_stride << 8)); + e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); } } @@ -292,130 +251,78 @@ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) int cmd_written = 0; drm_radeon_cmd_header_t *cmd = NULL; - start_packet3(RADEON_CP_PACKET3_3D_DRAW_VBUF_2, 0); - e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) - | type); + start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); + e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); } static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, int start, int end, int prim) { int type, num_verts; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; - type = r300PrimitiveType(rmesa, ctx, prim); + type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); if (type < 0 || num_verts <= 0) return; - if (rmesa->state.VB.Elts) { - r300EmitAOS(rmesa, rmesa->state.aos_count, start); + if (vb->Elts) { if (num_verts > 65535) { /* not implemented yet */ WARN_ONCE("Too many elts\n"); return; } - r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, - rmesa->state.VB.elt_size); - r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, - num_verts, type, rmesa->state.VB.elt_size); + /* Note: The following is incorrect, but it's the best I can do + * without a major refactoring of how DMA memory is handled. + * The problem: Ensuring that both vertex arrays *and* index + * arrays are at the right position, and then ensuring that + * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted + * at once. + * + * So why is the following incorrect? Well, it seems like + * allocating the index array might actually evict the vertex + * arrays. *sigh* + */ + r300EmitElts(ctx, vb->Elts, num_verts); + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type); } else { r300EmitAOS(rmesa, rmesa->state.aos_count, start); r300FireAOS(rmesa, num_verts, type); } } -#define CONV_VB(a, b) rvb->AttribPtr[(a)].size = vb->b->size, \ - rvb->AttribPtr[(a)].type = GL_FLOAT, \ - rvb->AttribPtr[(a)].stride = vb->b->stride, \ - rvb->AttribPtr[(a)].data = vb->b->data - -static void radeon_vb_to_rvb(r300ContextPtr rmesa, - struct radeon_vertex_buffer *rvb, - struct vertex_buffer *vb) -{ - int i; - GLcontext *ctx; - ctx = rmesa->radeon.glCtx; - - memset(rvb, 0, sizeof(*rvb)); - - rvb->Elts = vb->Elts; - rvb->elt_size = 4; - rvb->elt_min = 0; - rvb->elt_max = vb->Count; - - rvb->Count = vb->Count; - - if (hw_tcl_on) { - CONV_VB(VERT_ATTRIB_POS, ObjPtr); - } else { - assert(vb->ClipPtr); - CONV_VB(VERT_ATTRIB_POS, ClipPtr); - } - - CONV_VB(VERT_ATTRIB_NORMAL, NormalPtr); - CONV_VB(VERT_ATTRIB_COLOR0, ColorPtr[0]); - CONV_VB(VERT_ATTRIB_COLOR1, SecondaryColorPtr[0]); - CONV_VB(VERT_ATTRIB_FOG, FogCoordPtr); - - for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) - CONV_VB(VERT_ATTRIB_TEX0 + i, TexCoordPtr[i]); - - for (i = 0; i < MAX_VERTEX_PROGRAM_ATTRIBS; i++) - CONV_VB(VERT_ATTRIB_GENERIC0 + i, - AttribPtr[VERT_ATTRIB_GENERIC0 + i]); - - rvb->Primitive = vb->Primitive; - rvb->PrimitiveCount = vb->PrimitiveCount; - rvb->LockFirst = rvb->LockCount = 0; - rvb->lock_uptodate = GL_FALSE; -} - static GLboolean r300RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct radeon_vertex_buffer *VB = &rmesa->state.VB; int i; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - if (stage) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - radeon_vb_to_rvb(rmesa, VB, &tnl->vb); - } - r300UpdateShaders(rmesa); if (r300EmitArrays(ctx)) return GL_TRUE; r300UpdateShaderStates(rmesa); - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); - + r300EmitCacheFlush(rmesa); r300EmitState(rmesa); - for (i = 0; i < VB->PrimitiveCount; i++) { - GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); - GLuint start = VB->Primitive[i].start; - GLuint end = VB->Primitive[i].start + VB->Primitive[i].count; + for (i = 0; i < vb->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); + GLuint start = vb->Primitive[i].start; + GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; r300RunRenderPrimitive(rmesa, ctx, start, end, prim); } - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); + r300EmitCacheFlush(rmesa); #ifdef USER_BUFFERS r300UseArrays(ctx); @@ -439,13 +346,26 @@ static GLboolean r300RunRender(GLcontext * ctx, static int r300Fallback(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_fragment_program *fp = (struct r300_fragment_program *) + /* Do we need to use new-style shaders? + * Also is there a better way to do this? */ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; - - if (fp) { - if (!fp->translated) - r300TranslateFragmentShader(r300, fp); - FALLBACK_IF(!fp->translated); + if (fp) { + if (!fp->translated) { + r500TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } + } else { + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + if (fp) { + if (!fp->translated) { + r300TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } } FALLBACK_IF(ctx->RenderMode != GL_RENDER); @@ -457,16 +377,12 @@ static int r300Fallback(GLcontext * ctx) || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[1])); - FALLBACK_IF(ctx->Color.ColorLogicOpEnabled); - if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) FALLBACK_IF(ctx->Point.PointSprite); if (!r300->disable_lowimpact_fallback) { - FALLBACK_IF(ctx->Polygon.OffsetPoint); - FALLBACK_IF(ctx->Polygon.OffsetLine); FALLBACK_IF(ctx->Polygon.StippleFlag); - FALLBACK_IF(ctx->Multisample.Enabled); + FALLBACK_IF(ctx->Multisample._Enabled); FALLBACK_IF(ctx->Line.StippleFlag); FALLBACK_IF(ctx->Line.SmoothFlag); FALLBACK_IF(ctx->Point.SmoothFlag); @@ -478,12 +394,17 @@ static int r300Fallback(GLcontext * ctx) static GLboolean r300RunNonTCLRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); if (r300Fallback(ctx) >= R300_FALLBACK_RAST) return GL_TRUE; + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return GL_TRUE; + return r300RunRender(ctx, stage); } diff --git a/r300/r300_shader.c b/r300/r300_shader.c index 59fe17b..f30fd98 100644 --- a/r300/r300_shader.c +++ b/r300/r300_shader.c @@ -1,8 +1,7 @@ -#include "glheader.h" -#include "macros.h" -#include "enums.h" -#include "program.h" +#include "main/glheader.h" + +#include "shader/program.h" #include "tnl/tnl.h" #include "r300_context.h" #include "r300_fragprog.h" @@ -10,8 +9,10 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, GLuint id) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp; - struct r300_fragment_program *fp; + struct r300_fragment_program *r300_fp; + struct r500_fragment_program *r500_fp; switch (target) { case GL_VERTEX_STATE_PROGRAM_NV: @@ -20,14 +21,27 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); case GL_FRAGMENT_PROGRAM_ARB: - fp = CALLOC_STRUCT(r300_fragment_program); - fp->ctx = ctx; - return _mesa_init_fragment_program(ctx, &fp->mesa_program, - target, id); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_fp = CALLOC_STRUCT(r500_fragment_program); + r500_fp->ctx = ctx; + return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, + target, id); + } else { + r300_fp = CALLOC_STRUCT(r300_fragment_program); + return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, + target, id); + } + case GL_FRAGMENT_PROGRAM_NV: - fp = CALLOC_STRUCT(r300_fragment_program); - return _mesa_init_fragment_program(ctx, &fp->mesa_program, - target, id); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_fp = CALLOC_STRUCT(r500_fragment_program); + return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, + target, id); + } else { + r300_fp = CALLOC_STRUCT(r300_fragment_program); + return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, + target, id); + } default: _mesa_problem(ctx, "Bad target in r300NewProgram"); } @@ -43,17 +57,23 @@ static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) static void r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp = (void *)prog; - struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; + struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog; switch (target) { case GL_VERTEX_PROGRAM_ARB: vp->progs = NULL; break; case GL_FRAGMENT_PROGRAM_ARB: - fp->translated = GL_FALSE; + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500_fp->translated = GL_FALSE; + else + r300_fp->translated = GL_FALSE; break; } + /* need this for tcl fallbacks */ _tnl_program_string(ctx, target, prog); } @@ -61,7 +81,7 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { - return 1; + return GL_TRUE; } void r300InitShaderFuncs(struct dd_function_table *functions) diff --git a/r300/r300_state.c b/r300/r300_state.c index 2aaf041..15cd053 100644 --- a/r300/r300_state.c +++ b/r300/r300_state.c @@ -65,20 +65,33 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drirenderbuffer.h" +extern int future_hw_tcl_on; +extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx); + static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) { - GLubyte color[4]; r300ContextPtr rmesa = R300_CONTEXT(ctx); R300_STATECHANGE(rmesa, blend_color); - CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]); - CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]); - CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]); - CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + GLuint r = IROUND(cf[0]*1023.0f); + GLuint g = IROUND(cf[1]*1023.0f); + GLuint b = IROUND(cf[2]*1023.0f); + GLuint a = IROUND(cf[3]*1023.0f); - rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0], - color[1], color[2]); + rmesa->hw.blend_color.cmd[1] = r | (a << 16); + rmesa->hw.blend_color.cmd[2] = b | (g << 16); + } else { + GLubyte color[4]; + CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]); + CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]); + CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]); + CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]); + + rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0], + color[1], color[2]); + } } /** @@ -184,7 +197,7 @@ static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn, */ #if 0 if (new_ablend == new_cblend) { - new_cblend |= R300_BLEND_NO_SEPARATE; + new_cblend |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; } #endif new_cblend |= cbits; @@ -290,7 +303,9 @@ static void r300SetBlendState(GLcontext * ctx) r300SetBlendCntl(r300, func, eqn, - R300_BLEND_UNKNOWN | R300_BLEND_ENABLE, funcA, eqnA); + (R300_SEPARATE_ALPHA_ENABLE | + R300_READ_ENABLE | + R300_ALPHA_BLEND_ENABLE), funcA, eqnA); } static void r300BlendEquationSeparate(GLcontext * ctx, @@ -307,6 +322,83 @@ static void r300BlendFuncSeparate(GLcontext * ctx, } /** + * Translate LogicOp enums into hardware representation. + * Both use a very logical bit-wise layout, but unfortunately the order + * of bits is reversed. + */ +static GLuint translate_logicop(GLenum logicop) +{ + GLuint bits = logicop - GL_CLEAR; + bits = ((bits & 1) << 3) | ((bits & 2) << 1) | ((bits & 4) >> 1) | ((bits & 8) >> 3); + return bits << R300_RB3D_ROPCNTL_ROP_SHIFT; +} + +/** + * Used internally to update the r300->hw hardware state to match the + * current OpenGL state. + */ +static void r300SetLogicOpState(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + R300_STATECHANGE(r300, rop); + if (RGBA_LOGICOP_ENABLED(ctx)) { + r300->hw.rop.cmd[1] = R300_RB3D_ROPCNTL_ROP_ENABLE | + translate_logicop(ctx->Color.LogicOp); + } else { + r300->hw.rop.cmd[1] = 0; + } +} + +/** + * Called by Mesa when an application program changes the LogicOp state + * via glLogicOp. + */ +static void r300LogicOpcode(GLcontext *ctx, GLenum logicop) +{ + if (RGBA_LOGICOP_ENABLED(ctx)) + r300SetLogicOpState(ctx); +} + +static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLint p; + GLint *ip; + + /* no VAP UCP on non-TCL chipsets */ + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return; + + p = (GLint) plane - (GLint) GL_CLIP_PLANE0; + ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + R300_STATECHANGE( rmesa, vpucp[p] ); + rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3]; +} + +static void r300SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint p; + + /* no VAP UCP on non-TCL chipsets */ + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return; + + p = cap - GL_CLIP_PLANE0; + R300_STATECHANGE(r300, vap_clip_cntl); + if (state) { + r300->hw.vap_clip_cntl.cmd[1] |= (R300_VAP_UCP_ENABLE_0 << p); + r300ClipPlane(ctx, cap, NULL); + } else { + r300->hw.vap_clip_cntl.cmd[1] &= ~(R300_VAP_UCP_ENABLE_0 << p); + } +} + +/** * Update our tracked culling state based on Mesa's state. */ static void r300UpdateCulling(GLcontext * ctx) @@ -314,43 +406,82 @@ static void r300UpdateCulling(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); uint32_t val = 0; - R300_STATECHANGE(r300, cul); if (ctx->Polygon.CullFlag) { - if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) - val = R300_CULL_FRONT | R300_CULL_BACK; - else if (ctx->Polygon.CullFaceMode == GL_FRONT) + switch (ctx->Polygon.CullFaceMode) { + case GL_FRONT: val = R300_CULL_FRONT; - else + break; + case GL_BACK: val = R300_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + val = R300_CULL_FRONT | R300_CULL_BACK; + break; + default: + break; + } + } - if (ctx->Polygon.FrontFace == GL_CW) - val |= R300_FRONT_FACE_CW; - else - val |= R300_FRONT_FACE_CCW; + switch (ctx->Polygon.FrontFace) { + case GL_CW: + val |= R300_FRONT_FACE_CW; + break; + case GL_CCW: + val |= R300_FRONT_FACE_CCW; + break; + default: + break; } + + R300_STATECHANGE(r300, cul); r300->hw.cul.cmd[R300_CUL_CULL] = val; } +static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + R300_STATECHANGE(r300, occlusion_cntl); + if (state) { + r300->hw.occlusion_cntl.cmd[1] |= (3 << 0); + } else { + r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0); + } +} + +static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + return (fp && fp->WritesDepth); + } else { + struct r500_fragment_program* fp = + (struct r500_fragment_program*)(char*) + ctx->FragmentProgram._Current; + return (fp && fp->writes_depth); + } +} + static void r300SetEarlyZState(GLcontext * ctx) { - /* updates register R300_RB3D_EARLY_Z (0x4F14) - if depth test is not enabled it should be R300_EARLY_Z_DISABLE - if depth is enabled and alpha not it should be R300_EARLY_Z_ENABLE - if depth and alpha is enabled it should be R300_EARLY_Z_DISABLE - */ r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint topZ = R300_ZTOP_ENABLE; - R300_STATECHANGE(r300, zstencil_format); if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) - /* disable early Z */ - r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; - else { - if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) - /* enable early Z */ - r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE; - else - /* disable early Z */ - r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; + topZ = R300_ZTOP_DISABLE; + if (current_fragment_program_writes_depth(ctx)) + topZ = R300_ZTOP_DISABLE; + + if (topZ != r300->hw.zstencil_format.cmd[2]) { + /* Note: This completely reemits the stencil format. + * I have not tested whether this is strictly necessary, + * or if emitting a write to ZB_ZTOP is enough. + */ + R300_STATECHANGE(r300, zstencil_format); + r300->hw.zstencil_format.cmd[2] = topZ; } } @@ -365,41 +496,43 @@ static void r300SetAlphaState(GLcontext * ctx) switch (ctx->Color.AlphaFunc) { case GL_NEVER: - pp_misc |= R300_ALPHA_TEST_FAIL; + pp_misc |= R300_FG_ALPHA_FUNC_NEVER; break; case GL_LESS: - pp_misc |= R300_ALPHA_TEST_LESS; + pp_misc |= R300_FG_ALPHA_FUNC_LESS; break; case GL_EQUAL: - pp_misc |= R300_ALPHA_TEST_EQUAL; + pp_misc |= R300_FG_ALPHA_FUNC_EQUAL; break; case GL_LEQUAL: - pp_misc |= R300_ALPHA_TEST_LEQUAL; + pp_misc |= R300_FG_ALPHA_FUNC_LE; break; case GL_GREATER: - pp_misc |= R300_ALPHA_TEST_GREATER; + pp_misc |= R300_FG_ALPHA_FUNC_GREATER; break; case GL_NOTEQUAL: - pp_misc |= R300_ALPHA_TEST_NEQUAL; + pp_misc |= R300_FG_ALPHA_FUNC_NOTEQUAL; break; case GL_GEQUAL: - pp_misc |= R300_ALPHA_TEST_GEQUAL; + pp_misc |= R300_FG_ALPHA_FUNC_GE; break; case GL_ALWAYS: - /*pp_misc |= R300_ALPHA_TEST_PASS; */ + /*pp_misc |= FG_ALPHA_FUNC_ALWAYS; */ really_enabled = GL_FALSE; break; } if (really_enabled) { - pp_misc |= R300_ALPHA_TEST_ENABLE; - pp_misc |= (refByte & R300_REF_ALPHA_MASK); + pp_misc |= R300_FG_ALPHA_FUNC_ENABLE; + pp_misc |= R500_FG_ALPHA_FUNC_8BIT; + pp_misc |= (refByte & R300_FG_ALPHA_FUNC_VAL_MASK); } else { pp_misc = 0x0; } R300_STATECHANGE(r300, at); r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; + r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; r300SetEarlyZState(ctx); } @@ -439,152 +572,53 @@ static void r300SetDepthState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zs); - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE; - r300->hw.zs.cmd[R300_ZS_CNTL_1] &= - ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE|R300_STENCIL_FRONT_BACK; + r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); - if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) { + if (ctx->Depth.Test) { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE; if (ctx->Depth.Mask) - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= - R300_RB3D_Z_TEST_AND_WRITE; - else - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST; - - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= - translate_func(ctx->Depth. - Func) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; - } else { - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1; + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE; r300->hw.zs.cmd[R300_ZS_CNTL_1] |= - translate_func(GL_NEVER) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; + translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT; } r300SetEarlyZState(ctx); } -static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ); - -/** - * Handle glEnable()/glDisable(). - * - * \note Mesa already filters redundant calls to glEnable/glDisable. - */ -static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) +static void r300SetStencilState(GLcontext * ctx, GLboolean state) { r300ContextPtr r300 = R300_CONTEXT(ctx); - GLuint p; - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(cap), - state ? "GL_TRUE" : "GL_FALSE"); - - switch (cap) { - /* Fast track this one... - */ - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_3D: - break; - case GL_FOG: - R300_STATECHANGE(r300, fogs); + if (r300->state.stencil.hw_stencil) { + R300_STATECHANGE(r300, zs); if (state) { - r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FOG_ENABLE; - - ctx->Driver.Fogfv(ctx, GL_FOG_MODE, NULL); - ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, - &ctx->Fog.Density); - ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); - ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); - ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= + R300_STENCIL_ENABLE; } else { - r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FOG_ENABLE; - } - - break; - - case GL_ALPHA_TEST: - r300SetAlphaState(ctx); - break; - - case GL_BLEND: - case GL_COLOR_LOGIC_OP: - r300SetBlendState(ctx); - break; - - - case GL_CLIP_PLANE0: - case GL_CLIP_PLANE1: - case GL_CLIP_PLANE2: - case GL_CLIP_PLANE3: - case GL_CLIP_PLANE4: - case GL_CLIP_PLANE5: - /* no VAP UCP on non-TCL chipsets */ - if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - return; - - p = cap-GL_CLIP_PLANE0; - R300_STATECHANGE( r300, vap_clip_cntl ); - if (state) { - r300->hw.vap_clip_cntl.cmd[1] |= (R300_VAP_UCP_ENABLE_0<<p); - r300ClipPlane( ctx, cap, NULL ); - } - else { - r300->hw.vap_clip_cntl.cmd[1] &= ~(R300_VAP_UCP_ENABLE_0<<p); + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= + ~R300_STENCIL_ENABLE; } - break; - case GL_DEPTH_TEST: - r300SetDepthState(ctx); - break; - - case GL_STENCIL_TEST: - if (r300->state.stencil.hw_stencil) { - R300_STATECHANGE(r300, zs); - if (state) { - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= - R300_RB3D_STENCIL_ENABLE; - } else { - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= - ~R300_RB3D_STENCIL_ENABLE; - } - } else { + } else { #if R200_MERGED - FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state); + FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state); #endif - } - break; - - case GL_CULL_FACE: - r300UpdateCulling(ctx); - break; - - case GL_POLYGON_OFFSET_POINT: - case GL_POLYGON_OFFSET_LINE: - break; - - case GL_POLYGON_OFFSET_FILL: - R300_STATECHANGE(r300, occlusion_cntl); - if (state) { - r300->hw.occlusion_cntl.cmd[1] |= (3 << 0); - } else { - r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0); - } - break; - default: - radeonEnable(ctx, cap, state); - return; } } static void r300UpdatePolygonMode(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - uint32_t hw_mode = 0; + uint32_t hw_mode = R300_GA_POLY_MODE_DISABLE; + /* Only do something if a polygon mode is wanted, default is GL_FILL */ if (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL) { GLenum f, b; + /* Handle GL_CW (clock wise and GL_CCW (counter clock wise) + * correctly by selecting the correct front and back face + */ if (ctx->Polygon.FrontFace == GL_CCW) { f = ctx->Polygon.FrontMode; b = ctx->Polygon.BackMode; @@ -593,29 +627,30 @@ static void r300UpdatePolygonMode(GLcontext * ctx) b = ctx->Polygon.FrontMode; } - hw_mode |= R300_PM_ENABLED; + /* Enable polygon mode */ + hw_mode |= R300_GA_POLY_MODE_DUAL; switch (f) { case GL_LINE: - hw_mode |= R300_PM_FRONT_LINE; + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_LINE; break; - case GL_POINT: /* noop */ - hw_mode |= R300_PM_FRONT_POINT; + case GL_POINT: + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_POINT; break; case GL_FILL: - hw_mode |= R300_PM_FRONT_FILL; + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_TRI; break; } switch (b) { case GL_LINE: - hw_mode |= R300_PM_BACK_LINE; + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_LINE; break; - case GL_POINT: /* noop */ - hw_mode |= R300_PM_BACK_POINT; + case GL_POINT: + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_POINT; break; case GL_FILL: - hw_mode |= R300_PM_BACK_FILL; + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_TRI; break; } } @@ -624,6 +659,9 @@ static void r300UpdatePolygonMode(GLcontext * ctx) R300_STATECHANGE(r300, polygon_mode); r300->hw.polygon_mode.cmd[1] = hw_mode; } + + r300->hw.polygon_mode.cmd[2] = 0x00000001; + r300->hw.polygon_mode.cmd[3] = 0x00000000; } /** @@ -680,9 +718,10 @@ static void r300ColorMask(GLcontext * ctx, GLboolean r, GLboolean g, GLboolean b, GLboolean a) { r300ContextPtr r300 = R300_CONTEXT(ctx); - int mask = (r ? R300_COLORMASK0_R : 0) | - (g ? R300_COLORMASK0_G : 0) | - (b ? R300_COLORMASK0_B : 0) | (a ? R300_COLORMASK0_A : 0); + int mask = (r ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | + (g ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | + (b ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | + (a ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0); if (mask != r300->hw.cmk.cmd[R300_CMK_COLORMASK]) { R300_STATECHANGE(r300, cmk); @@ -708,15 +747,13 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) switch (pname) { case GL_FOG_MODE: - if (!ctx->Fog.Enabled) - return; switch (ctx->Fog.Mode) { case GL_LINEAR: R300_STATECHANGE(r300, fogs); r300->hw.fogs.cmd[R300_FOGS_STATE] = (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) | - R300_FOG_MODE_LINEAR; + cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | + R300_FG_FOG_BLEND_FN_LINEAR; if (ctx->Fog.Start == ctx->Fog.End) { fogScale.f = -1.0; @@ -733,8 +770,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) R300_STATECHANGE(r300, fogs); r300->hw.fogs.cmd[R300_FOGS_STATE] = (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) | - R300_FOG_MODE_EXP; + cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | + R300_FG_FOG_BLEND_FN_EXP; fogScale.f = 0.0933 * ctx->Fog.Density; fogStart.f = 0.0; break; @@ -742,8 +779,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) R300_STATECHANGE(r300, fogs); r300->hw.fogs.cmd[R300_FOGS_STATE] = (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) | - R300_FOG_MODE_EXP2; + cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | + R300_FG_FOG_BLEND_FN_EXP2; fogScale.f = 0.3 * ctx->Fog.Density; fogStart.f = 0.0; default: @@ -801,14 +838,32 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) } } +static void r300SetFogState(GLcontext * ctx, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + R300_STATECHANGE(r300, fogs); + if (state) { + r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FG_FOG_BLEND_ENABLE; + + r300Fogfv(ctx, GL_FOG_MODE, NULL); + r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); + r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); + r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); + r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); + } else { + r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FG_FOG_BLEND_ENABLE; + } +} + /* ============================================================= * Point state */ static void r300PointSize(GLcontext * ctx, GLfloat size) { r300ContextPtr r300 = R300_CONTEXT(ctx); - - size = ctx->Point._Size; + /* same size limits for AA, non-AA points */ + size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); R300_STATECHANGE(r300, ps); r300->hw.ps.cmd[R300_PS_POINTSIZE] = @@ -816,6 +871,31 @@ static void r300PointSize(GLcontext * ctx, GLfloat size) ((int)(size * 6) << R300_POINTSIZE_Y_SHIFT); } +static void r300PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + switch (pname) { + case GL_POINT_SIZE_MIN: + R300_STATECHANGE(r300, ga_point_minmax); + r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MIN_MASK; + r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MinSize * 6.0); + break; + case GL_POINT_SIZE_MAX: + R300_STATECHANGE(r300, ga_point_minmax); + r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MAX_MASK; + r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MaxSize * 6.0) + << R300_GA_POINT_MINMAX_MAX_SHIFT; + break; + case GL_POINT_DISTANCE_ATTENUATION: + break; + case GL_POINT_FADE_THRESHOLD_SIZE: + break; + default: + break; + } +} + /* ============================================================= * Line state */ @@ -823,11 +903,12 @@ static void r300LineWidth(GLcontext * ctx, GLfloat widthf) { r300ContextPtr r300 = R300_CONTEXT(ctx); - widthf = ctx->Line._Width; - + widthf = CLAMP(widthf, + ctx->Const.MinPointSize, + ctx->Const.MaxPointSize); R300_STATECHANGE(r300, lcntl); - r300->hw.lcntl.cmd[1] = (int)(widthf * 6.0); - r300->hw.lcntl.cmd[1] |= R300_LINE_CNT_VE; + r300->hw.lcntl.cmd[1] = + R300_LINE_CNT_HO | R300_LINE_CNT_VE | (int)(widthf * 6.0); } static void r300PolygonMode(GLcontext * ctx, GLenum face, GLenum mode) @@ -873,6 +954,7 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) r300ContextPtr rmesa = R300_CONTEXT(ctx); R300_STATECHANGE(rmesa, shade); + rmesa->hw.shade.cmd[1] = 0x00000002; switch (mode) { case GL_FLAT: rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT; @@ -883,6 +965,8 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) default: return; } + rmesa->hw.shade.cmd[3] = 0x00000000; + rmesa->hw.shade.cmd[4] = 0x00000000; } static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, @@ -891,37 +975,36 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint refmask = (((ctx->Stencil. - Ref[0] & 0xff) << R300_RB3D_ZS2_STENCIL_REF_SHIFT) | ((ctx-> - Stencil. - ValueMask - [0] & - 0xff) - << - R300_RB3D_ZS2_STENCIL_MASK_SHIFT)); + Ref[0] & 0xff) << R300_STENCILREF_SHIFT) | ((ctx-> + Stencil. + ValueMask + [0] & + 0xff) + << + R300_STENCILMASK_SHIFT)); GLuint flag; R300_STATECHANGE(rmesa, zs); - + rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK << - R300_RB3D_ZS1_FRONT_FUNC_SHIFT) + R300_S_FRONT_FUNC_SHIFT) | (R300_ZS_MASK << - R300_RB3D_ZS1_BACK_FUNC_SHIFT)); + R300_S_BACK_FUNC_SHIFT)); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= - ~((R300_RB3D_ZS2_STENCIL_MASK << - R300_RB3D_ZS2_STENCIL_REF_SHIFT) | - (R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT)); + ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT)); flag = translate_func(ctx->Stencil.Function[0]); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= - (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT); + (flag << R300_S_FRONT_FUNC_SHIFT); if (ctx->Stencil._TestTwoSide) flag = translate_func(ctx->Stencil.Function[1]); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= - (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT); + (flag << R300_S_BACK_FUNC_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask; } @@ -931,11 +1014,12 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) R300_STATECHANGE(rmesa, zs); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= - ~(R300_RB3D_ZS2_STENCIL_MASK << - R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT); + ~(R300_STENCILREF_MASK << + R300_STENCILWRITEMASK_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= (ctx->Stencil. - WriteMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT; + WriteMask[0] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT; } static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, @@ -946,49 +1030,37 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, R300_STATECHANGE(rmesa, zs); /* It is easier to mask what's left.. */ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= - (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT) | - (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | - (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT); + (R300_ZS_MASK << R300_Z_FUNC_SHIFT) | + (R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) | + (R300_ZS_MASK << R300_S_BACK_FUNC_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (translate_stencil_op(ctx->Stencil.FailFunc[0]) << - R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) + R300_S_FRONT_SFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << - R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) + R300_S_FRONT_ZFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << - R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT); + R300_S_FRONT_ZPASS_OP_SHIFT); if (ctx->Stencil._TestTwoSide) { rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (translate_stencil_op(ctx->Stencil.FailFunc[1]) << - R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) + R300_S_BACK_SFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZFailFunc[1]) << - R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT) + R300_S_BACK_ZFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZPassFunc[1]) << - R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT); + R300_S_BACK_ZPASS_OP_SHIFT); } else { rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (translate_stencil_op(ctx->Stencil.FailFunc[0]) << - R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) + R300_S_BACK_SFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << - R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT) + R300_S_BACK_ZFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << - R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT); + R300_S_BACK_ZPASS_OP_SHIFT); } } -static void r300ClearStencil(GLcontext * ctx, GLint s) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - rmesa->state.stencil.clear = - ((GLuint) (ctx->Stencil.Clear & 0xff) | - (R300_RB3D_ZS2_STENCIL_MASK << - R300_RB3D_ZS2_STENCIL_MASK_SHIFT) | ((ctx->Stencil. - WriteMask[0] & 0xff) << - R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT)); -} - /* ============================================================= * Window position and viewport transformation */ @@ -1076,12 +1148,12 @@ void r300UpdateDrawBuffer(GLcontext * ctx) struct gl_framebuffer *fb = ctx->DrawBuffer; driRenderbuffer *drb; - if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) { + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { /* draw to front */ drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT]. Renderbuffer; - } else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) { + } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { /* draw to back */ drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT]. @@ -1253,8 +1325,8 @@ static unsigned long gen_fixed_filter(unsigned long f) (R300_TX_CLAMP << R300_TX_WRAP_T_SHIFT)) { needs_fixing |= 2; } - if ((f & ((7 - 1) << R300_TX_WRAP_Q_SHIFT)) == - (R300_TX_CLAMP << R300_TX_WRAP_Q_SHIFT)) { + if ((f & ((7 - 1) << R300_TX_WRAP_R_SHIFT)) == + (R300_TX_CLAMP << R300_TX_WRAP_R_SHIFT)) { needs_fixing |= 4; } @@ -1262,7 +1334,7 @@ static unsigned long gen_fixed_filter(unsigned long f) return f; mag = f & R300_TX_MAG_FILTER_MASK; - min = f & R300_TX_MIN_FILTER_MASK; + min = f & (R300_TX_MIN_FILTER_MASK|R300_TX_MIN_FILTER_MIP_MASK); /* TODO: Check for anisto filters too */ if ((mag != R300_TX_MAG_FILTER_NEAREST) @@ -1294,12 +1366,100 @@ static unsigned long gen_fixed_filter(unsigned long f) f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT; } if (needs_fixing & 4) { - f &= ~((7 - 1) << R300_TX_WRAP_Q_SHIFT); - f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_Q_SHIFT; + f &= ~((7 - 1) << R300_TX_WRAP_R_SHIFT); + f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT; } return f; } +static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int i; + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code = &fp->code; + + R300_STATECHANGE(r300, fpt); + + for (i = 0; i < code->tex.length; i++) { + int unit; + int opcode; + unsigned long val; + + unit = code->tex.inst[i] >> R300_TEX_ID_SHIFT; + unit &= 15; + + val = code->tex.inst[i]; + val &= ~R300_TEX_ID_MASK; + + opcode = + (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT; + if (opcode == R300_TEX_OP_KIL) { + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + if (tmu_mappings[unit] >= 0) { + val |= + tmu_mappings[unit] << + R300_TEX_ID_SHIFT; + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + // We get here when the corresponding texture image is incomplete + // (e.g. incomplete mipmaps etc.) + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } + } + } + + r300->hw.fpt.cmd[R300_FPT_CMD_0] = + cmdpacket0(R300_US_TEX_INST_0, code->tex.length); +} + +static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +{ + int i; + struct r500_fragment_program *fp = (struct r500_fragment_program *) + (char *)ctx->FragmentProgram._Current; + struct r500_fragment_program_code *code = &fp->code; + + /* find all the texture instructions and relocate the texture units */ + for (i = 0; i < code->inst_end + 1; i++) { + if ((code->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) { + uint32_t val; + int unit, opcode, new_unit; + + val = code->inst[i].inst1; + + unit = (val >> 16) & 0xf; + + val &= ~(0xf << 16); + + opcode = val & (0x7 << 22); + if (opcode == R500_TEX_INST_TEXKILL) { + new_unit = 0; + } else { + if (tmu_mappings[unit] >= 0) { + new_unit = tmu_mappings[unit]; + } else { + new_unit = 0; + } + } + val |= R500_TEX_ID(new_unit); + code->inst[i].inst1 = val; + } + } +} + +static GLuint translate_lod_bias(GLfloat bias) +{ + GLint b = (int)(bias*32); + if (b >= (1 << 9)) + b = (1 << 9)-1; + else if (b < -(1 << 9)) + b = -(1 << 9); + return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK; +} + static void r300SetupTextures(GLcontext * ctx) { int i, mtu; @@ -1363,8 +1523,14 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + hw_tmu] = gen_fixed_filter(t->filter) | (hw_tmu << 28); - /* Currently disabled! */ - r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80; + /* Note: There is a LOD bias per texture unit and a LOD bias + * per texture object. We add them here to get the correct behaviour. + * (The per-texture object LOD bias was introduced in OpenGL 1.4 + * and is not present in the EXT_texture_object extension). + */ + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->filter_1 | + translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias); r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->size; r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + @@ -1395,7 +1561,7 @@ static void r300SetupTextures(GLcontext * ctx) } r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FILTER_0, last_hw_tmu + 1); + cmdpacket0(R300_TX_FILTER0_0, last_hw_tmu + 1); r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1); r300->hw.tex.size.cmd[R300_TEX_CMD_0] = @@ -1403,7 +1569,7 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.format.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1); r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_PITCH_0, last_hw_tmu + 1); + cmdpacket0(R300_TX_FORMAT2_0, last_hw_tmu + 1); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1); r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = @@ -1414,39 +1580,18 @@ static void r300SetupTextures(GLcontext * ctx) if (!fp) /* should only happenen once, just after context is created */ return; - R300_STATECHANGE(r300, fpt); - - for (i = 0; i < fp->tex.length; i++) { - int unit; - int opcode; - unsigned long val; - - unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT; - unit &= 15; - - val = fp->tex.inst[i]; - val &= ~R300_FPITX_IMAGE_MASK; - - opcode = - (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT; - if (opcode == R300_FPITX_OP_KIL) { - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } else { - if (tmu_mappings[unit] >= 0) { - val |= - tmu_mappings[unit] << - R300_FPITX_IMAGE_SHIFT; - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } else { - // We get here when the corresponding texture image is incomplete - // (e.g. incomplete mipmaps etc.) - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + if (fp->mesa_program.UsesKill && last_hw_tmu < 0) { + // The KILL operation requires the first texture unit + // to be enabled. + r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_FILTER0_0, 1); } - } - - r300->hw.fpt.cmd[R300_FPT_CMD_0] = - cmdpacket0(R300_PFS_TEXI_0, fp->tex.length); + r300SetupFragmentShaderTextures(ctx, tmu_mappings); + } else + r500SetupFragmentShaderTextures(ctx, tmu_mappings); if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", @@ -1466,28 +1611,22 @@ static void r300SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); /* I'm still unsure if these are needed */ - GLuint interp_magic[8] = { - 0x00, - R300_RS_INTERP_1_UNKNOWN, - R300_RS_INTERP_2_UNKNOWN, - R300_RS_INTERP_3_UNKNOWN, - 0x00, - 0x00, - 0x00, - 0x00 - }; + GLuint interp_col[8]; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; - int in_texcoords, col_interp_nr; - int i; + int col_interp_nr; + int rs_tex_count = 0, rs_col_count = 0; + int i, count; + + memset(interp_col, 0, sizeof(interp_col)); if (hw_tcl_on) - OutputsWritten.vp_outputs = - CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; else - RENDERINPUTS_COPY(OutputsWritten.index_bitset, - r300->state.render_inputs_bitset); + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); if (ctx->FragmentProgram._Current) InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; @@ -1500,9 +1639,9 @@ static void r300SetupRSUnit(GLcontext * ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); - fp_reg = in_texcoords = col_interp_nr = high_rr = 0; + fp_reg = col_interp_nr = high_rr = 0; - r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0; + r300->hw.rr.cmd[R300_RR_INST_1] = 0; if (InputsRead & FRAG_BIT_WPOS) { for (i = 0; i < ctx->Const.MaxTextureUnits; i++) @@ -1518,96 +1657,267 @@ static void r300SetupRSUnit(GLcontext * ctx) InputsRead &= ~FRAG_BIT_WPOS; } + if (InputsRead & FRAG_BIT_COL0) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + interp_col[0] |= R300_RS_COL_PTR(rs_col_count); + if (count == 3) + interp_col[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + else + interp_col[0] = R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + + if (InputsRead & FRAG_BIT_COL1) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + if (count == 3) + interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB0); + interp_col[1] |= R300_RS_COL_PTR(1); + rs_col_count += count; + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 - | R300_RS_INTERP_USED - | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - | interp_magic[i]; + int swiz; - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; + /* with TCL we always seem to route 4 components */ + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count; + switch(count) { + case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; + case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; + default: + case 1: + case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; + }; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] |= swiz; + + r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { + + rs_tex_count += count; + //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R300_RS_ROUTE_ENABLE | i /* source INTERP */ - | (fp_reg << R300_RS_ROUTE_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R300_RS_INST_TEX_CN_WRITE | i /* source INTERP */ + | (fp_reg << R300_RS_INST_TEX_ADDR_SHIFT); high_rr = fp_reg; - if (!R300_OUTPUTS_WRITTEN_TEST - (OutputsWritten, VERT_RESULT_TEX0 + i, - _TNL_ATTRIB_TEX(i))) { - /* Passing invalid data here can lock the GPU. */ - WARN_ONCE - ("fragprog wants coords for tex%d, vp doesn't provide them!\n", - i); - //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base); - //_mesa_exit(-1); + /* Passing invalid data here can lock the GPU. */ + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + InputsRead &= ~(FRAG_BIT_TEX0 << i); + fp_reg++; + } else { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); } - InputsRead &= ~(FRAG_BIT_TEX0 << i); - fp_reg++; } - /* Need to count all coords enabled at vof */ - if (R300_OUTPUTS_WRITTEN_TEST - (OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) - in_texcoords++; } if (InputsRead & FRAG_BIT_COL0) { - if (!R300_OUTPUTS_WRITTEN_TEST - (OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - WARN_ONCE - ("fragprog wants col0, vp doesn't provide it\n"); - goto out; /* FIXME */ - //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base); - //_mesa_exit(-1); + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); + InputsRead &= ~FRAG_BIT_COL0; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); } + } + + if (InputsRead & FRAG_BIT_COL1) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + r300->hw.rr.cmd[R300_RR_INST_1] |= R300_RS_INST_COL_ID(1) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); + InputsRead &= ~FRAG_BIT_COL1; + if (high_rr < 1) + high_rr = 1; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } + } - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 - | R300_RS_ROUTE_0_COLOR - | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); - InputsRead &= ~FRAG_BIT_COL0; + /* Need at least one. This might still lock as the values are undefined... */ + if (rs_tex_count == 0 && col_interp_nr == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); col_interp_nr++; } - out: + + r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT) + | (col_interp_nr << R300_IC_COUNT_SHIFT) + | R300_HIRES_EN; + + assert(high_rr >= 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1); + r300->hw.rc.cmd[2] = high_rr; + + if (InputsRead) + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); +} + +static void r500SetupRSUnit(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + /* I'm still unsure if these are needed */ + GLuint interp_col[8]; + union r300_outputs_written OutputsWritten; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint InputsRead; + int fp_reg, high_rr; + int rs_col_count = 0; + int in_texcoords, col_interp_nr; + int i, count; + + memset(interp_col, 0, sizeof(interp_col)); + if (hw_tcl_on) + OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + else + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); + + if (ctx->FragmentProgram._Current) + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + else { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; /* This should only ever happen once.. */ + } + + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + fp_reg = col_interp_nr = high_rr = in_texcoords = 0; + + r300->hw.rr.cmd[R300_RR_INST_1] = 0; + + if (InputsRead & FRAG_BIT_WPOS) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (!(InputsRead & (FRAG_BIT_TEX0 << i))) + break; + + if (i == ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tno free texcoord found...\n"); + _mesa_exit(-1); + } + + InputsRead |= (FRAG_BIT_TEX0 << i); + InputsRead &= ~FRAG_BIT_WPOS; + } + + if (InputsRead & FRAG_BIT_COL0) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + interp_col[0] |= R500_RS_COL_PTR(rs_col_count); + if (count == 3) + interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + else + interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001); if (InputsRead & FRAG_BIT_COL1) { - if (!R300_OUTPUTS_WRITTEN_TEST - (OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - WARN_ONCE - ("fragprog wants col1, vp doesn't provide it\n"); - //_mesa_exit(-1); + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + interp_col[1] |= R500_RS_COL_PTR(1); + if (count == 3) + interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB0); + rs_col_count += count; + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + GLuint swiz = 0; + + /* with TCL we always seem to route 4 components */ + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + /* always have on texcoord */ + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT; + if (count >= 2) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; + else + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; + + if (count >= 3) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT; + else + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + + if (count == 4) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT; + else + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + + } else + swiz = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | swiz; + + r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + //assert(r300->state.texture.tc_count != 0); + r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ + | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT); + high_rr = fp_reg; + + /* Passing invalid data here can lock the GPU. */ + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + InputsRead &= ~(FRAG_BIT_TEX0 << i); + fp_reg++; + } else { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + } } + } - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= - R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | - (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); - InputsRead &= ~FRAG_BIT_COL1; - if (high_rr < 1) - high_rr = 1; - col_interp_nr++; + if (InputsRead & FRAG_BIT_COL0) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); + InputsRead &= ~FRAG_BIT_COL0; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } + } + + if (InputsRead & FRAG_BIT_COL1) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); + InputsRead &= ~FRAG_BIT_COL1; + if (high_rr < 1) + high_rr = 1; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } } /* Need at least one. This might still lock as the values are undefined... */ if (in_texcoords == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 - | R300_RS_ROUTE_0_COLOR - | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); col_interp_nr++; } - r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_RS_CNTL_TC_CNT_SHIFT) - | (col_interp_nr << R300_RS_CNTL_CI_CNT_SHIFT) - | R300_RS_CNTL_0_UNKNOWN_18; + r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_IT_COUNT_SHIFT) + | (col_interp_nr << R300_IC_COUNT_SHIFT) + | R300_HIRES_EN; assert(high_rr >= 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = - cmdpacket0(R300_RS_ROUTE_0, high_rr + 1); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1); r300->hw.rc.cmd[2] = 0xC0 | high_rr; if (InputsRead) - WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", - InputsRead); + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); } -#define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) + + #define bump_vpu_count(ptr, new_count) do{\ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ @@ -1616,9 +1926,7 @@ static void r300SetupRSUnit(GLcontext * ctx) if(_nc>_p->vpu.count)_p->vpu.count=_nc;\ }while(0) -void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, struct - r300_vertex_shader_fragment - *vsf) +static INLINE void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, struct r300_vertex_shader_fragment *vsf) { int i; @@ -1626,8 +1934,7 @@ void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, s return; if (vsf->length & 0x3) { - fprintf(stderr, - "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n"); + fprintf(stderr, "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n"); _mesa_exit(-1); } @@ -1635,147 +1942,172 @@ void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, s case 0: R300_STATECHANGE(r300, vpi); for (i = 0; i < vsf->length; i++) - r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + - 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vpi.cmd, - vsf->length + 4 * (dest & 0xff)); + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, vsf->length + 4 * (dest & 0xff)); break; case 2: R300_STATECHANGE(r300, vpp); for (i = 0; i < vsf->length; i++) - r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + - 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vpp.cmd, - vsf->length + 4 * (dest & 0xff)); + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, vsf->length + 4 * (dest & 0xff)); break; case 4: R300_STATECHANGE(r300, vps); for (i = 0; i < vsf->length; i++) - r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = - (vsf->body.d[i]); - bump_vpu_count(r300->hw.vps.cmd, - vsf->length + 4 * (dest & 0xff)); + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, vsf->length + 4 * (dest & 0xff)); break; default: - fprintf(stderr, - "%s:%s don't know how to handle dest %04x\n", - __FILE__, __FUNCTION__, dest); + fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); _mesa_exit(-1); } } -/* just a skeleton for now.. */ +#define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c)) -/* Generate a vertex shader that simply transforms vertex and texture coordinates, - while leaving colors intact. Nothing fancy (like lights) - If implementing lights make a copy first, so it is easy to switch between the two versions */ -static void r300GenerateSimpleVertexShader(r300ContextPtr r300) +static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, + GLuint output_count, GLuint temp_count) { - int i; - GLuint o_reg = 0; - - /* Allocate parameters */ - r300->state.vap_param.transform_offset = 0x0; /* transform matrix */ - r300->state.vertex_shader.param_offset = 0x0; - r300->state.vertex_shader.param_count = 0x4; /* 4 vector values - 4x4 matrix */ - - r300->state.vertex_shader.program_start = 0x0; - r300->state.vertex_shader.unknown_ptr1 = 0x4; /* magic value ? */ - r300->state.vertex_shader.program_end = 0x0; - - r300->state.vertex_shader.unknown_ptr2 = 0x0; /* magic value */ - r300->state.vertex_shader.unknown_ptr3 = 0x4; /* magic value */ + int vtx_mem_size; + int pvs_num_slots; + int pvs_num_cntrls; + + /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS. + * See r500 docs 6.5.2 - done in emit */ + + /* avoid division by zero */ + if (input_count == 0) input_count = 1; + if (output_count == 0) output_count = 1; + if (temp_count == 0) temp_count = 1; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vtx_mem_size = 128; + else + vtx_mem_size = 72; + + pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count); + pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count); + + R300_STATECHANGE(rmesa, vap_cntl); + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = + (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) | + (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= R500_TCL_STATE_OPTIMIZATION; + } else + /* not sure about non-tcl */ + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT); - r300->state.vertex_shader.unknown1.length = 0; - r300->state.vertex_shader.unknown2.length = 0; +} -#define WRITE_OP(oper,source1,source2,source3) {\ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].op=(oper); \ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[0]=(source1); \ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[1]=(source2); \ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[2]=(source3); \ - r300->state.vertex_shader.program_end++; \ +static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) +{ + struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader); + GLuint o_reg = 0; + GLuint i_reg = 0; + int i; + int inst_count = 0; + int param_count = 0; + int program_end = 0; + + for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) { + if (rmesa->state.sw_tcl_inputs[i] != -1) { + prog->program.body.i[program_end + 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, GL_FALSE, GL_FALSE, o_reg++, VSF_FLAG_ALL, PVS_DST_REG_OUT); + prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + program_end += 4; + i_reg++; + } } - for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) - if (r300->state.sw_tcl_inputs[i] != -1) { - WRITE_OP(EASY_VSF_OP(MUL, o_reg++, ALL, RESULT), - VSF_REG(r300->state.sw_tcl_inputs[i]), - VSF_ATTR_UNITY(r300->state. - sw_tcl_inputs[i]), - VSF_UNITY(r300->state.sw_tcl_inputs[i]) - ) + prog->program.length = program_end; - } + r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, + &(prog->program)); + inst_count = (prog->program.length / 4) - 1; - r300->state.vertex_shader.program_end--; /* r300 wants program length to be one more - no idea why */ - r300->state.vertex_shader.program.length = - (r300->state.vertex_shader.program_end + 1) * 4; + r300VapCntl(rmesa, i_reg, o_reg, 0); - r300->state.vertex_shader.unknown_ptr1 = r300->state.vertex_shader.program_end; /* magic value ? */ - r300->state.vertex_shader.unknown_ptr2 = r300->state.vertex_shader.program_end; /* magic value ? */ - r300->state.vertex_shader.unknown_ptr3 = r300->state.vertex_shader.program_end; /* magic value ? */ + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = + (0 << R300_PVS_FIRST_INST_SHIFT) | + (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = + (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = + (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); +} +static int bit_count (int x) +{ + x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U); + x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U); + x = (x >> 16) + (x & 0xffff); + x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f); + return (x >> 8) + (x & 0x00ff); } -static void r300SetupVertexProgram(r300ContextPtr rmesa) +static void r300SetupRealVertexProgram(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; - int inst_count; - int param_count; - struct r300_vertex_program *prog = - (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + struct r300_vertex_program *prog = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + int inst_count = 0; + int param_count = 0; - ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + /* FIXME: r300SetupVertexProgramFragment */ R300_STATECHANGE(rmesa, vpp); param_count = - r300VertexProgUpdateParams(ctx, (struct r300_vertex_program_cont *) - ctx->VertexProgram._Current /*prog */ , + r300VertexProgUpdateParams(ctx, + (struct r300_vertex_program_cont *) + ctx->VertexProgram._Current, (float *)&rmesa->hw.vpp. cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - /* Reset state, in case we don't use something */ - ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; - ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); + inst_count = (prog->program.length / 4) - 1; - setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, &(prog->program)); - -#if 0 - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, - &(rmesa->state.vertex_shader.unknown1)); - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, - &(rmesa->state.vertex_shader.unknown2)); -#endif - - inst_count = prog->program.length / 4 - 1; + r300VapCntl(rmesa, bit_count(prog->key.InputsRead), + bit_count(prog->key.OutputsWritten), prog->num_temporaries); R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) - | (inst_count /*pos_end */ << R300_PVS_CNTL_1_POS_END_SHIFT) - | (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + (0 << R300_PVS_FIRST_INST_SHIFT) | + (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) - | (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (0 /*rmesa->state.vertex_shader.unknown_ptr2 */ << - R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) - | (inst_count /*rmesa->state.vertex_shader.unknown_ptr3 */ << - 0); - - /* This is done for vertex shader fragments, but also needs to be done for vap_pvs, - so I leave it as a reminder */ -#if 0 - reg_start(R300_VAP_PVS_WAITIDLE, 0); - e32(0x00000000); -#endif + (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); } -static void r300SetupVertexShader(r300ContextPtr rmesa) +static void r300SetupVertexProgram(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; @@ -1788,50 +2120,71 @@ static void r300SetupVertexShader(r300ContextPtr rmesa) 0x400 area might have something to do with pixel shaders as it appears right after pfs programming. 0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */ //setup_vertex_shader_fragment(rmesa, 0x406, &unk4); - if (hw_tcl_on - && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))-> - translated) { - r300SetupVertexProgram(rmesa); - return; + if (hw_tcl_on && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->translated) { + r300SetupRealVertexProgram(rmesa); + } else { + /* FIXME: This needs to be replaced by vertex shader generation code. */ + r300SetupDefaultVertexProgram(rmesa); } - /* This needs to be replaced by vertex shader generation code */ - r300GenerateSimpleVertexShader(rmesa); - - setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, - &(rmesa->state.vertex_shader.program)); - -#if 0 - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, - &(rmesa->state.vertex_shader.unknown1)); - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, - &(rmesa->state.vertex_shader.unknown2)); -#endif +} - R300_STATECHANGE(rmesa, pvs); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (rmesa->state.vertex_shader. - program_start << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) - | (rmesa->state.vertex_shader. - unknown_ptr1 << R300_PVS_CNTL_1_POS_END_SHIFT) - | (rmesa->state.vertex_shader. - program_end << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (rmesa->state.vertex_shader. - param_offset << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) - | (rmesa->state.vertex_shader. - param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (rmesa->state.vertex_shader. - unknown_ptr2 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) - | (rmesa->state.vertex_shader.unknown_ptr3 << 0); +/** + * Enable/Disable states. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) +{ + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(cap), + state ? "GL_TRUE" : "GL_FALSE"); - /* This is done for vertex shader fragments, but also needs to be done for vap_pvs, - so I leave it as a reminder */ -#if 0 - reg_start(R300_VAP_PVS_WAITIDLE, 0); - e32(0x00000000); -#endif + switch (cap) { + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + /* empty */ + break; + case GL_FOG: + r300SetFogState(ctx, state); + break; + case GL_ALPHA_TEST: + r300SetAlphaState(ctx); + break; + case GL_COLOR_LOGIC_OP: + r300SetLogicOpState(ctx); + /* fall-through, because logic op overrides blending */ + case GL_BLEND: + r300SetBlendState(ctx); + break; + case GL_CLIP_PLANE0: + case GL_CLIP_PLANE1: + case GL_CLIP_PLANE2: + case GL_CLIP_PLANE3: + case GL_CLIP_PLANE4: + case GL_CLIP_PLANE5: + r300SetClipPlaneState(ctx, cap, state); + break; + case GL_DEPTH_TEST: + r300SetDepthState(ctx); + break; + case GL_STENCIL_TEST: + r300SetStencilState(ctx, state); + break; + case GL_CULL_FACE: + r300UpdateCulling(ctx); + break; + case GL_POLYGON_OFFSET_POINT: + case GL_POLYGON_OFFSET_LINE: + case GL_POLYGON_OFFSET_FILL: + r300SetPolygonOffsetState(ctx, state); + break; + default: + radeonEnable(ctx, cap, state); + break; + } } /** @@ -1848,12 +2201,6 @@ static void r300ResetHwState(r300ContextPtr r300) if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s\n", __FUNCTION__); - /* This is a place to initialize registers which - have bitfields accessed by different functions - and not all bits are used */ - - /* go and compute register values from GL state */ - r300UpdateWindow(ctx); r300ColorMask(ctx, @@ -1879,17 +2226,11 @@ static void r300ResetHwState(r300ContextPtr r300) r300UpdateTextureState(ctx); r300SetBlendState(ctx); + r300SetLogicOpState(ctx); r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef); r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); - /* Initialize magic registers - TODO : learn what they really do, or get rid of - those we don't have to touch */ - if (!has_tcl) - r300->hw.vap_cntl.cmd[1] = 0x0014045a; - else - r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */ r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA @@ -1898,123 +2239,137 @@ static void r300ResetHwState(r300ContextPtr r300) | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT; r300->hw.vte.cmd[2] = 0x00000008; - r300->hw.unk2134.cmd[1] = 0x00FFFFFF; - r300->hw.unk2134.cmd[2] = 0x00000000; - if (_mesa_little_endian()) - r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP; - else - r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP; + r300->hw.vap_vf_max_vtx_indx.cmd[1] = 0x00FFFFFF; + r300->hw.vap_vf_max_vtx_indx.cmd[2] = 0x00000000; + +#ifdef MESA_LITTLE_ENDIAN + r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP; +#else + r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP; +#endif /* disable VAP/TCL on non-TCL capable chips */ if (!has_tcl) r300->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS; - r300->hw.unk21DC.cmd[1] = 0xAAAAAAAA; + r300->hw.vap_psc_sgn_norm_cntl.cmd[1] = 0xAAAAAAAA; - r300->hw.vap_clip_cntl.cmd[1] = R300_221C_NORMAL; + /* XXX: Other families? */ + if (has_tcl) { + r300->hw.vap_clip_cntl.cmd[1] = R300_PS_UCP_MODE_DIST_COP; - r300->hw.unk2220.cmd[1] = r300PackFloat32(1.0); - r300->hw.unk2220.cmd[2] = r300PackFloat32(1.0); - r300->hw.unk2220.cmd[3] = r300PackFloat32(1.0); - r300->hw.unk2220.cmd[4] = r300PackFloat32(1.0); + r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */ + r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */ + r300->hw.vap_clip.cmd[3] = r300PackFloat32(1.0); /* Y */ + r300->hw.vap_clip.cmd[4] = r300PackFloat32(1.0); /* Y */ - /* what about other chips than r300 or rv350??? */ - if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) - r300->hw.unk2288.cmd[1] = R300_2288_R300; - else - r300->hw.unk2288.cmd[1] = R300_2288_RV350; + switch (r300->radeon.radeonScreen->chip_family) { + case CHIP_FAMILY_R300: + r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_R300; + break; + default: + r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_RV350; + break; + } + } r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE | R300_GB_LINE_STUFF_ENABLE - | R300_GB_TRIANGLE_STUFF_ENABLE /*| R300_GB_UNK31 */ ; + | R300_GB_TRIANGLE_STUFF_ENABLE; r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666; r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666; - if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350)) - r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_R300 | - R300_GB_TILE_SIZE_16; - else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) - r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_RV410 | - R300_GB_TILE_SIZE_16; - else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) - r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_R420 | - R300_GB_TILE_SIZE_16; - else - r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_RV300 | - R300_GB_TILE_SIZE_16; - /* set to 0 when fog is disabled? */ + + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = + R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/; + switch (r300->radeon.radeonScreen->num_gb_pipes) { + case 1: + default: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_RV300; + break; + case 2: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R300; + break; + case 3: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R420_3P; + break; + case 4: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R420; + break; + } + + /* XXX: set to 0 when fog is disabled? */ r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = R300_GB_FOG_SELECT_1_1_W; - r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = R300_AA_DISABLE; /* No antialiasing */ - r300->hw.unk4200.cmd[1] = r300PackFloat32(0.0); - r300->hw.unk4200.cmd[2] = r300PackFloat32(0.0); - r300->hw.unk4200.cmd[3] = r300PackFloat32(1.0); - r300->hw.unk4200.cmd[4] = r300PackFloat32(1.0); + /* XXX: Enable anti-aliasing? */ + r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE; - r300->hw.unk4214.cmd[1] = 0x00050005; + r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0); + r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0); + r300->hw.ga_point_s0.cmd[3] = r300PackFloat32(1.0); + r300->hw.ga_point_s0.cmd[4] = r300PackFloat32(1.0); - r300PointSize(ctx, 0.0); + r300->hw.ga_triangle_stipple.cmd[1] = 0x00050005; - r300->hw.unk4230.cmd[1] = 0x18000006; - r300->hw.unk4230.cmd[2] = 0x00020006; - r300->hw.unk4230.cmd[3] = r300PackFloat32(1.0 / 192.0); + r300PointSize(ctx, 1.0); - r300LineWidth(ctx, 0.0); + r300->hw.ga_point_minmax.cmd[1] = 0x18000006; + r300->hw.ga_point_minmax.cmd[2] = 0x00020006; + r300->hw.ga_point_minmax.cmd[3] = r300PackFloat32(1.0 / 192.0); - r300->hw.unk4260.cmd[1] = 0; - r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0); - r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0); + r300LineWidth(ctx, 1.0); + + r300->hw.ga_line_stipple.cmd[1] = 0; + r300->hw.ga_line_stipple.cmd[2] = r300PackFloat32(0.0); + r300->hw.ga_line_stipple.cmd[3] = r300PackFloat32(1.0); - r300->hw.shade.cmd[1] = 0x00000002; r300ShadeModel(ctx, ctx->Light.ShadeModel); - r300->hw.shade.cmd[3] = 0x00000000; - r300->hw.shade.cmd[4] = 0x00000000; r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode); r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode); - r300->hw.polygon_mode.cmd[2] = 0x00000001; - r300->hw.polygon_mode.cmd[3] = 0x00000000; r300->hw.zbias_cntl.cmd[1] = 0x00000000; r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, ctx->Polygon.OffsetUnits); + r300Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint); + r300Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine); r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill); - r300->hw.unk42C0.cmd[1] = 0x4B7FFFFF; - r300->hw.unk42C0.cmd[2] = 0x00000000; + r300->hw.su_depth_scale.cmd[1] = 0x4B7FFFFF; + r300->hw.su_depth_scale.cmd[2] = 0x00000000; - r300->hw.unk43A4.cmd[1] = 0x0000001C; - r300->hw.unk43A4.cmd[2] = 0x2DA49525; + r300->hw.sc_hyperz.cmd[1] = 0x0000001C; + r300->hw.sc_hyperz.cmd[2] = 0x2DA49525; - r300->hw.unk43E8.cmd[1] = 0x00FFFFFF; + r300->hw.sc_screendoor.cmd[1] = 0x00FFFFFF; - r300->hw.unk46A4.cmd[1] = 0x00001B01; - r300->hw.unk46A4.cmd[2] = 0x00001B0F; - r300->hw.unk46A4.cmd[3] = 0x00001B0F; - r300->hw.unk46A4.cmd[4] = 0x00001B0F; - r300->hw.unk46A4.cmd[5] = 0x00000001; + r300->hw.us_out_fmt.cmd[1] = R500_OUT_FMT_C4_8 | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[2] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[3] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W24; r300Enable(ctx, GL_FOG, ctx->Fog.Enabled); - ctx->Driver.Fogfv(ctx, GL_FOG_MODE, NULL); - ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); - ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); - ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); - ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - ctx->Driver.Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL); + r300Fogfv(ctx, GL_FOG_MODE, NULL); + r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); + r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); + r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); + r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); + r300Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL); - r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; - r300->hw.unk4BD8.cmd[1] = 0; + r300->hw.fg_depth_src.cmd[1] = 0; - r300->hw.unk4E00.cmd[1] = 0; + r300->hw.rb3d_cctl.cmd[1] = 0; r300BlendColor(ctx, ctx->Color.BlendColor); - r300->hw.blend_color.cmd[2] = 0; - r300->hw.blend_color.cmd[3] = 0; /* Again, r300ClearBuffer uses this */ r300->hw.cb.cmd[R300_CB_OFFSET] = @@ -2030,39 +2385,20 @@ static void r300ResetHwState(r300ContextPtr r300) if (r300->radeon.sarea->tiling_enabled) r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; - r300->hw.unk4E50.cmd[1] = 0; - r300->hw.unk4E50.cmd[2] = 0; - r300->hw.unk4E50.cmd[3] = 0; - r300->hw.unk4E50.cmd[4] = 0; - r300->hw.unk4E50.cmd[5] = 0; - r300->hw.unk4E50.cmd[6] = 0; - r300->hw.unk4E50.cmd[7] = 0; - r300->hw.unk4E50.cmd[8] = 0; - r300->hw.unk4E50.cmd[9] = 0; - - r300->hw.unk4E88.cmd[1] = 0; + r300->hw.rb3d_dither_ctl.cmd[1] = 0; + r300->hw.rb3d_dither_ctl.cmd[2] = 0; + r300->hw.rb3d_dither_ctl.cmd[3] = 0; + r300->hw.rb3d_dither_ctl.cmd[4] = 0; + r300->hw.rb3d_dither_ctl.cmd[5] = 0; + r300->hw.rb3d_dither_ctl.cmd[6] = 0; + r300->hw.rb3d_dither_ctl.cmd[7] = 0; + r300->hw.rb3d_dither_ctl.cmd[8] = 0; + r300->hw.rb3d_dither_ctl.cmd[9] = 0; - r300->hw.unk4EA0.cmd[1] = 0x00000000; - r300->hw.unk4EA0.cmd[2] = 0xffffffff; + r300->hw.rb3d_aaresolve_ctl.cmd[1] = 0; - switch (ctx->Visual.depthBits) { - case 16: - r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; - break; - case 24: - r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; - break; - default: - fprintf(stderr, "Error: Unsupported depth %d... exiting\n", - ctx->Visual.depthBits); - _mesa_exit(-1); - - } - /* z compress? */ - //r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32; - - r300->hw.zstencil_format.cmd[3] = 0x00000003; - r300->hw.zstencil_format.cmd[4] = 0x00000000; + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; r300->hw.zb.cmd[R300_ZB_OFFSET] = r300->radeon.radeonScreen->depthOffset + @@ -2070,37 +2406,51 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch; if (r300->radeon.sarea->tiling_enabled) { - /* Turn off when clearing buffers ? */ - r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTH_TILE_ENABLE; + /* XXX: Turn off when clearing buffers ? */ + r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE; if (ctx->Visual.depthBits == 24) r300->hw.zb.cmd[R300_ZB_PITCH] |= - R300_DEPTH_MICROTILE_ENABLE; + R300_DEPTHMICROTILE_TILED; + } + + r300->hw.zb_depthclearvalue.cmd[1] = 0; + + switch (ctx->Visual.depthBits) { + case 16: + r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_16BIT_INT_Z; + break; + case 24: + r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; + break; + default: + fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits); + _mesa_exit(-1); } - r300->hw.unk4F28.cmd[1] = 0; + r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE; + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; + r300SetEarlyZState(ctx); r300->hw.unk4F30.cmd[1] = 0; r300->hw.unk4F30.cmd[2] = 0; - r300->hw.unk4F44.cmd[1] = 0; + r300->hw.zb_hiz_offset.cmd[1] = 0; - r300->hw.unk4F54.cmd[1] = 0; + r300->hw.zb_hiz_pitch.cmd[1] = 0; + r300VapCntl(r300, 0, 0, 0); if (has_tcl) { r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0; r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0; r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0); r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0; } -//END: TODO + r300->hw.all_dirty = GL_TRUE; } - -extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx); - -extern int future_hw_tcl_on; void r300UpdateShaders(r300ContextPtr rmesa) { GLcontext *ctx; @@ -2136,18 +2486,40 @@ void r300UpdateShaders(r300ContextPtr rmesa) hw_tcl_on = future_hw_tcl_on = 0; r300ResetHwState(rmesa); + r300UpdateStateParameters(ctx, _NEW_PROGRAM); return; } - r300UpdateStateParameters(ctx, _NEW_PROGRAM); } + r300UpdateStateParameters(ctx, _NEW_PROGRAM); +} +static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, + struct gl_program *program, struct prog_src_register srcreg) +{ + static const GLfloat dummy[4] = { 0, 0, 0, 0 }; + + switch(srcreg.File) { + case PROGRAM_LOCAL_PARAM: + return program->LocalParams[srcreg.Index]; + case PROGRAM_ENV_PARAM: + return ctx->FragmentProgram.Parameters[srcreg.Index]; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + return program->Parameters->ParameterValues[srcreg.Index]; + default: + _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n"); + return dummy; + } } + static void r300SetupPixelShader(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; struct r300_fragment_program *fp = (struct r300_fragment_program *) (char *)ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code; int i, k; if (!fp) /* should only happenen once, just after context is created */ @@ -2159,62 +2531,143 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) __FUNCTION__); return; } -#define OUTPUT_FIELD(st, reg, field) \ - R300_STATECHANGE(rmesa, st); \ - for(i=0;i<=fp->alu_end;i++) \ - rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=fp->alu.inst[i].field;\ - rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmdpacket0(reg, fp->alu_end+1); + code = &fp->code; - OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0); - OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1); - OUTPUT_FIELD(fpi[2], R300_PFS_INSTR2_0, inst2); - OUTPUT_FIELD(fpi[3], R300_PFS_INSTR3_0, inst3); -#undef OUTPUT_FIELD + r300SetupTextures(ctx); + + R300_STATECHANGE(rmesa, fpi[0]); + R300_STATECHANGE(rmesa, fpi[1]); + R300_STATECHANGE(rmesa, fpi[2]); + R300_STATECHANGE(rmesa, fpi[3]); + rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu.length); + rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu.length); + rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu.length); + rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2; + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3; + } R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3); + rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx; + rmesa->hw.fp.cmd[R300_FP_CNTL2] = + (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | + (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); /* I just want to say, the way these nodes are stored.. weird.. */ - for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) { - if (i < (fp->cur_node + 1)) { + for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) { + if (i < (code->cur_node + 1)) { rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (fp->node[i]. - alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT) - | (fp->node[i]. - alu_end << R300_PFS_NODE_ALU_END_SHIFT) - | (fp->node[i]. - tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT) - | (fp->node[i]. - tex_end << R300_PFS_NODE_TEX_END_SHIFT) - | fp->node[i].flags; /* ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0); */ + (code->node[i].alu_offset << R300_ALU_START_SHIFT) | + (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) | + (code->node[i].tex_offset << R300_TEX_START_SHIFT) | + (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) | + code->node[i].flags; } else { rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; } } - /* PFS_CNTL_0 */ - rmesa->hw.fp.cmd[R300_FP_CNTL0] = - fp->cur_node | (fp->first_node_has_tex << 3); - /* PFS_CNTL_1 */ - rmesa->hw.fp.cmd[R300_FP_CNTL1] = fp->max_temp_idx; - /* PFS_CNTL_2 */ - rmesa->hw.fp.cmd[R300_FP_CNTL2] = - (fp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) - | (fp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) - | (fp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) - | (fp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); - R300_STATECHANGE(rmesa, fpp); - for (i = 0; i < fp->const_nr; i++) { - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = - r300PackFloat24(fp->constant[i][0]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = - r300PackFloat24(fp->constant[i][1]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = - r300PackFloat24(fp->constant[i][2]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = - r300PackFloat24(fp->constant[i][3]); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4); + for (i = 0; i < code->const_nr; i++) { + const GLfloat *constant = get_fragmentprogram_constant(ctx, + &fp->mesa_program.Base, code->constant[i]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]); + } +} + +#define bump_r500fp_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/6; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +#define bump_r500fp_const_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +static void r500SetupPixelShader(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r500_fragment_program *fp = (struct r500_fragment_program *) + (char *)ctx->FragmentProgram._Current; + int i; + struct r500_fragment_program_code *code; + + if (!fp) /* should only happenen once, just after context is created */ + return; + + ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; + + r500TranslateFragmentShader(rmesa, fp); + if (!fp->translated) { + fprintf(stderr, "%s: No valid fragment shader, exiting\n", + __FUNCTION__); + return; + } + code = &fp->code; + + if (fp->mesa_program.FogOption != GL_NONE) { + /* Enable HW fog. Try not to squish GL context. + * (Anybody sane remembered to set glFog() opts first!) */ + r300SetFogState(ctx, GL_TRUE); + ctx->Fog.Mode = fp->mesa_program.FogOption; + r300Fogfv(ctx, GL_FOG_MODE, NULL); + } else + /* Make sure HW is matching GL context. */ + r300SetFogState(ctx, ctx->Fog.Enabled); + + r300SetupTextures(ctx); + + R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; + + rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] = + R500_US_CODE_START_ADDR(code->inst_offset) | + R500_US_CODE_END_ADDR(code->inst_end); + rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] = + R500_US_CODE_RANGE_ADDR(code->inst_offset) | + R500_US_CODE_RANGE_SIZE(code->inst_end); + rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] = + R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */ + + R300_STATECHANGE(rmesa, r500fp); + /* Emit our shader... */ + for (i = 0; i < code->inst_end+1; i++) { + rmesa->hw.r500fp.cmd[i*6+1] = code->inst[i].inst0; + rmesa->hw.r500fp.cmd[i*6+2] = code->inst[i].inst1; + rmesa->hw.r500fp.cmd[i*6+3] = code->inst[i].inst2; + rmesa->hw.r500fp.cmd[i*6+4] = code->inst[i].inst3; + rmesa->hw.r500fp.cmd[i*6+5] = code->inst[i].inst4; + rmesa->hw.r500fp.cmd[i*6+6] = code->inst[i].inst5; + } + + bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6); + + R300_STATECHANGE(rmesa, r500fp_const); + for (i = 0; i < code->const_nr; i++) { + const GLfloat *constant = get_fragmentprogram_constant(ctx, + &fp->mesa_program.Base, code->constant[i]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]); } - rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = - cmdpacket0(R300_PFS_PARAM_0_X, fp->const_nr * 4); + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); + } void r300UpdateShaderStates(r300ContextPtr rmesa) @@ -2223,13 +2676,29 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) ctx = rmesa->radeon.glCtx; r300UpdateTextureState(ctx); + r300SetEarlyZState(ctx); - r300SetupPixelShader(rmesa); - r300SetupTextures(ctx); + GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; + if (current_fragment_program_writes_depth(ctx)) + fgdepthsrc = R300_FG_DEPTH_SRC_SHADER; + if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) { + R300_STATECHANGE(rmesa, fg_depth_src); + rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; + } + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupPixelShader(rmesa); + else + r300SetupPixelShader(rmesa); + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupRSUnit(ctx); + else + r300SetupRSUnit(ctx); if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - r300SetupVertexShader(rmesa); - r300SetupRSUnit(ctx); + r300SetupVertexProgram(rmesa); + } /** @@ -2269,16 +2738,12 @@ void r300InitState(r300ContextPtr r300) switch (ctx->Visual.depthBits) { case 16: r300->state.depth.scale = 1.0 / (GLfloat) 0xffff; - depth_fmt = R300_DEPTH_FORMAT_16BIT_INT_Z; - r300->state.stencil.clear = 0x00000000; + depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z; break; case 24: r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff; - depth_fmt = R300_DEPTH_FORMAT_24BIT_INT_Z; - r300->state.stencil.clear = 0x00ff0000; + depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; break; - - default: fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits); @@ -2301,29 +2766,15 @@ static void r300RenderMode(GLcontext * ctx, GLenum mode) (void)mode; } -static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0; - GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; - - R300_STATECHANGE( rmesa, vpucp[p] ); - rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; - rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; - rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2]; - rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3]; -} - - void r300UpdateClipPlanes( GLcontext *ctx ) { r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint p; - + for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; - + R300_STATECHANGE( rmesa, vpucp[p] ); rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; @@ -2353,9 +2804,12 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->Fogfv = r300Fogfv; functions->FrontFace = r300FrontFace; functions->ShadeModel = r300ShadeModel; + functions->LogicOpcode = r300LogicOpcode; + + /* ARB_point_parameters */ + functions->PointParameterfv = r300PointParameter; /* Stencil related */ - functions->ClearStencil = r300ClearStencil; functions->StencilFuncSeparate = r300StencilFuncSeparate; functions->StencilMaskSeparate = r300StencilMaskSeparate; functions->StencilOpSeparate = r300StencilOpSeparate; diff --git a/r300/r300_state.h b/r300/r300_state.h index 21a49b7..0589ab7 100644 --- a/r300/r300_state.h +++ b/r300/r300_state.h @@ -37,8 +37,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" +#define R300_NEWPRIM( rmesa ) \ + do { \ + if ( rmesa->dma.flush ) \ + rmesa->dma.flush( rmesa ); \ + } while (0) + #define R300_STATECHANGE(r300, atom) \ do { \ + R300_NEWPRIM(r300); \ r300->hw.atom.dirty = GL_TRUE; \ r300->hw.is_dirty = GL_TRUE; \ } while(0) @@ -58,13 +65,16 @@ do { \ \ } while (0) -extern void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state); -extern void r300InitState(r300ContextPtr r300); -extern void r300InitStateFuncs(struct dd_function_table *functions); -extern void r300UpdateViewportOffset(GLcontext * ctx); -extern void r300UpdateDrawBuffer(GLcontext * ctx); - -extern void r300UpdateShaders(r300ContextPtr rmesa); -extern void r300UpdateShaderStates(r300ContextPtr rmesa); +// r300_state.c +extern int future_hw_tcl_on; +void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx); +void r300UpdateViewportOffset (GLcontext * ctx); +void r300UpdateDrawBuffer (GLcontext * ctx); +void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state); +void r300UpdateShaders (r300ContextPtr rmesa); +void r300UpdateShaderStates (r300ContextPtr rmesa); +void r300InitState (r300ContextPtr r300); +void r300UpdateClipPlanes (GLcontext * ctx); +void r300InitStateFuncs (struct dd_function_table *functions); #endif /* __R300_STATE_H__ */ diff --git a/r300/r300_swtcl.c b/r300/r300_swtcl.c new file mode 100644 index 0000000..8aebd9b --- /dev/null +++ b/r300/r300_swtcl.c @@ -0,0 +1,697 @@ +/************************************************************************** + +Copyright (C) 2007 Dave Airlie + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Dave Airlie <airlied@linux.ie> + */ + +/* derived from r200 swtcl path */ + + + +#include "glheader.h" +#include "mtypes.h" +#include "colormac.h" +#include "enums.h" +#include "image.h" +#include "imports.h" +#include "light.h" +#include "macros.h" + +#include "swrast/s_context.h" +#include "swrast/s_fog.h" +#include "swrast_setup/swrast_setup.h" +#include "math/m_translate.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" +#include "tnl/t_pipeline.h" + +#include "r300_context.h" +#include "r300_swtcl.h" +#include "r300_state.h" +#include "r300_ioctl.h" +#include "r300_emit.h" +#include "r300_mem.h" + +static void flush_last_swtcl_prim( r300ContextPtr rmesa ); + + +void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset); +void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); +#define EMIT_ATTR( ATTR, STYLE ) \ +do { \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->swtcl.vertex_attr_count++; \ +} while (0) + +#define EMIT_PAD( N ) \ +do { \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ + rmesa->swtcl.vertex_attr_count++; \ +} while (0) + +static void r300SetVertexFormat( GLcontext *ctx ) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + DECLARE_RENDERINPUTS(index_bitset); + GLuint InputsRead = 0, OutputsWritten = 0; + int vap_fmt_0 = 0; + int vap_vte_cntl = 0; + int offset = 0; + int vte = 0; + GLint inputs[VERT_ATTRIB_MAX]; + GLint tab[VERT_ATTRIB_MAX]; + int swizzle[VERT_ATTRIB_MAX][4]; + GLuint i, nr; + GLuint sz, vap_fmt_1 = 0; + + DECLARE_RENDERINPUTS(render_inputs_bitset); + RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); + RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); + RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); + + vte = rmesa->hw.vte.cmd[1]; + vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT); + /* Important: + */ + if ( VB->NdcPtr != NULL ) { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; + vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT; + } + else { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + vte |= R300_VTX_W0_FMT; + } + + assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); + rmesa->swtcl.vertex_attr_count = 0; + + /* EMIT_ATTR's must be in order as they tell t_vertex.c how to + * build up a hardware vertex. + */ + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) { + sz = VB->AttribPtr[VERT_ATTRIB_POS]->size; + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 ); + offset = sz; + } else { + offset = 4; + EMIT_PAD(4 * sizeof(float)); + } + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { + EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); + vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + offset += 1; + } + + if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) { + sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size; + rmesa->swtcl.coloroffset = offset; + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 ); + offset += sz; + } + + rmesa->swtcl.specoffset = 0; + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size; + rmesa->swtcl.specoffset = offset; + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 ); + InputsRead |= 1 << VERT_ATTRIB_COLOR1; + OutputsWritten |= 1 << VERT_RESULT_COL1; + } + + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + sz = VB->TexCoordPtr[i]->size; + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 ); + vap_fmt_1 |= sz << (3 * i); + } + } + } + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { + inputs[i] = nr++; + } else { + inputs[i] = -1; + } + } + + /* Fixed, apply to vir0 only */ + if (InputsRead & (1 << VERT_ATTRIB_POS)) + inputs[VERT_ATTRIB_POS] = 0; + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + inputs[VERT_ATTRIB_COLOR0] = 2; + if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) + inputs[VERT_ATTRIB_COLOR1] = 3; + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { + tab[nr++] = i; + } + } + + for (i = 0; i < nr; i++) { + int ci; + + swizzle[i][0] = SWIZZLE_ZERO; + swizzle[i][1] = SWIZZLE_ZERO; + swizzle[i][2] = SWIZZLE_ZERO; + swizzle[i][3] = SWIZZLE_ONE; + + for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) { + swizzle[i][ci] = ci; + } + } + + R300_NEWPRIM(rmesa); + R300_STATECHANGE(rmesa, vir[0]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = + r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], + VB->AttribPtr, inputs, tab, nr); + R300_STATECHANGE(rmesa, vir[1]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = + r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr); + + R300_STATECHANGE(rmesa, vic); + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); + + R300_STATECHANGE(rmesa, vof); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; + + rmesa->swtcl.vertex_size = + _tnl_install_attrs( ctx, + rmesa->swtcl.vertex_attrs, + rmesa->swtcl.vertex_attr_count, + NULL, 0 ); + + rmesa->swtcl.vertex_size /= 4; + + RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); + + + R300_STATECHANGE(rmesa, vte); + rmesa->hw.vte.cmd[1] = vte; + rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size; +} + + +/* Flush vertices in the current dma region. + */ +static void flush_last_swtcl_prim( r300ContextPtr rmesa ) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + rmesa->dma.flush = NULL; + + if (rmesa->dma.current.buf) { + struct r300_dma_region *current = &rmesa->dma.current; + GLuint current_offset = GET_START(current); + + assert (current->start + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + current->ptr); + + if (rmesa->dma.current.start != rmesa->dma.current.ptr) { + + r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__); + + r300EmitState(rmesa); + + r300EmitVertexAOS( rmesa, + rmesa->swtcl.vertex_size, + current_offset); + + r300EmitVbufPrim( rmesa, + rmesa->swtcl.hw_primitive, + rmesa->swtcl.numverts); + + r300EmitCacheFlush(rmesa); + } + + rmesa->swtcl.numverts = 0; + current->start = current->ptr; + } +} + +/* Alloc space in the current dma region. + */ +static void * +r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) +{ + GLuint bytes = vsize * nverts; + + if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) + r300RefillCurrentDmaRegion( rmesa, bytes); + + if (!rmesa->dma.flush) { + rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->dma.flush = flush_last_swtcl_prim; + } + + ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); + ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); + ASSERT( rmesa->dma.current.start + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + rmesa->dma.current.ptr ); + + { + GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); + rmesa->dma.current.ptr += bytes; + rmesa->swtcl.numverts += nverts; + return head; + } +} + +static GLuint reduced_prim[] = { + GL_POINTS, + GL_LINES, + GL_LINES, + GL_LINES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, +}; + +static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); +static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); +//static void r300ResetLineStipple( GLcontext *ctx ); + +/*********************************************************************** + * Emit primitives as inline vertices * + ***********************************************************************/ + + +#define HAVE_POINTS 1 +#define HAVE_LINES 1 +#define HAVE_LINE_STRIPS 1 +#define HAVE_TRIANGLES 1 +#define HAVE_TRI_STRIPS 1 +#define HAVE_TRI_STRIP_1 0 +#define HAVE_TRI_FANS 1 +#define HAVE_QUADS 0 +#define HAVE_QUAD_STRIPS 0 +#define HAVE_POLYGONS 1 +#define HAVE_ELTS 1 + +#undef LOCAL_VARS +#undef ALLOC_VERTS +#define CTX_ARG r300ContextPtr rmesa +#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size +#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 ) +#define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const char *r300verts = (char *)rmesa->swtcl.verts; +#define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) +#define VERTEX r300Vertex +#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) +#define PRINT_VERTEX(x) +#undef TAG +#define TAG(x) r300_##x +#include "tnl_dd/t_dd_triemit.h" + + + +/*********************************************************************** + * Macros for t_dd_tritmp.h to draw basic primitives * + ***********************************************************************/ + +#define QUAD( a, b, c, d ) r300_quad( rmesa, a, b, c, d ) +#define TRI( a, b, c ) r300_triangle( rmesa, a, b, c ) +#define LINE( a, b ) r300_line( rmesa, a, b ) +#define POINT( a ) r300_point( rmesa, a ) + +/*********************************************************************** + * Build render functions from dd templates * + ***********************************************************************/ + +#define R300_TWOSIDE_BIT 0x01 +#define R300_UNFILLED_BIT 0x02 +#define R300_MAX_TRIFUNC 0x04 + +static struct { + tnl_points_func points; + tnl_line_func line; + tnl_triangle_func triangle; + tnl_quad_func quad; +} rast_tab[R300_MAX_TRIFUNC]; + +#define DO_FALLBACK 0 +#define DO_UNFILLED (IND & R300_UNFILLED_BIT) +#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT) +#define DO_FLAT 0 +#define DO_OFFSET 0 +#define DO_TRI 1 +#define DO_QUAD 1 +#define DO_LINE 1 +#define DO_POINTS 1 +#define DO_FULL_QUAD 1 + +#define HAVE_RGBA 1 +#define HAVE_SPEC 1 +#define HAVE_BACK_COLORS 0 +#define HAVE_HW_FLATSHADE 1 +#define TAB rast_tab + +#define DEPTH_SCALE 1.0 +#define UNFILLED_TRI unfilled_tri +#define UNFILLED_QUAD unfilled_quad +#define VERT_X(_v) _v->v.x +#define VERT_Y(_v) _v->v.y +#define VERT_Z(_v) _v->v.z +#define AREA_IS_CCW( a ) (a < 0) +#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int))) + +/* Only used to pull back colors into vertices (ie, we know color is + * floating point). + */ +#define R300_COLOR( dst, src ) \ +do { \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]); \ +} while (0) + +#define VERT_SET_RGBA( v, c ) if (coloroffset) R300_COLOR( v->ub4[coloroffset], c ) +#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset] +#define VERT_SAVE_RGBA( idx ) if (coloroffset) color[idx] = v[idx]->ui[coloroffset] +#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx] + +#define R300_SPEC( dst, src ) \ +do { \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ +} while (0) + +#define VERT_SET_SPEC( v, c ) if (specoffset) R300_SPEC( v->ub4[specoffset], c ) +#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset]) +#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset] +#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] + +#undef LOCAL_VARS +#undef TAG +#undef INIT + +#define LOCAL_VARS(n) \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + GLuint color[n], spec[n]; \ + GLuint coloroffset = rmesa->swtcl.coloroffset; \ + GLuint specoffset = rmesa->swtcl.specoffset; \ + (void) color; (void) spec; (void) coloroffset; (void) specoffset; + +/*********************************************************************** + * Helpers for rendering unfilled primitives * + ***********************************************************************/ + +#define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] ) +#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive +#undef TAG +#define TAG(x) x +#include "tnl_dd/t_dd_unfilled.h" +#undef IND + + +/*********************************************************************** + * Generate GL render functions * + ***********************************************************************/ + + +#define IND (0) +#define TAG(x) x +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_TWOSIDE_BIT) +#define TAG(x) x##_twoside +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_UNFILLED_BIT) +#define TAG(x) x##_unfilled +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT) +#define TAG(x) x##_twoside_unfilled +#include "tnl_dd/t_dd_tritmp.h" + + + +static void init_rast_tab( void ) +{ + init(); + init_twoside(); + init_unfilled(); + init_twoside_unfilled(); +} + +/**********************************************************************/ +/* Render unclipped begin/end objects */ +/**********************************************************************/ + +#define RENDER_POINTS( start, count ) \ + for ( ; start < count ; start++) \ + r300_point( rmesa, VERT(start) ) +#define RENDER_LINE( v0, v1 ) \ + r300_line( rmesa, VERT(v0), VERT(v1) ) +#define RENDER_TRI( v0, v1, v2 ) \ + r300_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) ) +#define RENDER_QUAD( v0, v1, v2, v3 ) \ + r300_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) ) +#define INIT(x) do { \ + r300RenderPrimitive( ctx, x ); \ +} while (0) +#undef LOCAL_VARS +#define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const GLuint vertsize = rmesa->swtcl.vertex_size; \ + const char *r300verts = (char *)rmesa->swtcl.verts; \ + const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ + const GLboolean stipple = ctx->Line.StippleFlag; \ + (void) elt; (void) stipple; +#define RESET_STIPPLE //if ( stipple ) r200ResetLineStipple( ctx ); +#define RESET_OCCLUSION +#define PRESERVE_VB_DEFS +#define ELT(x) (x) +#define TAG(x) r300_##x##_verts +#include "tnl/t_vb_rendertmp.h" +#undef ELT +#undef TAG +#define TAG(x) r300_##x##_elts +#define ELT(x) elt[x] +#include "tnl/t_vb_rendertmp.h" + + + + +/**********************************************************************/ +/* Choose render functions */ +/**********************************************************************/ +static void r300ChooseRenderState( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint index = 0; + GLuint flags = ctx->_TriangleCaps; + + if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT; + if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; + + if (index != rmesa->swtcl.RenderIndex) { + tnl->Driver.Render.Points = rast_tab[index].points; + tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; + tnl->Driver.Render.Triangle = rast_tab[index].triangle; + tnl->Driver.Render.Quad = rast_tab[index].quad; + + if (index == 0) { + tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts; + tnl->Driver.Render.PrimTabElts = r300_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly; + } else { + tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; + tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; + } + + rmesa->swtcl.RenderIndex = index; + } +} + + +static void r300RenderStart(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + // fprintf(stderr, "%s\n", __FUNCTION__); + + r300ChooseRenderState(ctx); + r300SetVertexFormat(ctx); + + r300UpdateShaders(rmesa); + r300UpdateShaderStates(rmesa); + + r300EmitCacheFlush(rmesa); + + if (rmesa->dma.flush != 0 && + rmesa->dma.flush != flush_last_swtcl_prim) + rmesa->dma.flush( rmesa ); + +} + +static void r300RenderFinish(GLcontext *ctx) +{ +} + +static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + if (rmesa->swtcl.hw_primitive != hwprim) { + R300_NEWPRIM( rmesa ); + rmesa->swtcl.hw_primitive = hwprim; + } +} + +static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) +{ + + r300ContextPtr rmesa = R300_CONTEXT(ctx); + rmesa->swtcl.render_primitive = prim; + + if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) + return; + + r300RasterPrimitive( ctx, reduced_prim[prim] ); + // fprintf(stderr, "%s\n", __FUNCTION__); + +} + +static void r300ResetLineStipple(GLcontext *ctx) +{ + + +} + +void r300InitSwtcl(GLcontext *ctx) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + static int firsttime = 1; + + if (firsttime) { + init_rast_tab(); + firsttime = 0; + } + + tnl->Driver.Render.Start = r300RenderStart; + tnl->Driver.Render.Finish = r300RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; + tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple; + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + /* FIXME: what are these numbers? */ + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + 48 * sizeof(GLfloat) ); + + rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->swtcl.RenderIndex = ~0; + rmesa->swtcl.render_primitive = GL_TRIANGLES; + rmesa->swtcl.hw_primitive = 0; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); + + _tnl_need_projected_coords( ctx, GL_FALSE ); + r300ChooseRenderState(ctx); + + _mesa_validate_all_lighting_tables( ctx ); + + tnl->Driver.NotifyMaterialChange = + _mesa_validate_all_lighting_tables; +} + +void r300DestroySwtcl(GLcontext *ctx) +{ +} + +void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset) +{ + int cmd_reserved = 0; + int cmd_written = 0; + + drm_radeon_cmd_header_t *cmd = NULL; + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + + start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2); + e32(1); + e32(vertex_size | (vertex_size << 8)); + e32(offset); +} + +void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) +{ + + int cmd_reserved = 0; + int cmd_written = 0; + int type, num_verts; + drm_radeon_cmd_header_t *cmd = NULL; + + type = r300PrimitiveType(rmesa, primitive); + num_verts = r300NumVerts(rmesa, vertex_nr, primitive); + + start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); + e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); +} diff --git a/r300/r300_swtcl.h b/r300/r300_swtcl.h new file mode 100644 index 0000000..2ea6ced --- /dev/null +++ b/r300/r300_swtcl.h @@ -0,0 +1,45 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> - original r200 code + * Dave Airlie <airlied@linux.ie> + */ + +#ifndef __R300_SWTCL_H__ +#define __R300_SWTCL_H__ + +#include "mtypes.h" +#include "swrast/swrast.h" +#include "r300_context.h" + +extern void r300InitSwtcl( GLcontext *ctx ); +extern void r300DestroySwtcl( GLcontext *ctx ); + +#endif diff --git a/r300/r300_tex.c b/r300/r300_tex.c index 2a21c61..f7f4972 100644 --- a/r300/r300_tex.c +++ b/r300/r300_tex.c @@ -52,129 +52,59 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "xmlpool.h" + +static unsigned int translate_wrap_mode(GLenum wrapmode) +{ + switch(wrapmode) { + case GL_REPEAT: return R300_TX_REPEAT; + case GL_CLAMP: return R300_TX_CLAMP; + case GL_CLAMP_TO_EDGE: return R300_TX_CLAMP_TO_EDGE; + case GL_CLAMP_TO_BORDER: return R300_TX_CLAMP_TO_BORDER; + case GL_MIRRORED_REPEAT: return R300_TX_REPEAT | R300_TX_MIRRORED; + case GL_MIRROR_CLAMP_EXT: return R300_TX_CLAMP | R300_TX_MIRRORED; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: return R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED; + default: + _mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__); + return 0; + } +} + + /** - * Set the texture wrap modes. + * Update the cached hardware registers based on the current texture wrap modes. * * \param t Texture object whose wrap modes are to be set - * \param swrap Wrap mode for the \a s texture coordinate - * \param twrap Wrap mode for the \a t texture coordinate */ - -static void r300SetTexWrap(r300TexObjPtr t, GLenum swrap, GLenum twrap, - GLenum rwrap) +static void r300UpdateTexWrap(r300TexObjPtr t) { - unsigned long hw_swrap = 0, hw_twrap = 0, hw_qwrap = 0; + struct gl_texture_object *tObj = t->base.tObj; t->filter &= - ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_Q_MASK); + ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK); - switch (swrap) { - case GL_REPEAT: - hw_swrap |= R300_TX_REPEAT; - break; - case GL_CLAMP: - hw_swrap |= R300_TX_CLAMP; - break; - case GL_CLAMP_TO_EDGE: - hw_swrap |= R300_TX_CLAMP_TO_EDGE; - break; - case GL_CLAMP_TO_BORDER: - hw_swrap |= R300_TX_CLAMP_TO_BORDER; - break; - case GL_MIRRORED_REPEAT: - hw_swrap |= R300_TX_REPEAT | R300_TX_MIRRORED; - break; - case GL_MIRROR_CLAMP_EXT: - hw_swrap |= R300_TX_CLAMP | R300_TX_MIRRORED; - break; - case GL_MIRROR_CLAMP_TO_EDGE_EXT: - hw_swrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; - break; - case GL_MIRROR_CLAMP_TO_BORDER_EXT: - hw_swrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED; - break; - default: - _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__); - } + t->filter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT; - switch (twrap) { - case GL_REPEAT: - hw_twrap |= R300_TX_REPEAT; - break; - case GL_CLAMP: - hw_twrap |= R300_TX_CLAMP; - break; - case GL_CLAMP_TO_EDGE: - hw_twrap |= R300_TX_CLAMP_TO_EDGE; - break; - case GL_CLAMP_TO_BORDER: - hw_twrap |= R300_TX_CLAMP_TO_BORDER; - break; - case GL_MIRRORED_REPEAT: - hw_twrap |= R300_TX_REPEAT | R300_TX_MIRRORED; - break; - case GL_MIRROR_CLAMP_EXT: - hw_twrap |= R300_TX_CLAMP | R300_TX_MIRRORED; - break; - case GL_MIRROR_CLAMP_TO_EDGE_EXT: - hw_twrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; - break; - case GL_MIRROR_CLAMP_TO_BORDER_EXT: - hw_twrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED; - break; - default: - _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__); - } + if (tObj->Target != GL_TEXTURE_1D) { + t->filter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT; - switch (rwrap) { - case GL_REPEAT: - hw_qwrap |= R300_TX_REPEAT; - break; - case GL_CLAMP: - hw_qwrap |= R300_TX_CLAMP; - break; - case GL_CLAMP_TO_EDGE: - hw_qwrap |= R300_TX_CLAMP_TO_EDGE; - break; - case GL_CLAMP_TO_BORDER: - hw_qwrap |= R300_TX_CLAMP_TO_BORDER; - break; - case GL_MIRRORED_REPEAT: - hw_qwrap |= R300_TX_REPEAT | R300_TX_MIRRORED; - break; - case GL_MIRROR_CLAMP_EXT: - hw_qwrap |= R300_TX_CLAMP | R300_TX_MIRRORED; - break; - case GL_MIRROR_CLAMP_TO_EDGE_EXT: - hw_qwrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; - break; - case GL_MIRROR_CLAMP_TO_BORDER_EXT: - hw_qwrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED; - break; - default: - _mesa_problem(NULL, "bad R wrap mode in %s", __FUNCTION__); + if (tObj->Target == GL_TEXTURE_3D) + t->filter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT; } - - t->filter |= hw_swrap << R300_TX_WRAP_S_SHIFT; - t->filter |= hw_twrap << R300_TX_WRAP_T_SHIFT; - t->filter |= hw_qwrap << R300_TX_WRAP_Q_SHIFT; } -static void r300SetTexMaxAnisotropy(r300TexObjPtr t, GLfloat max) +static GLuint aniso_filter(GLfloat anisotropy) { - - t->filter &= ~R300_TX_MAX_ANISO_MASK; - - if (max <= 1.0) { - t->filter |= R300_TX_MAX_ANISO_1_TO_1; - } else if (max <= 2.0) { - t->filter |= R300_TX_MAX_ANISO_2_TO_1; - } else if (max <= 4.0) { - t->filter |= R300_TX_MAX_ANISO_4_TO_1; - } else if (max <= 8.0) { - t->filter |= R300_TX_MAX_ANISO_8_TO_1; + if (anisotropy >= 16.0) { + return R300_TX_MAX_ANISO_16_TO_1; + } else if (anisotropy >= 8.0) { + return R300_TX_MAX_ANISO_8_TO_1; + } else if (anisotropy >= 4.0) { + return R300_TX_MAX_ANISO_4_TO_1; + } else if (anisotropy >= 2.0) { + return R300_TX_MAX_ANISO_2_TO_1; } else { - t->filter |= R300_TX_MAX_ANISO_16_TO_1; + return R300_TX_MAX_ANISO_1_TO_1; } } @@ -184,54 +114,47 @@ static void r300SetTexMaxAnisotropy(r300TexObjPtr t, GLfloat max) * \param t Texture whose filter modes are to be set * \param minf Texture minification mode * \param magf Texture magnification mode + * \param anisotropy Maximum anisotropy level */ - -static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf) +static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) { - GLuint anisotropy = (t->filter & R300_TX_MAX_ANISO_MASK); + t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK); + t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY; - t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MAG_FILTER_MASK); + /* Note that EXT_texture_filter_anisotropic is extremely vague about + * how anisotropic filtering interacts with the "normal" filter modes. + * When anisotropic filtering is enabled, we override min and mag + * filter settings completely. This includes driconf's settings. + */ + if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) { + t->filter |= R300_TX_MAG_FILTER_ANISO + | R300_TX_MIN_FILTER_ANISO + | R300_TX_MIN_FILTER_MIP_LINEAR + | aniso_filter(anisotropy); + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy); + return; + } - if (anisotropy == R300_TX_MAX_ANISO_1_TO_1) { - switch (minf) { - case GL_NEAREST: - t->filter |= R300_TX_MIN_FILTER_NEAREST; - break; - case GL_LINEAR: - t->filter |= R300_TX_MIN_FILTER_LINEAR; - break; - case GL_NEAREST_MIPMAP_NEAREST: - t->filter |= R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST; - break; - case GL_NEAREST_MIPMAP_LINEAR: - t->filter |= R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR; - break; - case GL_LINEAR_MIPMAP_NEAREST: - t->filter |= R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST; - break; - case GL_LINEAR_MIPMAP_LINEAR: - t->filter |= R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR; - break; - } - } else { - switch (minf) { - case GL_NEAREST: - t->filter |= R300_TX_MIN_FILTER_ANISO_NEAREST; - break; - case GL_LINEAR: - t->filter |= R300_TX_MIN_FILTER_ANISO_LINEAR; - break; - case GL_NEAREST_MIPMAP_NEAREST: - case GL_LINEAR_MIPMAP_NEAREST: - t->filter |= - R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST; - break; - case GL_NEAREST_MIPMAP_LINEAR: - case GL_LINEAR_MIPMAP_LINEAR: - t->filter |= - R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR; - break; - } + switch (minf) { + case GL_NEAREST: + t->filter |= R300_TX_MIN_FILTER_NEAREST; + break; + case GL_LINEAR: + t->filter |= R300_TX_MIN_FILTER_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: + t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR; + break; + case GL_LINEAR_MIPMAP_NEAREST: + t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case GL_LINEAR_MIPMAP_LINEAR: + t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR; + break; } /* Note we don't have 3D mipmaps so only use the mag filter setting @@ -249,7 +172,7 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf) static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4]) { - t->pp_border_color = PACK_COLOR_8888(c[0], c[1], c[2], c[3]); + t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); } /** @@ -277,9 +200,8 @@ static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) make_empty_list(&t->base); - r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR); - r300SetTexMaxAnisotropy(t, texObj->MaxAnisotropy); - r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter); + r300UpdateTexWrap(t); + r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); r300SetTexBorderColor(t, texObj->_BorderChan); } @@ -294,27 +216,20 @@ static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat, const GLubyte littleEndian = *((const GLubyte *)&ui); if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || - (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE - && !littleEndian) || (srcFormat == GL_ABGR_EXT - && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) - || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE - && littleEndian)) { + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) { return &_mesa_texformat_rgba8888; - } else - if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) - || (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE - && littleEndian) || (srcFormat == GL_ABGR_EXT - && srcType == GL_UNSIGNED_INT_8_8_8_8) - || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE - && !littleEndian)) { + } else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) { return &_mesa_texformat_rgba8888_rev; - } else if (srcFormat == GL_BGRA && - ((srcType == GL_UNSIGNED_BYTE && !littleEndian) || - srcType == GL_UNSIGNED_INT_8_8_8_8)) { + } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) || + srcType == GL_UNSIGNED_INT_8_8_8_8)) { return &_mesa_texformat_argb8888_rev; - } else if (srcFormat == GL_BGRA && - ((srcType == GL_UNSIGNED_BYTE && littleEndian) || - srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) { + } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) || + srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) { return &_mesa_texformat_argb8888; } else return _dri_texformat_argb8888; @@ -489,6 +404,25 @@ static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx, case GL_RGBA32F_ARB: return &_mesa_texformat_rgba_float32; + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH_COMPONENT32: +#if 0 + switch (type) { + case GL_UNSIGNED_BYTE: + case GL_UNSIGNED_SHORT: + return &_mesa_texformat_z16; + case GL_UNSIGNED_INT: + return &_mesa_texformat_z32; + case GL_UNSIGNED_INT_24_8_EXT: + default: + return &_mesa_texformat_z24_s8; + } +#else + return &_mesa_texformat_z16; +#endif + default: _mesa_problem(ctx, "unexpected internalFormat 0x%x in r300ChooseTextureFormat", @@ -563,34 +497,31 @@ r300ValidateClientStorage(GLcontext * ctx, GLenum target, return 0; } - { - GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, - format, type); + GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, + format, type); - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: srcRowStride %d/%x\n", - __FUNCTION__, srcRowStride, srcRowStride); + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: srcRowStride %d/%x\n", + __FUNCTION__, srcRowStride, srcRowStride); - /* Could check this later in upload, pitch restrictions could be - * relaxed, but would need to store the image pitch somewhere, - * as packing details might change before image is uploaded: - */ - if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride) - || (srcRowStride & 63)) - return 0; + /* Could check this later in upload, pitch restrictions could be + * relaxed, but would need to store the image pitch somewhere, + * as packing details might change before image is uploaded: + */ + if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride) + || (srcRowStride & 63)) + return 0; - /* Have validated that _mesa_transfer_teximage would be a straight - * memcpy at this point. NOTE: future calls to TexSubImage will - * overwrite the client data. This is explicitly mentioned in the - * extension spec. - */ - texImage->Data = (void *)pixels; - texImage->IsClientData = GL_TRUE; - texImage->RowStride = - srcRowStride / texImage->TexFormat->TexelBytes; + /* Have validated that _mesa_transfer_teximage would be a straight + * memcpy at this point. NOTE: future calls to TexSubImage will + * overwrite the client data. This is explicitly mentioned in the + * extension spec. + */ + texImage->Data = (void *)pixels; + texImage->IsClientData = GL_TRUE; + texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; - return 1; - } + return 1; } static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, @@ -967,60 +898,6 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level, t->dirty_images[0] |= (1 << level); } -static void r300TexEnv(GLcontext * ctx, GLenum target, - GLenum pname, const GLfloat * param) -{ - if (RADEON_DEBUG & DEBUG_STATE) { - fprintf(stderr, "%s( %s )\n", - __FUNCTION__, _mesa_lookup_enum_by_nr(pname)); - } - - /* This is incorrect: Need to maintain this data for each of - * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch - * between them according to _ReallyEnabled. - */ - switch (pname) { - case GL_TEXTURE_LOD_BIAS_EXT:{ -#if 0 /* Needs to be relocated in order to make sure we got the right tmu */ - GLfloat bias, min; - GLuint b; - - /* The R300's LOD bias is a signed 2's complement value with a - * range of -16.0 <= bias < 16.0. - * - * NOTE: Add a small bias to the bias for conform mipsel.c test. - */ - bias = *param + .01; - min = - driQueryOptionb(&rmesa->radeon.optionCache, - "no_neg_lod_bias") ? 0.0 : -16.0; - bias = CLAMP(bias, min, 16.0); - - /* 0.0 - 16.0 == 0x0 - 0x1000 */ - /* 0.0 - -16.0 == 0x1001 - 0x1fff */ - b = 0x1000 / 16.0 * bias; - b &= R300_LOD_BIAS_MASK; - - if (b != - (rmesa->hw.tex.unknown1. - cmd[R300_TEX_VALUE_0 + - unit] & R300_LOD_BIAS_MASK)) { - R300_STATECHANGE(rmesa, tex.unknown1); - rmesa->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 + - unit] &= - ~R300_LOD_BIAS_MASK; - rmesa->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 + - unit] |= b; - } -#endif - break; - } - - default: - return; - } -} - /** * Changes variables and flags for a state update, which will happen at the * next UpdateTextureState @@ -1041,14 +918,13 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, case GL_TEXTURE_MIN_FILTER: case GL_TEXTURE_MAG_FILTER: case GL_TEXTURE_MAX_ANISOTROPY_EXT: - r300SetTexMaxAnisotropy(t, texObj->MaxAnisotropy); - r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter); + r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); break; case GL_TEXTURE_WRAP_S: case GL_TEXTURE_WRAP_T: case GL_TEXTURE_WRAP_R: - r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR); + r300UpdateTexWrap(t); break; case GL_TEXTURE_BORDER_COLOR: @@ -1067,13 +943,24 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, driSwapOutTextureObject((driTextureObject *) t); break; + case GL_DEPTH_TEXTURE_MODE: + if (!texObj->Image[0][texObj->BaseLevel]) + return; + if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat + == GL_DEPTH_COMPONENT) { + r300SetDepthTexMode(texObj); + break; + } else { + /* If the texture isn't a depth texture, changing this + * state won't cause any changes to the hardware. + * Don't force a flush of texture state. + */ + return; + } + default: return; } - - /* Mark this texobj as dirty (one bit per tex unit) - */ - t->dirty_state = TEX_ALL; } static void r300BindTexture(GLcontext * ctx, GLenum target, @@ -1156,7 +1043,6 @@ void r300InitTextureFuncs(struct dd_function_table *functions) functions->DeleteTexture = r300DeleteTexture; functions->IsTextureResident = driIsTextureResident; - functions->TexEnv = r300TexEnv; functions->TexParameter = r300TexParameter; functions->CompressedTexImage2D = r300CompressedTexImage2D; diff --git a/r300/r300_tex.h b/r300/r300_tex.h index f67a8e6..b86d45b 100644 --- a/r300/r300_tex.h +++ b/r300/r300_tex.h @@ -35,6 +35,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef __r300_TEX_H__ #define __r300_TEX_H__ +extern void r300SetDepthTexMode(struct gl_texture_object *tObj); + extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); diff --git a/r300/r300_texmem.c b/r300/r300_texmem.c index e2e8355..69847a4 100644 --- a/r300/r300_texmem.c +++ b/r300/r300_texmem.c @@ -63,29 +63,16 @@ SOFTWARE. */ void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t) { + int i; + if (RADEON_DEBUG & DEBUG_TEXTURE) { fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj); } - if (rmesa != NULL) { - unsigned i; - - for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { - if (t == rmesa->state.texture.unit[i].texobj) { - rmesa->state.texture.unit[i].texobj = NULL; - /* This code below is meant to shorten state - pushed to the hardware by not programming - unneeded units. - - This does not appear to be worthwhile on R300 */ -#if 0 - remove_from_list(&rmesa->hw.tex[i]); - make_empty_list(&rmesa->hw.tex[i]); - remove_from_list(&rmesa->hw.cube[i]); - make_empty_list(&rmesa->hw.cube[i]); -#endif - } + for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { + if (rmesa->state.texture.unit[i].texobj == t) { + rmesa->state.texture.unit[i].texobj = NULL; } } } @@ -362,7 +349,7 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, imageWidth = texImage->Width; imageHeight = texImage->Height; - offset = t->bufAddr + t->base.totalSize / 6 * face; + offset = t->bufAddr; if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { GLint imageX = 0; @@ -518,7 +505,7 @@ int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) t->base.lastLevel); } - if (!t || t->base.totalSize == 0) + if (t->base.totalSize == 0) return 0; if (RADEON_DEBUG & DEBUG_SYNC) { @@ -547,10 +534,6 @@ int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) /* hope it's safe to add that here... */ t->offset |= t->tile_bits; } - - /* Mark this texobj as dirty on all units: - */ - t->dirty_state = TEX_ALL; } /* Let the world know we've used this memory recently. diff --git a/r300/r300_texstate.c b/r300/r300_texstate.c index 8203189..d19832f 100644 --- a/r300/r300_texstate.c +++ b/r300/r300_texstate.c @@ -54,7 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \ || ((f) >= MESA_FORMAT_RGBA_FLOAT32 && \ (f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \ - && tx_table_le[f].flag ) + && tx_table[f].flag ) #define _ASSIGN(entry, format) \ [ MESA_FORMAT_ ## entry ] = { format, 0, 1} @@ -70,53 +70,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. static const struct tx_table { GLuint format, filter, flag; -} tx_table_be[] = { - /* *INDENT-OFF* */ - _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), - _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), - _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), - _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), - _ASSIGN(RGB888, 0xffffffff), - _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), - _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), - _ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), - _ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), - _ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), - _ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), - _ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), - _ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), - _ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)), - _ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)), - _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)), - _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), - _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), - _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE ), - _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE), - _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)), - _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)), - _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)), - _ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)), - _ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)), - _ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)), - _ASSIGN(RGB_FLOAT32, 0xffffffff), - _ASSIGN(RGB_FLOAT16, 0xffffffff), - _ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)), - _ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)), - _ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)), - _ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)), - _ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)), - _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)), - _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)), - _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), - /* *INDENT-ON* */ -}; - -static const struct tx_table tx_table_le[] = { +} tx_table[] = { /* *INDENT-OFF* */ +#ifdef MESA_LITTLE_ENDIAN _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), +#else + _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), + _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), + _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), + _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), +#endif _ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)), _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), @@ -131,8 +97,8 @@ static const struct tx_table tx_table_le[] = { _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)), _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), - _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE ), - _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE), + _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE), + _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE), _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)), _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)), _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)), @@ -149,11 +115,186 @@ static const struct tx_table tx_table_le[] = { _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)), _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)), _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), + _ASSIGN(Z16, R300_EASY_TX_FORMAT(X, X, X, X, X16)), + _ASSIGN(Z24_S8, R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8)), + _ASSIGN(Z32, R300_EASY_TX_FORMAT(X, X, X, X, X32)), /* *INDENT-ON* */ }; #undef _ASSIGN +void r300SetDepthTexMode(struct gl_texture_object *tObj) +{ + static const GLuint formats[3][3] = { + { + R300_EASY_TX_FORMAT(X, X, X, ONE, X16), + R300_EASY_TX_FORMAT(X, X, X, X, X16), + R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X16), + }, + { + R300_EASY_TX_FORMAT(X, X, X, ONE, X24_Y8), + R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8), + R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X24_Y8), + }, + { + R300_EASY_TX_FORMAT(X, X, X, ONE, X32), + R300_EASY_TX_FORMAT(X, X, X, X, X32), + R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X32), + }, + }; + const GLuint *format; + r300TexObjPtr t; + + if (!tObj) + return; + + t = (r300TexObjPtr) tObj->DriverData; + + + switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) { + case MESA_FORMAT_Z16: + format = formats[0]; + break; + case MESA_FORMAT_Z24_S8: + format = formats[1]; + break; + case MESA_FORMAT_Z32: + format = formats[2]; + break; + default: + /* Error...which should have already been caught by higher + * levels of Mesa. + */ + ASSERT(0); + return; + } + + switch (tObj->DepthMode) { + case GL_LUMINANCE: + t->format = format[0]; + break; + case GL_INTENSITY: + t->format = format[1]; + break; + case GL_ALPHA: + t->format = format[2]; + break; + default: + /* Error...which should have already been caught by higher + * levels of Mesa. + */ + ASSERT(0); + return; + } +} + + +/** + * Compute sizes and fill in offset and blit information for the given + * image (determined by \p face and \p level). + * + * \param curOffset points to the offset at which the image is to be stored + * and is updated by this function according to the size of the image. + */ +static void compute_tex_image_offset( + struct gl_texture_object *tObj, + GLuint face, + GLint level, + GLint* curOffset) +{ + r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + const struct gl_texture_image* texImage; + GLuint blitWidth = R300_BLIT_WIDTH_BYTES; + GLuint texelBytes; + GLuint size; + + texImage = tObj->Image[0][level + t->base.firstLevel]; + if (!texImage) + return; + + texelBytes = texImage->TexFormat->TexelBytes; + + /* find image size in bytes */ + if (texImage->IsCompressed) { + if ((t->format & R300_TX_FORMAT_DXT1) == + R300_TX_FORMAT_DXT1) { + // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); + if ((texImage->Width + 3) < 8) /* width one block */ + size = texImage->CompressedSize * 4; + else if ((texImage->Width + 3) < 16) + size = texImage->CompressedSize * 2; + else + size = texImage->CompressedSize; + } else { + /* DXT3/5, 16 bytes per block */ + WARN_ONCE + ("DXT 3/5 suffers from multitexturing problems!\n"); + // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); + if ((texImage->Width + 3) < 8) + size = texImage->CompressedSize * 2; + else + size = texImage->CompressedSize; + } + } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + size = + ((texImage->Width * texelBytes + + 63) & ~63) * texImage->Height; + blitWidth = 64 / texelBytes; + } else if (t->tile_bits & R300_TXO_MICRO_TILE) { + /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, + though the actual offset may be different (if texture is less than + 32 bytes width) to the untiled case */ + int w = (texImage->Width * texelBytes * 2 + 31) & ~31; + size = + (w * ((texImage->Height + 1) / 2)) * + texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + } else { + int w = (texImage->Width * texelBytes + 31) & ~31; + size = w * texImage->Height * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + } + assert(size > 0); + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", + texImage->Width, texImage->Height, + texImage->Depth, + texImage->TexFormat->TexelBytes, + texImage->InternalFormat); + + /* All images are aligned to a 32-byte offset */ + *curOffset = (*curOffset + 0x1f) & ~0x1f; + + if (texelBytes) { + /* fix x and y coords up later together with offset */ + t->image[face][level].x = *curOffset; + t->image[face][level].y = 0; + t->image[face][level].width = + MIN2(size / texelBytes, blitWidth); + t->image[face][level].height = + (size / texelBytes) / t->image[face][level].width; + } else { + t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES; + t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES; + t->image[face][level].width = + MIN2(size, R300_BLIT_WIDTH_BYTES); + t->image[face][level].height = size / t->image[face][level].width; + } + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, + "level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", + level, face, texImage->Width, texImage->Height, + t->image[face][level].x, t->image[face][level].y, + t->image[face][level].width, t->image[face][level].height, + size, *curOffset); + + *curOffset += size; +} + + + /** * This function computes the number of bytes of storage needed for * the given texture object (all mipmap levels, all cube faces). @@ -171,29 +312,22 @@ static void r300SetTexImages(r300ContextPtr rmesa, r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint curOffset, blitWidth; + GLint curOffset; GLint i, texelBytes; GLint numLevels; GLint log2Width, log2Height, log2Depth; /* Set the hardware texture format */ - if (!t->image_override && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) { - if (_mesa_little_endian()) { - t->format = - tx_table_le[baseImage->TexFormat->MesaFormat]. - format; - t->filter |= - tx_table_le[baseImage->TexFormat->MesaFormat]. - filter; + if (!t->image_override + && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) { + if (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) { + r300SetDepthTexMode(tObj); } else { - t->format = - tx_table_be[baseImage->TexFormat->MesaFormat]. - format; - t->filter |= - tx_table_be[baseImage->TexFormat->MesaFormat]. - filter; + t->format = tx_table[baseImage->TexFormat->MesaFormat].format; } + + t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter; } else if (!t->image_override) { _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); @@ -217,8 +351,6 @@ static void r300SetTexImages(r300ContextPtr rmesa, * The idea is that we lay out the mipmap levels within a block of * memory organized as a rectangle of width BLIT_WIDTH_BYTES. */ - curOffset = 0; - blitWidth = R300_BLIT_WIDTH_BYTES; t->tile_bits = 0; /* figure out if this texture is suitable for tiling. */ @@ -248,94 +380,23 @@ static void r300SetTexImages(r300ContextPtr rmesa, } #endif - for (i = 0; i < numLevels; i++) { - const struct gl_texture_image *texImage; - GLuint size; - - texImage = tObj->Image[0][i + t->base.firstLevel]; - if (!texImage) - break; - - /* find image size in bytes */ - if (texImage->IsCompressed) { - if ((t->format & R300_TX_FORMAT_DXT1) == - R300_TX_FORMAT_DXT1) { - // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); - if ((texImage->Width + 3) < 8) /* width one block */ - size = texImage->CompressedSize * 4; - else if ((texImage->Width + 3) < 16) - size = texImage->CompressedSize * 2; - else - size = texImage->CompressedSize; - } else { - /* DXT3/5, 16 bytes per block */ - WARN_ONCE - ("DXT 3/5 suffers from multitexturing problems!\n"); - // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); - if ((texImage->Width + 3) < 8) - size = texImage->CompressedSize * 2; - else - size = texImage->CompressedSize; - } - } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = - ((texImage->Width * texelBytes + - 63) & ~63) * texImage->Height; - blitWidth = 64 / texelBytes; - } else if (t->tile_bits & R300_TXO_MICRO_TILE) { - /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, - though the actual offset may be different (if texture is less than - 32 bytes width) to the untiled case */ - int w = (texImage->Width * texelBytes * 2 + 31) & ~31; - size = - (w * ((texImage->Height + 1) / 2)) * - texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } else { - int w = (texImage->Width * texelBytes + 31) & ~31; - size = w * texImage->Height * texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } - assert(size > 0); - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", - texImage->Width, texImage->Height, - texImage->Depth, - texImage->TexFormat->TexelBytes, - texImage->InternalFormat); - - /* Align to 32-byte offset. It is faster to do this unconditionally - * (no branch penalty). - */ + curOffset = 0; - curOffset = (curOffset + 0x1f) & ~0x1f; + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + ASSERT(log2Width == log2Height); + t->format |= R300_TX_FORMAT_CUBIC_MAP; - if (texelBytes) { - /* fix x and y coords up later together with offset */ - t->image[0][i].x = curOffset; - t->image[0][i].y = 0; - t->image[0][i].width = - MIN2(size / texelBytes, blitWidth); - t->image[0][i].height = - (size / texelBytes) / t->image[0][i].width; - } else { - t->image[0][i].x = curOffset % R300_BLIT_WIDTH_BYTES; - t->image[0][i].y = curOffset / R300_BLIT_WIDTH_BYTES; - t->image[0][i].width = - MIN2(size, R300_BLIT_WIDTH_BYTES); - t->image[0][i].height = size / t->image[0][i].width; + for(i = 0; i < numLevels; i++) { + GLuint face; + for(face = 0; face < 6; face++) + compute_tex_image_offset(tObj, face, i, &curOffset); } + } else { + if (tObj->Target == GL_TEXTURE_3D) + t->format |= R300_TX_FORMAT_3D; - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", - i, texImage->Width, texImage->Height, - t->image[0][i].x, t->image[0][i].y, - t->image[0][i].width, t->image[0][i].height, - size, curOffset); - - curOffset += size; + for (i = 0; i < numLevels; i++) + compute_tex_image_offset(tObj, 0, i, &curOffset); } /* Align the total size of texture memory block. @@ -343,43 +404,27 @@ static void r300SetTexImages(r300ContextPtr rmesa, t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; - /* Setup remaining cube face blits, if needed */ - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - GLuint face; - for (face = 1; face < 6; face++) { - for (i = 0; i < numLevels; i++) { - t->image[face][i].x = t->image[0][i].x; - t->image[face][i].y = t->image[0][i].y; - t->image[face][i].width = t->image[0][i].width; - t->image[face][i].height = - t->image[0][i].height; - } - } - t->base.totalSize *= 6; /* total texmem needed */ - } - - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - ASSERT(log2Width == log2Height); - t->format |= R300_TX_FORMAT_CUBIC_MAP; - } - t->size = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << R300_TX_WIDTHMASK_SHIFT) | ((tObj->Image[0][t->base.firstLevel]->Height - 1) << - R300_TX_HEIGHTMASK_SHIFT)) + R300_TX_HEIGHTMASK_SHIFT) + | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) << + R300_TX_DEPTHMASK_SHIFT)) | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); + t->pitch = 0; + /* Only need to round to nearest 32 for textures, but the blitter * requires 64-byte aligned pitches, and we may/may not need the * blitter. NPOT only! */ if (baseImage->IsCompressed) { - t->pitch = + t->pitch |= (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - unsigned int align = blitWidth - 1; - t->pitch = ((tObj->Image[0][t->base.firstLevel]->Width * + unsigned int align = (64 / texelBytes) - 1; + t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); t->size |= R300_TX_SIZE_TXPITCH_EN; if (!t->image_override) @@ -387,14 +432,17 @@ static void r300SetTexImages(r300ContextPtr rmesa, (((tObj->Image[0][t->base.firstLevel]->Width) + align) & ~align) - 1; } else { - t->pitch = + t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); } - t->dirty_state = TEX_ALL; - - /* FYI: r300UploadTexImages( rmesa, t ) used to be called here */ + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + if (tObj->Image[0][t->base.firstLevel]->Width > 2048) + t->pitch_reg |= R500_TXWIDTH_BIT11; + if (tObj->Image[0][t->base.firstLevel]->Height > 2048) + t->pitch_reg |= R500_TXHEIGHT_BIT11; + } } /* ================================================================ @@ -526,27 +574,25 @@ static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) */ rmesa->state.texture.unit[unit].texobj->base.bound &= - ~(1UL << unit); + ~(1 << unit); } rmesa->state.texture.unit[unit].texobj = t; - t->base.bound |= (1UL << unit); - t->dirty_state |= 1 << unit; + t->base.bound |= (1 << unit); driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */ } return !t->border_fallback; } -void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, +void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch) { - r300ContextPtr rmesa = - (r300ContextPtr)((__DRIcontextPrivate*)pDRICtx->private)->driverPrivate; + r300ContextPtr rmesa = pDRICtx->driverPrivate; struct gl_texture_object *tObj = - _mesa_lookup_texture(rmesa->radeon.glCtx, texname); + _mesa_lookup_texture(rmesa->radeon.glCtx, texname); r300TexObjPtr t; - int idx; + uint32_t pitch_val; if (!tObj) return; @@ -559,28 +605,30 @@ void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, return; t->offset = offset; - t->pitch_reg = pitch; + t->pitch_reg &= (1 << 13) -1; + pitch_val = pitch; switch (depth) { case 32: - idx = 2; - t->pitch_reg /= 4; + t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + t->filter |= tx_table[2].filter; + pitch_val /= 4; break; case 24: default: - idx = 4; - t->pitch_reg /= 4; + t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + t->filter |= tx_table[4].filter; + pitch_val /= 4; break; case 16: - idx = 5; - t->pitch_reg /= 2; + t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + t->filter |= tx_table[5].filter; + pitch_val /= 2; break; } + pitch_val--; - t->pitch_reg--; - - t->format = tx_table_le[idx].format; - t->filter |= tx_table_le[idx].filter; + t->pitch_reg |= pitch_val; } static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit) diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c index 1d90ade..c4e325e 100644 --- a/r300/r300_vertprog.c +++ b/r300/r300_vertprog.c @@ -1,6 +1,7 @@ /************************************************************************** -Copyright (C) 2005 Aapo Tahkola. +Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org> +Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com> All Rights Reserved. @@ -25,16 +26,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ -/** - * \file - * - * \author Aapo Tahkola <aet@rasterburn.org> - */ +/* Radeon R5xx Acceleration, Revision 1.2 */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" -#include "program.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" @@ -42,64 +39,35 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" -#if SWIZZLE_X != VSF_IN_COMPONENT_X || \ - SWIZZLE_Y != VSF_IN_COMPONENT_Y || \ - SWIZZLE_Z != VSF_IN_COMPONENT_Z || \ - SWIZZLE_W != VSF_IN_COMPONENT_W || \ - SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \ - SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \ - WRITEMASK_X != VSF_FLAG_X || \ - WRITEMASK_Y != VSF_FLAG_Y || \ - WRITEMASK_Z != VSF_FLAG_Z || \ - WRITEMASK_W != VSF_FLAG_W -#error Cannot change these! -#endif +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ + ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ + t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ + (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ + t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(src[x].File), \ + VSF_FLAG_NONE) | (src[x].RelAddr << 4)) -#define SCALAR_FLAG (1<<31) -#define FLAG_MASK (1<<31) -#define OP_MASK (0xf) /* we are unlikely to have more than 15 */ -#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} - -static struct { - char *name; - int opcode; - unsigned long ip; /* number of input operands and flags */ -} op_names[] = { - /* *INDENT-OFF* */ - OPN(ABS, 1), - OPN(ADD, 2), - OPN(ARL, 1 | SCALAR_FLAG), - OPN(DP3, 2), - OPN(DP4, 2), - OPN(DPH, 2), - OPN(DST, 2), - OPN(EX2, 1 | SCALAR_FLAG), - OPN(EXP, 1 | SCALAR_FLAG), - OPN(FLR, 1), - OPN(FRC, 1), - OPN(LG2, 1 | SCALAR_FLAG), - OPN(LIT, 1), - OPN(LOG, 1 | SCALAR_FLAG), - OPN(MAD, 3), - OPN(MAX, 2), - OPN(MIN, 2), - OPN(MOV, 1), - OPN(MUL, 2), - OPN(POW, 2 | SCALAR_FLAG), - OPN(RCP, 1 | SCALAR_FLAG), - OPN(RSQ, 1 | SCALAR_FLAG), - OPN(SGE, 2), - OPN(SLT, 2), - OPN(SUB, 2), - OPN(SWZ, 1), - OPN(XPD, 2), - OPN(RCC, 0), //extra - OPN(PRINT, 0), - OPN(END, 0) - /* *INDENT-ON* */ -}; - -#undef OPN +#define FREE_TEMPS() \ + do { \ + int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ + if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ + WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ + vp->native = GL_FALSE; \ + } \ + u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ + } while (0) int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program_cont *vp, float *dst) @@ -133,7 +101,6 @@ int r300VertexProgUpdateParams(GLcontext * ctx, paramList = mesa_vp->Base.Parameters; for (pi = 0; pi < paramList->NumParameters; pi++) { switch (paramList->Parameters[pi].Type) { - case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); @@ -143,7 +110,6 @@ int r300VertexProgUpdateParams(GLcontext * ctx, *dst++ = paramList->ParameterValues[pi][2]; *dst++ = paramList->ParameterValues[pi][3]; break; - default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); @@ -165,11 +131,11 @@ static unsigned long t_dst_class(enum register_file file) switch (file) { case PROGRAM_TEMPORARY: - return VSF_OUT_CLASS_TMP; + return PVS_DST_REG_TEMPORARY; case PROGRAM_OUTPUT: - return VSF_OUT_CLASS_RESULT; + return PVS_DST_REG_OUT; case PROGRAM_ADDRESS: - return VSF_OUT_CLASS_ADDR; + return PVS_DST_REG_A0; /* case PROGRAM_INPUT: case PROGRAM_LOCAL_PARAM: @@ -197,19 +163,17 @@ static unsigned long t_dst_index(struct r300_vertex_program *vp, static unsigned long t_src_class(enum register_file file) { - switch (file) { case PROGRAM_TEMPORARY: - return VSF_IN_CLASS_TMP; - + return PVS_SRC_REG_TEMPORARY; case PROGRAM_INPUT: - return VSF_IN_CLASS_ATTR; - + return PVS_SRC_REG_INPUT; case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: case PROGRAM_STATE_VAR: - return VSF_IN_CLASS_PARAM; + return PVS_SRC_REG_CONSTANT; /* case PROGRAM_OUTPUT: case PROGRAM_WRITE_ONLY: @@ -222,7 +186,7 @@ static unsigned long t_src_class(enum register_file file) } } -static __inline unsigned long t_swizzle(GLubyte swizzle) +static INLINE unsigned long t_swizzle(GLubyte swizzle) { /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ return swizzle; @@ -276,13 +240,15 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, } } +/* these two functions should probably be merged... */ + static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) { /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ - return MAKE_VSF_SOURCE(t_src_index(vp, src), + return PVS_SRC_OPERAND(t_src_index(vp, src), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 1)), t_swizzle(GET_SWZ(src->Swizzle, 2)), @@ -294,8 +260,10 @@ static unsigned long t_src(struct r300_vertex_program *vp, static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) { - - return MAKE_VSF_SOURCE(t_src_index(vp, src), + /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), @@ -306,128 +274,687 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp, (src->RelAddr << 4); } -static unsigned long t_opcode(enum prog_opcode opcode) +static GLboolean valid_dst(struct r300_vertex_program *vp, + struct prog_dst_register *dst) { + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } - switch (opcode) { - /* *INDENT-OFF* */ - case OPCODE_ARL: return R300_VPI_OUT_OP_ARL; - case OPCODE_DST: return R300_VPI_OUT_OP_DST; - case OPCODE_EX2: return R300_VPI_OUT_OP_EX2; - case OPCODE_EXP: return R300_VPI_OUT_OP_EXP; - case OPCODE_FRC: return R300_VPI_OUT_OP_FRC; - case OPCODE_LG2: return R300_VPI_OUT_OP_LG2; - case OPCODE_LOG: return R300_VPI_OUT_OP_LOG; - case OPCODE_MAX: return R300_VPI_OUT_OP_MAX; - case OPCODE_MIN: return R300_VPI_OUT_OP_MIN; - case OPCODE_MUL: return R300_VPI_OUT_OP_MUL; - case OPCODE_RCP: return R300_VPI_OUT_OP_RCP; - case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ; - case OPCODE_SGE: return R300_VPI_OUT_OP_SGE; - case OPCODE_SLT: return R300_VPI_OUT_OP_SLT; - case OPCODE_DP4: return R300_VPI_OUT_OP_DOT; - /* *INDENT-ON* */ + return GL_TRUE; +} - default: - fprintf(stderr, "%s: Should not be called with opcode %d!", - __FUNCTION__, opcode); - } - _mesa_exit(-1); - return 0; +static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + + inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[3] = 0; + + return inst; } -static unsigned long op_operands(enum prog_opcode opcode) +static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) { - int i; + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} - /* Can we trust mesas opcodes to be in order ? */ - for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++) - if (op_names[i].opcode == opcode) - return op_names[i].ip; +static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} - fprintf(stderr, "op %d not found in op_names\n", opcode); - _mesa_exit(-1); - return 0; +static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = + PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; } -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) +static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) { - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); - } + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} - return GL_TRUE; +static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + PVS_SRC_SELECT_FORCE_1, + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; } -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ - t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ - (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ - t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ - -#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) - -#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) - -#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) - -#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) - -#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) - -#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) +static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} -/* DP4 version seems to trigger some hw peculiarity */ -//#define PREFER_DP4 +static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} -#define FREE_TEMPS() \ - do { \ - if(u_temp_i < vp->num_temporaries) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \ - vp->native = GL_FALSE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) +static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} -static void r300TranslateVertexShader(struct r300_vertex_program *vp, - struct prog_instruction *vpi) +static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3], + int *u_temp_i) { - int i, cur_reg = 0; - VERTEX_SHADER_INSTRUCTION *o_inst; - unsigned long operands; - int are_srcs_scalar; - unsigned long hw_op; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? */ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, + GL_FALSE, + GL_FALSE, + *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + PVS_DST_REG_TEMPORARY); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(*u_temp_i, + PVS_SRC_SELECT_X, + PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, + PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, + /* Not 100% sure about this */ + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE + /*VSF_FLAG_ALL */ ); + inst[3] = __CONST(0, SWIZZLE_ZERO); + (*u_temp_i)--; + + return inst; +} - vp->pos_end = 0; /* Not supported yet */ - vp->program.length = 0; - /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ +static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} + + inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + + return inst; +} + +static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + GL_FALSE, + GL_TRUE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = t_src(vp, &src[2]); + + return inst; +} + +static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &src[1]); + + return inst; +} + +static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + +#if 0 + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + inst[3] = 0; +#else + inst[0] = + PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ONE); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); +#endif + + return inst; +} + +static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + */ + + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + PVS_DST_REG_TEMPORARY); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W + t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + inst[3] = __CONST(1, SWIZZLE_ZERO); + inst += 4; + + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[3] = + PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, + PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE); + + (*u_temp_i)--; + + return inst; +} + +static void t_inputs_outputs(struct r300_vertex_program *vp) +{ + int i; + int cur_reg = 0; for (i = 0; i < VERT_ATTRIB_MAX; i++) vp->inputs[i] = -1; @@ -437,39 +964,71 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); - /* Assign outputs */ - if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) { vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) - vp->outputs[VERT_RESULT_COL1] = cur_reg++; - -#if 0 /* Not supported yet */ - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { + vp->outputs[VERT_RESULT_COL1] = + vp->outputs[VERT_RESULT_COL0] + 1; + cur_reg = vp->outputs[VERT_RESULT_COL1] + 1; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + vp->outputs[VERT_RESULT_BFC0] = + vp->outputs[VERT_RESULT_COL0] + 2; + cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + vp->outputs[VERT_RESULT_BFC1] = + vp->outputs[VERT_RESULT_COL0] + 3; + cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; + } +#if 0 + if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + } #endif - for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) - if (vp->key.OutputsWritten & (1 << i)) + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { + if (vp->key.OutputsWritten & (1 << i)) { vp->outputs[i] = cur_reg++; + } + } +} + +static void r300TranslateVertexShader(struct r300_vertex_program *vp, + struct prog_instruction *vpi) +{ + int i; + GLuint *inst; + unsigned long num_operands; + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? */ + int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; + struct prog_src_register src[3]; + vp->pos_end = 0; /* Not supported yet */ + vp->program.length = 0; + /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ vp->translated = GL_TRUE; vp->native = GL_TRUE; - o_inst = vp->program.body.i; - for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) { + t_inputs_outputs(vp); + + for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END; + vpi++, inst += 4) { + FREE_TEMPS(); if (!valid_dst(vp, &vpi->DstReg)) { @@ -478,61 +1037,64 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, vpi->DstReg.Index = u_temp_i; } - operands = op_operands(vpi->Opcode); - are_srcs_scalar = operands & SCALAR_FLAG; - operands &= OP_MASK; + num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - for (i = 0; i < operands; i++) + /* copy the sources (src) from mesa into a local variable... is this needed? */ + for (i = 0; i < num_operands; i++) { src[i] = vpi->SrcReg[i]; + } - if (operands == 3) { /* TODO: scalars */ + if (num_operands == 3) { /* TODO: scalars */ if (CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2])) { - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, - VSF_FLAG_ALL, - VSF_OUT_CLASS_TMP); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), - SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_Z, SWIZZLE_W, + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + u_temp_i, + VSF_FLAG_ALL, + PVS_DST_REG_TEMPORARY); + inst[1] = + PVS_SRC_OPERAND(t_src_index(vp, &src[2]), + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2]. RelAddr << 4); - - o_inst->src[1] = ZERO_SRC_2; - o_inst->src[2] = ZERO_SRC_2; - o_inst++; + inst[2] = __CONST(2, SWIZZLE_ZERO); + inst[3] = __CONST(2, SWIZZLE_ZERO); + inst += 4; src[2].File = PROGRAM_TEMPORARY; src[2].Index = u_temp_i; src[2].RelAddr = 0; u_temp_i--; } - } - if (operands >= 2) { + if (num_operands >= 2) { if (CMP_SRCS(src[1], src[0])) { - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, - VSF_FLAG_ALL, - VSF_OUT_CLASS_TMP); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_Z, SWIZZLE_W, + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + u_temp_i, + VSF_FLAG_ALL, + PVS_DST_REG_TEMPORARY); + inst[1] = + PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0]. RelAddr << 4); - - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - o_inst++; + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; src[0].File = PROGRAM_TEMPORARY; src[0].Index = u_temp_i; @@ -541,517 +1103,118 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, } } - /* These ops need special handling. */ switch (vpi->Opcode) { - case OPCODE_POW: - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_POW, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = t_src_scalar(vp, &src[1]); - goto next; - - case OPCODE_MOV: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - case OPCODE_SWZ: -#if 1 - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; -#else - hw_op = - (src[0].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ONE_SRC_0; - o_inst->src[2] = ZERO_SRC_0; -#endif - - goto next; - + case OPCODE_ABS: + inst = r300TranslateOpcodeABS(vp, vpi, inst, src); + break; case OPCODE_ADD: -#if 1 - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = ONE_SRC_0; - o_inst->src[1] = t_src(vp, &src[0]); - o_inst->src[2] = t_src(vp, &src[1]); -#else - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - -#endif - goto next; - - case OPCODE_MAD: - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == PROGRAM_TEMPORARY && - src[2].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = t_src(vp, &src[2]); - goto next; - - case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */ - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_DP3: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); - - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_SUB: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W -#if 1 - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ONE_SRC_0; - o_inst->src[2] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); -#else - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); - o_inst->src[2] = 0; -#endif - goto next; - - case OPCODE_ABS: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[2] = 0; - goto next; - + inst = r300TranslateOpcodeADD(vp, vpi, inst, src); + break; + case OPCODE_ARL: + inst = r300TranslateOpcodeARL(vp, vpi, inst, src); + break; + case OPCODE_DP3: + inst = r300TranslateOpcodeDP3(vp, vpi, inst, src); + break; + case OPCODE_DP4: + inst = r300TranslateOpcodeDP4(vp, vpi, inst, src); + break; + case OPCODE_DPH: + inst = r300TranslateOpcodeDPH(vp, vpi, inst, src); + break; + case OPCODE_DST: + inst = r300TranslateOpcodeDST(vp, vpi, inst, src); + break; + case OPCODE_EX2: + inst = r300TranslateOpcodeEX2(vp, vpi, inst, src); + break; + case OPCODE_EXP: + inst = r300TranslateOpcodeEXP(vp, vpi, inst, src); + break; case OPCODE_FLR: - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i, - t_dst_mask(vpi->DstReg. - WriteMask), - VSF_OUT_CLASS_TMP); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - o_inst++; - - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = MAKE_VSF_SOURCE(u_temp_i, - VSF_IN_COMPONENT_X, - VSF_IN_COMPONENT_Y, - VSF_IN_COMPONENT_Z, - VSF_IN_COMPONENT_W, - VSF_IN_CLASS_TMP, - /* Not 100% sure about this */ - (!src[0]. - NegateBase) ? - VSF_FLAG_ALL : - VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - - o_inst->src[2] = ZERO_SRC_0; - u_temp_i--; - goto next; - - case OPCODE_LG2: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - goto next; - - case OPCODE_LIT: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - goto next; - - case OPCODE_DPH: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - VSF_IN_COMPONENT_ONE, - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_XPD: - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - NOTE: might need MAD_2 - */ - - o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i, - t_dst_mask(vpi->DstReg. - WriteMask), - VSF_OUT_CLASS_TMP); - - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1]. - File), - src[1]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - - o_inst->src[2] = ZERO_SRC_1; - o_inst++; - u_temp_i--; - - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1]. - File), - (!src[1]. - NegateBase) ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - o_inst->src[2] = MAKE_VSF_SOURCE(u_temp_i + 1, - VSF_IN_COMPONENT_X, - VSF_IN_COMPONENT_Y, - VSF_IN_COMPONENT_Z, - VSF_IN_COMPONENT_W, - VSF_IN_CLASS_TMP, - VSF_FLAG_NONE); - - goto next; - - case OPCODE_RCC: - fprintf(stderr, "Dont know how to handle op %d yet\n", - vpi->Opcode); - _mesa_exit(-1); + inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + case OPCODE_FRC: + inst = r300TranslateOpcodeFRC(vp, vpi, inst, src); + break; + case OPCODE_LG2: + inst = r300TranslateOpcodeLG2(vp, vpi, inst, src); + break; + case OPCODE_LIT: + inst = r300TranslateOpcodeLIT(vp, vpi, inst, src); + break; + case OPCODE_LOG: + inst = r300TranslateOpcodeLOG(vp, vpi, inst, src); + break; + case OPCODE_MAD: + inst = r300TranslateOpcodeMAD(vp, vpi, inst, src); + break; + case OPCODE_MAX: + inst = r300TranslateOpcodeMAX(vp, vpi, inst, src); + break; + case OPCODE_MIN: + inst = r300TranslateOpcodeMIN(vp, vpi, inst, src); + break; + case OPCODE_MOV: + inst = r300TranslateOpcodeMOV(vp, vpi, inst, src); + break; + case OPCODE_MUL: + inst = r300TranslateOpcodeMUL(vp, vpi, inst, src); + break; + case OPCODE_POW: + inst = r300TranslateOpcodePOW(vp, vpi, inst, src); break; - case OPCODE_END: + case OPCODE_RCP: + inst = r300TranslateOpcodeRCP(vp, vpi, inst, src); + break; + case OPCODE_RSQ: + inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src); + break; + case OPCODE_SGE: + inst = r300TranslateOpcodeSGE(vp, vpi, inst, src); + break; + case OPCODE_SLT: + inst = r300TranslateOpcodeSLT(vp, vpi, inst, src); + break; + case OPCODE_SUB: + inst = r300TranslateOpcodeSUB(vp, vpi, inst, src); + break; + case OPCODE_SWZ: + inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src); + break; + case OPCODE_XPD: + inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */ + &u_temp_i); break; default: + assert(0); break; } + } - o_inst->op = - MAKE_VSF_OP(t_opcode(vpi->Opcode), - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - if (are_srcs_scalar) { - switch (operands) { - case 1: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - break; - - case 2: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = t_src_scalar(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - break; - - case 3: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = t_src_scalar(vp, &src[1]); - o_inst->src[2] = t_src_scalar(vp, &src[2]); - break; - - default: - fprintf(stderr, - "scalars and op RCC not handled yet"); - _mesa_exit(-1); - break; - } - } else { - switch (operands) { - case 1: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - break; - - case 2: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - break; - - case 3: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = t_src(vp, &src[2]); - break; - - default: - fprintf(stderr, - "scalars and op RCC not handled yet"); - _mesa_exit(-1); - break; - } + /* Some outputs may be artificially added, to match the inputs + of the fragment program. Blank the outputs here. */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + if (vp->key.OutputsAdded & (1 << i)) { + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + vp->outputs[i], + VSF_FLAG_ALL, + PVS_DST_REG_OUT); + inst[1] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; } - next:; } - /* Will most likely segfault before we get here... fix later. */ - if (o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH / 4) { + vp->program.length = (inst - vp->program.body.i); + if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) { vp->program.length = 0; vp->native = GL_FALSE; - return; } - vp->program.length = (o_inst - vp->program.body.i) * 4; #if 0 fprintf(stderr, "hw program:\n"); for (i = 0; i < vp->program.length; i++) @@ -1059,6 +1222,9 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, #endif } +/* DP4 version seems to trigger some hw peculiarity */ +//#define PREFER_DP4 + static void position_invariant(struct gl_program *prog) { struct prog_instruction *vpi; @@ -1145,8 +1311,8 @@ static void position_invariant(struct gl_program *prog) assert(vpi->Opcode == OPCODE_END); } -static void insert_wpos(struct r300_vertex_program *vp, - struct gl_program *prog, GLuint temp_index) +static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, + GLuint temp_index) { struct prog_instruction *vpi; struct prog_instruction *vpi_insert; @@ -1206,8 +1372,8 @@ static void pos_as_texcoord(struct r300_vertex_program *vp, prog->NumTemporaries++; for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && - vpi->DstReg.Index == VERT_RESULT_HPOS) { + if (vpi->DstReg.File == PROGRAM_OUTPUT + && vpi->DstReg.Index == VERT_RESULT_HPOS) { vpi->DstReg.File = PROGRAM_TEMPORARY; vpi->DstReg.Index = tempregi; } @@ -1223,25 +1389,32 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key vp = _mesa_calloc(sizeof(*vp)); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - vp->wpos_idx = wpos_idx; if (mesa_vp->IsPositionInvariant) { position_invariant(&mesa_vp->Base); } - if (wpos_idx > -1) + if (wpos_idx > -1) { pos_as_texcoord(vp, &mesa_vp->Base); + } assert(mesa_vp->Base.NumInstructions); - vp->num_temporaries = mesa_vp->Base.NumTemporaries; - r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); return vp; } +static void add_outputs(struct r300_vertex_program_key *key, GLint vert) +{ + if (key->OutputsWritten & (1 << vert)) + return; + + key->OutputsWritten |= 1 << vert; + key->OutputsAdded |= 1 << vert; +} + void r300SelectVertexShader(r300ContextPtr r300) { GLcontext *ctx = ctx = r300->radeon.glCtx; @@ -1253,10 +1426,10 @@ void r300SelectVertexShader(r300ContextPtr r300) GLint wpos_idx; vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; + wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS; - wpos_idx = -1; if (InputsRead & FRAG_BIT_WPOS) { for (i = 0; i < ctx->Const.MaxTextureUnits; i++) @@ -1268,31 +1441,34 @@ void r300SelectVertexShader(r300ContextPtr r300) _mesa_exit(-1); } - InputsRead |= (FRAG_BIT_TEX0 << i); + wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); wpos_idx = i; } - if (InputsRead & FRAG_BIT_COL0) - wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0; + add_outputs(&wanted_key, VERT_RESULT_HPOS); - if ((InputsRead & FRAG_BIT_COL1) /*|| - (InputsRead & FRAG_BIT_FOGC) */ ) - wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1; + if (InputsRead & FRAG_BIT_COL0) { + add_outputs(&wanted_key, VERT_RESULT_COL0); + } - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (InputsRead & (FRAG_BIT_TEX0 << i)) - wanted_key.OutputsWritten |= - 1 << (VERT_RESULT_TEX0 + i); + if (InputsRead & FRAG_BIT_COL1) { + add_outputs(&wanted_key, VERT_RESULT_COL1); + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + add_outputs(&wanted_key, VERT_RESULT_TEX0 + i); + } + } - wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; if (vpc->mesa_program.IsPositionInvariant) { /* we wan't position don't we ? */ wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); } for (vp = vpc->progs; vp; vp = vp->next) - if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == - 0) { + if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) + == 0) { r300->selected_vp = vp; return; } diff --git a/r300/r300_vertprog.h b/r300/r300_vertprog.h index 252d5a9..2f35f02 100644 --- a/r300/r300_vertprog.h +++ b/r300/r300_vertprog.h @@ -3,10 +3,24 @@ #include "r300_reg.h" -typedef struct { - GLuint op; - GLuint src[3]; -} VERTEX_SHADER_INSTRUCTION; +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + +#if 1 #define VSF_FLAG_X 1 #define VSF_FLAG_Y 2 @@ -16,74 +30,6 @@ typedef struct { #define VSF_FLAG_ALL 0xf #define VSF_FLAG_NONE 0 -#define VSF_OUT_CLASS_TMP 0 -#define VSF_OUT_CLASS_ADDR 1 -#define VSF_OUT_CLASS_RESULT 2 - -/* first DWORD of an instruction */ - -/* possible operations: - DOT, MUL, ADD, MAD, FRC, MAX, MIN, SGE, SLT, EXP, LOG, LIT, POW, RCP, RSQ, EX2, - LG2, MAD_2 */ - -#define MAKE_VSF_OP(op, out_reg_index, out_reg_fields, class) \ - ((op) \ - | ((out_reg_index) << R300_VPI_OUT_REG_INDEX_SHIFT) \ - | ((out_reg_fields) << 20) \ - | ( (class) << 8 ) ) - -#define EASY_VSF_OP(op, out_reg_index, out_reg_fields, class) \ - MAKE_VSF_OP(R300_VPI_OUT_OP_##op, out_reg_index, VSF_FLAG_##out_reg_fields, VSF_OUT_CLASS_##class) \ - -/* according to Nikolai, the subsequent 3 DWORDs are sources, use same define for each */ - -#define VSF_IN_CLASS_TMP 0 -#define VSF_IN_CLASS_ATTR 1 -#define VSF_IN_CLASS_PARAM 2 -#define VSF_IN_CLASS_NONE 9 - -#define VSF_IN_COMPONENT_X 0 -#define VSF_IN_COMPONENT_Y 1 -#define VSF_IN_COMPONENT_Z 2 -#define VSF_IN_COMPONENT_W 3 -#define VSF_IN_COMPONENT_ZERO 4 -#define VSF_IN_COMPONENT_ONE 5 - -#define MAKE_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \ - ( ((in_reg_index)<<R300_VPI_IN_REG_INDEX_SHIFT) \ - | ((comp_x)<<R300_VPI_IN_X_SHIFT) \ - | ((comp_y)<<R300_VPI_IN_Y_SHIFT) \ - | ((comp_z)<<R300_VPI_IN_Z_SHIFT) \ - | ((comp_w)<<R300_VPI_IN_W_SHIFT) \ - | ((negate)<<25) | ((class))) - -#define EASY_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \ - MAKE_VSF_SOURCE(in_reg_index, \ - VSF_IN_COMPONENT_##comp_x, \ - VSF_IN_COMPONENT_##comp_y, \ - VSF_IN_COMPONENT_##comp_z, \ - VSF_IN_COMPONENT_##comp_w, \ - VSF_IN_CLASS_##class, VSF_FLAG_##negate) - -/* special sources: */ - -/* (1.0,1.0,1.0,1.0) vector (ATTR, plain ) */ -#define VSF_ATTR_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, ATTR, NONE) -#define VSF_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, NONE, NONE) - -/* contents of unmodified register */ -#define VSF_REG(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, ATTR, NONE) - -/* contents of unmodified parameter */ -#define VSF_PARAM(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, PARAM, NONE) - -/* contents of unmodified temporary register */ -#define VSF_TMP(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, TMP, NONE) - -/* components of ATTR register */ -#define VSF_ATTR_X(reg) EASY_VSF_SOURCE(reg, X, X, X, X, ATTR, NONE) -#define VSF_ATTR_Y(reg) EASY_VSF_SOURCE(reg, Y, Y, Y, Y, ATTR, NONE) -#define VSF_ATTR_Z(reg) EASY_VSF_SOURCE(reg, Z, Z, Z, Z, ATTR, NONE) -#define VSF_ATTR_W(reg) EASY_VSF_SOURCE(reg, W, W, W, W, ATTR, NONE) +#endif #endif diff --git a/r300/r500_fragprog.c b/r300/r500_fragprog.c new file mode 100644 index 0000000..75dae86 --- /dev/null +++ b/r300/r500_fragprog.c @@ -0,0 +1,700 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r500_fragprog.h" + +#include "radeon_nqssadce.h" +#include "radeon_program_alu.h" + + +static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) +{ + gl_state_index fail_value_tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 + }; + struct prog_src_register reg = { 0, }; + + fail_value_tokens[2] = tmu; + reg.File = PROGRAM_STATE_VAR; + reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + */ +static GLboolean transform_TEX( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, void* data) +{ + struct r500_fragment_program_compiler *compiler = + (struct r500_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = inst.DstReg; + if (comparefunc == GL_ALWAYS) { + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); + } + return GL_TRUE; + } + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = radeonFindFreeTemporary(t); + inst.DstReg.WriteMask = WRITEMASK_XYZW; + } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) { + int tempreg = radeonFindFreeTemporary(t); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } + + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; + int rcptemp = radeonFindFreeTemporary(t); + int pass, fail; + + tgt = radeonAppendInstructions(t->Program, 3); + + tgt[0].Opcode = OPCODE_RCP; + tgt[0].DstReg.File = PROGRAM_TEMPORARY; + tgt[0].DstReg.Index = rcptemp; + tgt[0].DstReg.WriteMask = WRITEMASK_W; + tgt[0].SrcReg[0] = inst.SrcReg[0]; + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + + tgt[1].Opcode = OPCODE_MAD; + tgt[1].DstReg = inst.DstReg; + tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[1].SrcReg[0] = inst.SrcReg[0]; + tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[1].Index = rcptemp; + tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; + tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[2].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + else + tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + + tgt[2].Opcode = OPCODE_CMP; + tgt[2].DstReg = orig_inst->DstReg; + tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; + tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; + tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); + } else if (destredirect) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = orig_inst->DstReg; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + } + + return GL_TRUE; +} + + +static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); +} + + +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + * \todo if/when r5xx supports the radeon_program architecture, this is a + * likely candidate for code sharing. + */ +static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) +{ + GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; + + if (!(InputsRead & FRAG_BIT_WPOS)) + return; + + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); + + _mesa_insert_instructions(compiler->program, 0, 3); + fpi = compiler->program->Instructions; + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + for (; i < compiler->program->NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; + } + } + } +} + + +static void nqssadce_init(struct nqssadce_state* s) +{ + s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W; +} + +static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) +{ + GLuint relevant; + int i; + + if (opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXP || + opcode == OPCODE_KIL) { + if (reg.Abs) + return GL_FALSE; + + if (reg.NegateAbs) + reg.NegateBase ^= 15; + + if (opcode == OPCODE_KIL) { + if (reg.Swizzle != SWIZZLE_NOOP) + return GL_FALSE; + } else { + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz == SWIZZLE_NIL) { + reg.NegateBase &= ~(1 << i); + continue; + } + if (swz >= 4) + return GL_FALSE; + } + } + + if (reg.NegateBase) + return GL_FALSE; + + return GL_TRUE; + } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... */ + if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs + && !reg.NegateBase && !reg.NegateAbs) + return GL_TRUE; + + return GL_FALSE; + } else { + /* ALU instructions support almost everything */ + if (reg.Abs) + return GL_TRUE; + + relevant = 0; + for(i = 0; i < 3; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant)) + return GL_FALSE; + + return GL_TRUE; + } +} + +/** + * Implement a MOV with a potentially non-native swizzle. + * + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. Therefore, we split the MOV into two instructions when necessary. + */ +static void nqssadce_build_swizzle(struct nqssadce_state *s, + struct prog_dst_register dst, struct prog_src_register src) +{ + struct prog_instruction *inst; + GLuint negatebase[2] = { 0, 0 }; + int i; + + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(src.Swizzle, i); + if (swz == SWIZZLE_NIL) + continue; + negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i; + } + + _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); + inst = s->Program->Instructions + s->IP; + + for(i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; + + inst->Opcode = OPCODE_MOV; + inst->DstReg = dst; + inst->DstReg.WriteMask = negatebase[i]; + inst->SrcReg[0] = src; + inst++; + s->IP++; + } +} + +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) +{ + return comparefunc - GL_NEVER; +} + + +/** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct r500_fragment_program *fp, + struct r500_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + +static void dump_program(struct r500_fragment_program_code *code); + +void r500TranslateFragmentShader(r300ContextPtr r300, + struct r500_fragment_program *fp) +{ + struct r500_fragment_program_external_state state; + + build_state(r300, fp, &state); + if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + fp->translated = GL_FALSE; + _mesa_memcpy(&fp->state, &state, sizeof(state)); + } + + if (!fp->translated) { + struct r500_fragment_program_compiler compiler; + + compiler.r300 = r300; + compiler.fp = fp; + compiler.code = &fp->code; + compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: Initial program:\n"); + _mesa_print_program(compiler.program); + } + + insert_WPOS_trailer(&compiler); + + struct radeon_program_transformation transformations[] = { + { &transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(r300->radeon.glCtx, compiler.program, + 4, transformations); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after native rewrite:\n"); + _mesa_print_program(compiler.program); + } + + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &is_native_swizzle, + .BuildSwizzle = &nqssadce_build_swizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(compiler.program); + } + + fp->translated = r500FragmentProgramEmit(&compiler); + + /* Subtle: Rescue any parameters that have been added during transformations */ + _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); + fp->mesa_program.Base.Parameters = compiler.program->Parameters; + compiler.program->Parameters = 0; + + _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0); + + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + if (fp->translated) { + _mesa_printf("Machine-readable code:\n"); + dump_program(&fp->code); + } + } + + } + + update_params(r300, fp); + +} + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "1/2"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +static void dump_program(struct r500_fragment_program_code *code) +{ + + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + int n; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + + if (code->const_nr) { + fprintf(stderr, "--------\nConstants:\n"); + for (n = 0; n < code->const_nr; n++) { + fprintf(stderr, "Constant %d: %i[%i]\n", n, + code->constant[n].File, code->constant[n].Index); + } + fprintf(stderr, "--------\n"); + } + + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case 0: + case 1: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case 2: + break; + case 3: + inst = code->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/r300/r500_fragprog.h b/r300/r500_fragprog.h new file mode 100644 index 0000000..8641cee --- /dev/null +++ b/r300/r500_fragprog.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "r300_context.h" +#include "r300_state.h" +#include "radeon_program.h" + +struct r500_fragment_program; + +extern void r500TranslateFragmentShader(r300ContextPtr r300, + struct r500_fragment_program *fp); + +struct r500_fragment_program_compiler { + r300ContextPtr r300; + struct r500_fragment_program *fp; + struct r500_fragment_program_code *code; + struct gl_program *program; +}; + +extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler); + +#endif diff --git a/r300/r500_fragprog_emit.c b/r300/r500_fragprog_emit.c new file mode 100644 index 0000000..4631235 --- /dev/null +++ b/r300/r500_fragprog_emit.c @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs <darktama@iinet.net.au> + * + * \author Jerome Glisse <j.glisse@gmail.com> + * + * \author Corbin Simpson <MostAwesomeDude@gmail.com> + * + * \todo Depth write, WPOS/FOGC inputs + * + * \todo FogOption + * + */ + +#include "r500_fragprog.h" + +#include "radeon_program_pair.h" + + +#define PROG_CODE \ + struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \ + struct r500_fragment_program_code *code = c->code + +#define error(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +/** + * Callback to register hardware constants. + */ +static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex) +{ + PROG_CODE; + + for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { + if (code->constant[*hwindex].File == file && + code->constant[*hwindex].Index == idx) + break; + } + + if (*hwindex >= code->const_nr) { + if (*hwindex >= PFS_NUM_CONST_REGS) { + error("Out of hw constants!\n"); + return GL_FALSE; + } + + code->const_nr++; + code->constant[*hwindex].File = file; + code->constant[*hwindex].Index = idx; + } + + return GL_TRUE; +} + +static GLuint translate_rgb_op(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op(%d): unknown opcode\n", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } +} + +static GLuint translate_alpha_op(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R500_ALPHA_OP_CMP; + case OPCODE_COS: return R500_ALPHA_OP_COS; + case OPCODE_DDX: return R500_ALPHA_OP_MDH; + case OPCODE_DDY: return R500_ALPHA_OP_MDV; + case OPCODE_DP3: return R500_ALPHA_OP_DP; + case OPCODE_DP4: return R500_ALPHA_OP_DP; + case OPCODE_EX2: return R500_ALPHA_OP_EX2; + case OPCODE_FRC: return R500_ALPHA_OP_FRC; + case OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op(%d): unknown opcode\n", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R500_ALPHA_OP_MAD; + case OPCODE_MAX: return R500_ALPHA_OP_MAX; + case OPCODE_MIN: return R500_ALPHA_OP_MIN; + case OPCODE_RCP: return R500_ALPHA_OP_RCP; + case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case OPCODE_SIN: return R500_ALPHA_OP_SIN; + } +} + +static GLuint fix_hw_swizzle(GLuint swz) +{ + if (swz == 5) swz = 6; + if (swz == SWIZZLE_NIL) swz = 4; + return swz; +} + +static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) +{ + GLuint t = inst->RGB.Arg[arg].Source; + int comp; + t |= inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; + + for(comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + + return t; +} + +static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) +{ + GLuint t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; +} + +static void use_temporary(struct r500_fragment_program_code* code, GLuint index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + +static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +{ + if (!src.Constant) + use_temporary(code, src.Index); + return src.Index | src.Constant << 8; +} + + +/** + * Emit a paired ALU instruction. + */ +static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_alu: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode); + + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + else + code->inst[ip].inst0 = R500_INST_TYPE_ALU; + code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; + + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->fp->writes_depth = GL_TRUE; + } + + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + use_temporary(code, inst->Alpha.DestIndex); + use_temporary(code, inst->RGB.DestIndex); + + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + + return GL_TRUE; +} + +static GLuint translate_strq_swizzle(struct prog_src_register src) +{ + GLuint swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2; + return swiz; +} + +/** + * Emit a single TEX instruction + */ +static GLboolean emit_tex(void *data, struct prog_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_tex: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst0 = R500_INST_TYPE_TEX + | (inst->DstReg.WriteMask << 11) + | R500_INST_TEX_SEM_WAIT; + code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; + + switch (inst->Opcode) { + case OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + default: + error("emit_tex can't handle opcode %x\n", inst->Opcode); + } + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0]) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + return GL_TRUE; +} + +static const struct radeon_pair_handler pair_handler = { + .EmitConst = emit_const, + .EmitPaired = emit_paired, + .EmitTex = emit_tex, + .MaxHwTemps = 128 +}; + +GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler) +{ + struct r500_fragment_program_code *code = compiler->code; + + _mesa_bzero(code, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_offset = 0; + code->inst_end = -1; + + if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) + return GL_FALSE; + + if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= 511) { + error("Introducing fake OUT: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } + + return GL_TRUE; +} diff --git a/r300/radeon_context.c b/r300/radeon_context.c index e9634b4..3fc724a 100644 --- a/r300/radeon_context.c +++ b/r300/radeon_context.c @@ -135,6 +135,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon, /* Fill in additional standard functions. */ radeonInitDriverFuncs(functions); + radeon->radeonScreen = screen; /* Allocate and initialize the Mesa context */ if (sharedContextPrivate) shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx; @@ -156,9 +157,8 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->dri.hwContext = driContextPriv->hHWContext; radeon->dri.hwLock = &sPriv->pSAREA->lock; radeon->dri.fd = sPriv->fd; - radeon->dri.drmMinor = sPriv->drmMinor; + radeon->dri.drmMinor = sPriv->drm_version.minor; - radeon->radeonScreen = screen; radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + screen->sarea_priv_offset); @@ -177,10 +177,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->do_usleeps ? "usleeps" : "busy waits", fthrottle_mode, radeon->radeonScreen->irq); - radeon->vblank_flags = (radeon->radeonScreen->irq != 0) - ? driGetDefaultVBlankFlags(&radeon->optionCache) : VBLANK_FLAG_NO_IRQ; - - (*dri_interface->getUST) (&radeon->swap_ust); + (*sPriv->systemTime->getUST) (&radeon->swap_ust); return GL_TRUE; } @@ -277,9 +274,15 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, radeon->glCtx); if (radeon->dri.drawable != driDrawPriv) { - driDrawableInitVBlank(driDrawPriv, - radeon->vblank_flags, - &radeon->vbl_seq); + if (driDrawPriv->swap_interval == (unsigned)-1) { + driDrawPriv->vblFlags = + (radeon->radeonScreen->irq != 0) + ? driGetDefaultVBlankFlags(&radeon-> + optionCache) + : VBLANK_FLAG_NO_IRQ; + + driDrawableInitVBlank(driDrawPriv); + } } radeon->dri.readable = driReadPriv; diff --git a/r300/radeon_context.h b/r300/radeon_context.h index 2f23941..7458d63 100644 --- a/r300/radeon_context.h +++ b/r300/radeon_context.h @@ -53,16 +53,6 @@ struct radeon_context; typedef struct radeon_context radeonContextRec; typedef struct radeon_context *radeonContextPtr; -#define TEX_0 0x1 -#define TEX_1 0x2 -#define TEX_2 0x4 -#define TEX_3 0x8 -#define TEX_4 0x10 -#define TEX_5 0x20 -#define TEX_6 0x40 -#define TEX_7 0x80 -#define TEX_ALL 0xff - /* Rasterizing fallbacks */ /* See correponding strings in r200_swtcl.c */ #define RADEON_FALLBACK_TEXTURE 0x0001 @@ -182,10 +172,7 @@ struct radeon_context { GLuint irqsEmitted; drm_radeon_irq_wait_t iw; - /* VBI / buffer swap */ - GLuint vbl_seq; - GLuint vblank_flags; - + /* buffer swap */ int64_t swap_ust; int64_t swap_missed_ust; diff --git a/r300/radeon_ioctl.c b/r300/radeon_ioctl.c index 0b8656b..0c1a195 100644 --- a/r300/radeon_ioctl.c +++ b/r300/radeon_ioctl.c @@ -157,13 +157,14 @@ static void radeonWaitForFrameCompletion(radeonContextPtr radeon) /* Copy the back color buffer to the front color buffer. */ -void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv, +void radeonCopyBuffer(__DRIdrawablePrivate * dPriv, const drm_clip_rect_t * rect) { radeonContextPtr radeon; GLint nbox, i, ret; GLboolean missed_target; int64_t ust; + __DRIscreenPrivate *psp = dPriv->driScreenPriv; assert(dPriv); assert(dPriv->driContextPriv); @@ -187,8 +188,7 @@ void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv, if (!rect) { UNLOCK_HARDWARE(radeon); - driWaitForVBlank(dPriv, &radeon->vbl_seq, radeon->vblank_flags, - &missed_target); + driWaitForVBlank(dPriv, &missed_target); LOCK_HARDWARE(radeon); } @@ -215,16 +215,18 @@ void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv, if (rect->y2 < b->y2) b->y2 = rect->y2; - if (b->x1 < b->x2 && b->y1 < b->y2) - b++; + if (b->x1 >= b->x2 || b->y1 >= b->y2) + continue; } - else - b++; + b++; n++; } radeon->sarea->nbox = n; + if (!n) + continue; + ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP); if (ret) { @@ -241,7 +243,7 @@ void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv, ((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE; radeon->swap_count++; - (*dri_interface->getUST) (&ust); + (*psp->systemTime->getUST) (&ust); if (missed_target) { radeon->swap_missed_count++; radeon->swap_missed_ust = ust - radeon->swap_ust; @@ -253,11 +255,12 @@ void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv, } } -void radeonPageFlip(const __DRIdrawablePrivate * dPriv) +void radeonPageFlip(__DRIdrawablePrivate * dPriv) { radeonContextPtr radeon; GLint ret; GLboolean missed_target; + __DRIscreenPrivate *psp = dPriv->driScreenPriv; assert(dPriv); assert(dPriv->driContextPriv); @@ -293,11 +296,10 @@ void radeonPageFlip(const __DRIdrawablePrivate * dPriv) */ radeonWaitForFrameCompletion(radeon); UNLOCK_HARDWARE(radeon); - driWaitForVBlank(dPriv, &radeon->vbl_seq, radeon->vblank_flags, - &missed_target); + driWaitForVBlank(dPriv, &missed_target); if (missed_target) { radeon->swap_missed_count++; - (void)(*dri_interface->getUST) (&radeon->swap_missed_ust); + (void)(*psp->systemTime->getUST) (&radeon->swap_missed_ust); } LOCK_HARDWARE(radeon); @@ -311,7 +313,7 @@ void radeonPageFlip(const __DRIdrawablePrivate * dPriv) } radeon->swap_count++; - (void)(*dri_interface->getUST) (&radeon->swap_ust); + (void)(*psp->systemTime->getUST) (&radeon->swap_ust); driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, radeon->sarea->pfCurrentPage); diff --git a/r300/radeon_ioctl.h b/r300/radeon_ioctl.h index 3a80d36..210001e 100644 --- a/r300/radeon_ioctl.h +++ b/r300/radeon_ioctl.h @@ -46,9 +46,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #endif #include "radeon_drm.h" -extern void radeonCopyBuffer(const __DRIdrawablePrivate * drawable, +extern void radeonCopyBuffer(__DRIdrawablePrivate * drawable, const drm_clip_rect_t * rect); -extern void radeonPageFlip(const __DRIdrawablePrivate * drawable); +extern void radeonPageFlip(__DRIdrawablePrivate * drawable); extern void radeonFlush(GLcontext * ctx); extern void radeonFinish(GLcontext * ctx); extern void radeonWaitForIdleLocked(radeonContextPtr radeon); diff --git a/r300/radeon_lock.c b/r300/radeon_lock.c index bc3c2d6..d54a821 100644 --- a/r300/radeon_lock.c +++ b/r300/radeon_lock.c @@ -68,8 +68,8 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa) } use_back = rmesa->glCtx->DrawBuffer ? - (rmesa->glCtx->DrawBuffer->_ColorDrawBufferMask[0] == - BUFFER_BIT_BACK_LEFT) : 1; + (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] == + BUFFER_BACK_LEFT) : 1; use_back ^= (rmesa->sarea->pfCurrentPage == 1); if (use_back) { diff --git a/r300/radeon_lock.h b/r300/radeon_lock.h index c47adc9..a344837 100644 --- a/r300/radeon_lock.h +++ b/r300/radeon_lock.h @@ -42,9 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef __RADEON_LOCK_H__ #define __RADEON_LOCK_H__ -#if 0 -#include "r200_ioctl.h" -#endif #include "radeon_context.h" extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); diff --git a/r300/radeon_nqssadce.c b/r300/radeon_nqssadce.c new file mode 100644 index 0000000..97ce016 --- /dev/null +++ b/r300/radeon_nqssadce.c @@ -0,0 +1,284 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * "Not-quite SSA" and Dead-Code Elimination. + * + * @note This code uses SWIZZLE_NIL in a source register to indicate that + * the corresponding component is ignored by the corresponding instruction. + */ + +#include "radeon_nqssadce.h" + + +/** + * Return the @ref register_state for the given register (or 0 for untracked + * registers, i.e. constants). + */ +static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_OUTPUT: return &s->Outputs[index]; + default: return 0; + } +} + + +/** + * Left multiplication of a register with a swizzle + * + * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. + */ +static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) +{ + struct prog_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.NegateBase = 0; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + + +static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, + struct prog_instruction *inst, GLint src, GLuint sourced) +{ + int i; + GLuint deswz_source = 0; + + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) { + GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); + deswz_source |= 1 << swz; + } else { + inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + } + + if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { + struct prog_dst_register dstreg = inst->DstReg; + dstreg.File = PROGRAM_TEMPORARY; + dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + dstreg.WriteMask = sourced; + + s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); + + inst = s->Program->Instructions + s->IP; + inst->SrcReg[src].File = PROGRAM_TEMPORARY; + inst->SrcReg[src].Index = dstreg.Index; + inst->SrcReg[src].Swizzle = 0; + inst->SrcReg[src].NegateBase = 0; + inst->SrcReg[src].Abs = 0; + inst->SrcReg[src].NegateAbs = 0; + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) + inst->SrcReg[src].Swizzle |= i << (3*i); + else + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + deswz_source = sourced; + } + + struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); + if (regstate) + regstate->Sourced |= deswz_source & 0xf; + + return inst; +} + + +static void rewrite_depth_out(struct prog_instruction *inst) +{ + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + return; + } + + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } +} + +static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) +{ + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int i; + for(i = 0; i < nsrc; ++i) + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) + inst->SrcReg[i].Index = newindex; +} + +static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) +{ + GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + int ip; + for(ip = 0; ip < s->IP; ++ip) { + struct prog_instruction* inst = s->Program->Instructions + ip; + if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) + inst->DstReg.Index = newindex; + unalias_srcregs(inst, oldindex, newindex); + } + unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); +} + + +/** + * Handle one instruction. + */ +static void process_instruction(struct nqssadce_state* s) +{ + struct prog_instruction *inst = s->Program->Instructions + s->IP; + + if (inst->Opcode == OPCODE_END) + return; + + if (inst->Opcode != OPCODE_KIL) { + if (s->Descr->RewriteDepthOut) { + if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR) + rewrite_depth_out(inst); + } + + struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); + if (!regstate) { + _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", + inst->DstReg.File, inst->DstReg.Index); + return; + } + + inst->DstReg.WriteMask &= regstate->Sourced; + regstate->Sourced &= ~inst->DstReg.WriteMask; + + if (inst->DstReg.WriteMask == 0) { + _mesa_delete_instructions(s->Program, s->IP, 1); + return; + } + + if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) + unalias_temporary(s, inst->DstReg.Index); + } + + /* Attention: Due to swizzle emulation code, the following + * might change the instruction stream under us, so we have + * to be careful with the inst pointer. */ + switch (inst->Opcode) { + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_FRC: + case OPCODE_MOV: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + inst = track_used_srcreg(s, inst, 0, 0x1); + break; + case OPCODE_DP3: + inst = track_used_srcreg(s, inst, 0, 0x7); + inst = track_used_srcreg(s, inst, 1, 0x7); + break; + case OPCODE_DP4: + inst = track_used_srcreg(s, inst, 0, 0xf); + inst = track_used_srcreg(s, inst, 1, 0xf); + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + inst = track_used_srcreg(s, inst, 0, 0xf); + break; + default: + _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); + return; + } +} + + +void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) +{ + struct nqssadce_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Ctx = ctx; + s.Program = p; + s.Descr = descr; + s.Descr->Init(&s); + s.IP = p->NumInstructions; + + while(s.IP > 0) { + s.IP--; + process_instruction(&s); + } +} diff --git a/r300/radeon_nqssadce.h b/r300/radeon_nqssadce.h new file mode 100644 index 0000000..a4f94ab --- /dev/null +++ b/r300/radeon_nqssadce.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_NQSSADCE_H_ +#define __RADEON_PROGRAM_NQSSADCE_H_ + +#include "radeon_program.h" + + +struct register_state { + /** + * Bitmask indicating which components of the register are sourced + * by later instructions. + */ + GLuint Sourced : 4; +}; + +/** + * Maintain state such as which registers are used, which registers are + * read from, etc. + */ +struct nqssadce_state { + GLcontext *Ctx; + struct gl_program *Program; + struct radeon_nqssadce_descr *Descr; + + /** + * All instructions after this instruction pointer have been dealt with. + */ + int IP; + + /** + * Which registers are read by subsequent instructions? + */ + struct register_state Temps[MAX_PROGRAM_TEMPS]; + struct register_state Outputs[VERT_RESULT_MAX]; +}; + + +/** + * This structure contains a description of the hardware in-so-far as + * it is required for the NqSSA-DCE pass. + */ +struct radeon_nqssadce_descr { + /** + * Fill in which outputs + */ + void (*Init)(struct nqssadce_state *); + + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + */ + GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); + + /** + * Emit (at the current IP) the instruction MOV dst, src; + * The transformation will work recursively on the emitted instruction(s). + */ + void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + + /** + * Rewrite instructions that write to DEPR.z to write to DEPR.w + * instead (rewriting is done *before* the WriteMask test). + */ + GLboolean RewriteDepthOut; + void *Data; +}; + +void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); + +#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/r300/radeon_program.c b/r300/radeon_program.c new file mode 100644 index 0000000..da5e7ae --- /dev/null +++ b/r300/radeon_program.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include "shader/prog_print.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void radeonLocalTransform( + GLcontext *Ctx, + struct gl_program *program, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct radeon_transform_context ctx; + int ip; + + ctx.Ctx = Ctx; + ctx.Program = program; + ctx.OldInstructions = program->Instructions; + ctx.OldNumInstructions = program->NumInstructions; + + program->Instructions = 0; + program->NumInstructions = 0; + + for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { + struct prog_instruction *instr = ctx.OldInstructions + ip; + int i; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(&ctx, instr, t->userData)) + break; + } + + if (i >= num_transformations) { + struct prog_instruction* dest = radeonAppendInstructions(program, 1); + _mesa_copy_instructions(dest, instr, 1); + } + } + + _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); +} + + +static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) +{ + GLuint i; + for (i = 0; i < count; i++) { + const struct prog_instruction *inst = insts + i; + const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); + GLuint k; + + for (k = 0; k < n; k++) { + if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) + used[inst->SrcReg[k].Index] = GL_TRUE; + } + } +} + +GLint radeonFindFreeTemporary(struct radeon_transform_context *t) +{ + GLboolean used[MAX_PROGRAM_TEMPS]; + GLuint i; + + _mesa_memset(used, 0, sizeof(used)); + scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); + scan_instructions(used, t->OldInstructions, t->OldNumInstructions); + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!used[i]) + return i; + } + + return -1; +} + + +/** + * Append the given number of instructions to the program and return a + * pointer to the first new instruction. + */ +struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count) +{ + int oldnum = program->NumInstructions; + _mesa_insert_instructions(program, oldnum, count); + return program->Instructions + oldnum; +} diff --git a/r300/radeon_program.h b/r300/radeon_program.h new file mode 100644 index 0000000..2e01dd4 --- /dev/null +++ b/r300/radeon_program.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + + +enum { + CLAUSE_MIXED = 0, + CLAUSE_ALU, + CLAUSE_TEX +}; + +enum { + PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ +}; + +enum { + OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ +}; + +#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) +#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) + +/** + * Transformation context that is passed to local transformations. + * + * Care must be taken with some operations during transformation, + * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary + */ +struct radeon_transform_context { + GLcontext *Ctx; + struct gl_program *Program; + struct prog_instruction *OldInstructions; + GLuint OldNumInstructions; +}; + +/** + * A transformation that can be passed to \ref radeonLocalTransform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return GL_TRUE, or return GL_FALSE if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + GLboolean (*function)( + struct radeon_transform_context*, + struct prog_instruction*, + void*); + void *userData; +}; + +void radeonLocalTransform( + GLcontext* ctx, + struct gl_program *program, + int num_transformations, + struct radeon_program_transformation* transformations); + +/** + * Find a usable free temporary register during program transformation + */ +GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); + +struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); + +#endif diff --git a/r300/radeon_program_alu.c b/r300/radeon_program_alu.c new file mode 100644 index 0000000..1ef71e7 --- /dev/null +++ b/r300/radeon_program_alu.c @@ -0,0 +1,658 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. + * + */ + +#include "radeon_program_alu.h" + +#include "shader/prog_parameter.h" + + +static struct prog_instruction *emit1(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg; + return fpi; +} + +static struct prog_instruction *emit2(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + return fpi; +} + +static struct prog_instruction *emit3(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, + struct prog_src_register SrcReg2) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + fpi->SrcReg[2] = SrcReg2; + return fpi; +} + +static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz) +{ + SrcReg->Swizzle &= ~(7 << (3*coordinate)); + SrcReg->Swizzle |= swz << (3*coordinate); +} + +static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate) +{ + SrcReg->NegateBase &= ~(1 << coordinate); + SrcReg->NegateBase |= (negate << coordinate); +} + +static struct prog_dst_register dstreg(int file, int index) +{ + struct prog_dst_register dst; + dst.File = file; + dst.Index = index; + dst.WriteMask = WRITEMASK_XYZW; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static struct prog_dst_register dstregtmpmask(int index, int mask) +{ + struct prog_dst_register dst; + dst.File = PROGRAM_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static const struct prog_src_register builtin_zero = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_0000 +}; +static const struct prog_src_register builtin_one = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_1111 +}; +static const struct prog_src_register srcreg_undefined = { + .File = PROGRAM_UNDEFINED, + .Index = 0, + .Swizzle = SWIZZLE_NOOP +}; + +static struct prog_src_register srcreg(int file, int index) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct prog_src_register srcregswz(int file, int index, int swz) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; +} + +static struct prog_src_register absolute(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.Abs = 1; + newreg.NegateBase = 0; + newreg.NegateAbs = 0; + return newreg; +} + +static struct prog_src_register negate(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.NegateAbs = !newreg.NegateAbs; + return newreg; +} + +static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) +{ + struct prog_src_register swizzled = reg; + swizzled.Swizzle = MAKE_SWIZZLE4( + x >= 4 ? x : GET_SWZ(reg.Swizzle, x), + y >= 4 ? y : GET_SWZ(reg.Swizzle, y), + z >= 4 ? z : GET_SWZ(reg.Swizzle, z), + w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); + return swizzled; +} + +static struct prog_src_register scalar(struct prog_src_register reg) +{ + return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); +} + +static void transform_ABS(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + struct prog_src_register src = inst->SrcReg[0]; + src.Abs = 1; + src.NegateBase = 0; + src.NegateAbs = 0; + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); +} + +static void transform_DPH(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + if (src0.NegateAbs) { + if (src0.Abs) { + int tempreg = radeonFindFreeTemporary(t); + emit1(t->Program, OPCODE_MOV, 0, dstreg(PROGRAM_TEMPORARY, tempreg), src0); + src0 = srcreg(src0.File, src0.Index); + } else { + src0.NegateAbs = 0; + src0.NegateBase ^= NEGATE_XYZW; + } + } + set_swizzle(&src0, 3, SWIZZLE_ONE); + set_negate_base(&src0, 3, 0); + emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); +} + +/** + * [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. + */ +static void transform_DST(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg, + swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), + swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); +} + +static void transform_FLR(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); + emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, + inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + +/** + * Definition of LIT (from ARB_fragment_program): + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. + */ +static void transform_LIT(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + static const GLfloat LitConst[4] = { -127.999999 }; + + GLuint constant; + GLuint constant_swizzle; + GLuint temp; + int needTemporary = 0; + struct prog_src_register srctemp; + + constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); + + if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = radeonFindFreeTemporary(t); + } else { + temp = inst->DstReg.Index; + } + srctemp = srcreg(PROGRAM_TEMPORARY, temp); + + // tmp.x = max(0.0, Src.x); + // tmp.y = max(0.0, Src.y); + // tmp.w = clamp(Src.z, -128+eps, 128-eps); + emit2(t->Program, OPCODE_MAX, 0, + dstregtmpmask(temp, WRITEMASK_XYW), + inst->SrcReg[0], + swizzle(srcreg(PROGRAM_CONSTANT, constant), + SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); + emit2(t->Program, OPCODE_MIN, 0, + dstregtmpmask(temp, WRITEMASK_Z), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); + + // tmp.w = Pow(tmp.y, tmp.w) + emit1(t->Program, OPCODE_LG2, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit2(t->Program, OPCODE_MUL, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); + emit1(t->Program, OPCODE_EX2, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + + // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, + dstregtmpmask(temp, WRITEMASK_Z), + negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + builtin_zero); + + // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, + dstregtmpmask(temp, WRITEMASK_XYW), + swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); + + if (needTemporary) + emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp); +} + +static void transform_LRP(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, + dstreg(PROGRAM_TEMPORARY, tempreg), + inst->SrcReg[1], negate(inst->SrcReg[2])); + emit3(t->Program, OPCODE_MAD, inst->SaturateMode, + inst->DstReg, + inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]); +} + +static void transform_POW(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); + struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); + tempdst.WriteMask = WRITEMASK_W; + tempsrc.Swizzle = SWIZZLE_WWWW; + + emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); + emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); + emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc); +} + +static void transform_RSQ(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0])); +} + +static void transform_SGE(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); +} + +static void transform_SLT(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); +} + +static void transform_SUB(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); +} + +static void transform_SWZ(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]); +} + +static void transform_XPD(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), + swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, + swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP + * + * Transforms RSQ to Radeon's native RSQ by explicitly setting + * absolute value. + * + * @note should be applicable to R300 and R500 fragment programs. + */ +GLboolean radeonTransformALU(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + switch(inst->Opcode) { + case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; + case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; + case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; + case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} + + +static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) +{ + static const GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.5, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } + }; + int i; + + for(i = 0; i < 2; ++i) { + GLuint swz; + constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); + ASSERT(swz == SWIZZLE_NOOP); + } +} + +/** + * Approximate sin(x), where x is clamped to (-pi/2, pi/2). + * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx(struct radeon_transform_context* t, + struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) +{ + GLuint tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcreg(PROGRAM_CONSTANT, constants[0])); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); + emit3(t->Program, OPCODE_MAD, 0, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MOV, ADD, MUL, MAD, FRC + */ +GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_COS && + inst->Opcode != OPCODE_SIN && + inst->Opcode != OPCODE_SCS) + return GL_FALSE; + + GLuint constants[2]; + GLuint tempreg = radeonFindFreeTemporary(t); + + sincos_constants(t, constants); + + if (inst->Opcode == OPCODE_COS) { + // MAD tmp.x, src, 1/(2*PI), 0.75 + // FRC tmp.x, tmp.x + // MAD tmp.z, tmp.x, 2*PI, -PI + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(t, inst->DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else if (inst->Opcode == OPCODE_SIN) { + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(t, inst->DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else { + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + struct prog_dst_register dst = inst->DstReg; + + dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; + sin_approx(t, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + constants); + + dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; + sin_approx(t, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + constants); + } + + return GL_TRUE; +} + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! + */ +GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_COS && + inst->Opcode != OPCODE_SIN && + inst->Opcode != OPCODE_SCS) + return GL_FALSE; + + static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; + GLuint temp; + GLuint constant; + GLuint constant_swizzle; + + temp = radeonFindFreeTemporary(t); + constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); + + emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), + srcreg(PROGRAM_TEMPORARY, temp)); + + if (inst->Opcode == OPCODE_COS) { + emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->Opcode == OPCODE_SIN) { + emit1(t->Program, OPCODE_SIN, inst->SaturateMode, + inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->Opcode == OPCODE_SCS) { + struct prog_dst_register moddst = inst->DstReg; + + if (inst->DstReg.WriteMask & WRITEMASK_X) { + moddst.WriteMask = WRITEMASK_X; + emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + if (inst->DstReg.WriteMask & WRITEMASK_Y) { + moddst.WriteMask = WRITEMASK_Y; + emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + } + + return GL_TRUE; +} + +/** + * Rewrite DDX/DDY instructions to properly work with r5xx shaders. + * The r5xx MDH/MDV instruction provides per-quad partial derivatives. + * It takes the form A*B+C. A and C are set by setting src0. B should be -1. + * + * @warning This explicitly changes the form of DDX and DDY! + */ + +GLboolean radeonTransformDeriv(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY) + return GL_FALSE; + + struct prog_src_register B = inst->SrcReg[1]; + + B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, + SWIZZLE_ONE, SWIZZLE_ONE); + B.NegateBase = NEGATE_XYZW; + + emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, + inst->SrcReg[0], B); + + return GL_TRUE; +} diff --git a/r300/radeon_program_alu.h b/r300/radeon_program_alu.h new file mode 100644 index 0000000..b459581 --- /dev/null +++ b/r300/radeon_program_alu.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +GLboolean radeonTransformALU( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformTrigSimple( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformTrigScale( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformDeriv( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/r300/radeon_program_pair.c b/r300/radeon_program_pair.c new file mode 100644 index 0000000..5ad50d2 --- /dev/null +++ b/r300/radeon_program_pair.c @@ -0,0 +1,1001 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Perform temporary register allocation and attempt to pair off instructions + * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction + * vs. ALU instruction scheduling. + */ + +#include "radeon_program_pair.h" + +#include "radeon_context.h" + +#include "shader/prog_print.h" + +#define error(fmt, args...) do { \ + _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + s->Error = GL_TRUE; \ +} while(0) + +struct pair_state_instruction { + GLuint IsTex:1; /**< Is a texture instruction */ + GLuint NeedRGB:1; /**< Needs the RGB ALU */ + GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ + GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + GLuint NumDependencies:5; + + /** + * Next instruction in the linked list of ready instructions. + */ + struct pair_state_instruction *NextReady; + + /** + * Values that this instruction writes + */ + struct reg_value *Values[4]; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + GLuint IP; /**< IP of the instruction that performs this access */ + struct reg_value_reader *Next; +}; + +/** + * Used to keep track which values are stored in each component of a + * PROGRAM_TEMPORARY. + */ +struct reg_value { + GLuint IP; /**< IP of the instruction that writes this value */ + struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ + + /** + * Unordered linked list of instructions that read from this value. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is calculated during @ref scan_instructions + * and continually decremented during code emission. + * When this count reaches zero, the instruction that writes the @ref Next value + * can be scheduled. + */ + GLuint NumReaders; +}; + +/** + * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register + * to the proper hardware temporary. + */ +struct pair_register_translation { + GLuint Allocated:1; + GLuint HwIndex:8; + GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ + + /** + * Notes the value that is currently contained in each component + * (only used for PROGRAM_TEMPORARY registers). + */ + struct reg_value *Value[4]; +}; + +struct pair_state { + GLcontext *Ctx; + struct gl_program *Program; + const struct radeon_pair_handler *Handler; + GLboolean Error; + GLboolean Debug; + GLboolean Verbose; + void *UserData; + + /** + * Translate Mesa registers to hardware registers + */ + struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; + struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; + + /** + * Derived information about program instructions. + */ + struct pair_state_instruction *Instructions; + + struct { + GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ + } HwTemps[128]; + + /** + * Linked list of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + struct pair_state_instruction *ReadyFullALU; + struct pair_state_instruction *ReadyRGB; + struct pair_state_instruction *ReadyAlpha; + struct pair_state_instruction *ReadyTEX; + + /** + * Pool of @ref reg_value structures for fast allocation. + */ + struct reg_value *ValuePool; + GLuint ValuePoolUsed; + struct reg_value_reader *ReaderPool; + GLuint ReaderPoolUsed; +}; + + +static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_INPUT: return &s->Inputs[index]; + default: return 0; + } +} + +static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) +{ + struct pair_register_translation *t = get_register(s, file, index); + ASSERT(!s->HwTemps[hwindex].RefCount); + ASSERT(!t->Allocated); + s->HwTemps[hwindex].RefCount = t->RefCount; + t->Allocated = 1; + t->HwIndex = hwindex; +} + +static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) +{ + GLuint hwindex; + + struct pair_register_translation *t = get_register(s, file, index); + if (!t) { + _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index); + return 0; + } + + if (t->Allocated) + return t->HwIndex; + + for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) + if (!s->HwTemps[hwindex].RefCount) + break; + + if (hwindex >= s->Handler->MaxHwTemps) { + error("Ran out of hardware temporaries"); + return 0; + } + + alloc_hw_reg(s, file, index, hwindex); + return hwindex; +} + + +static void deref_hw_reg(struct pair_state *s, GLuint hwindex) +{ + if (!s->HwTemps[hwindex].RefCount) { + error("Hwindex %i refcount error", hwindex); + return; + } + + s->HwTemps[hwindex].RefCount--; +} + +static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) +{ + pairinst->NextReady = *list; + *list = pairinst; +} + +/** + * The instruction at the given IP has become ready. Link it into the ready + * instructions. + */ +static void instruction_ready(struct pair_state *s, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + + if (s->Verbose) + _mesa_printf("instruction_ready(%i)\n", ip); + + if (pairinst->IsTex) + add_pairinst_to_list(&s->ReadyTEX, pairinst); + else if (!pairinst->NeedAlpha) + add_pairinst_to_list(&s->ReadyRGB, pairinst); + else if (!pairinst->NeedRGB) + add_pairinst_to_list(&s->ReadyAlpha, pairinst); + else + add_pairinst_to_list(&s->ReadyFullALU, pairinst); +} + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) +{ + struct prog_src_register tmp; + + switch(inst->Opcode) { + case OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = PROGRAM_BUILTIN; + inst->SrcReg[1].Swizzle = SWIZZLE_1111; + inst->SrcReg[1].NegateBase = 0; + inst->SrcReg[1].NegateAbs = 0; + inst->Opcode = OPCODE_MAD; + break; + case OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case OPCODE_MOV: + /* AMD say we should use CMP. + * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = PROGRAM_BUILTIN; + inst->SrcReg[1].Swizzle = SWIZZLE_1111; + inst->SrcReg[2].File = PROGRAM_BUILTIN; + inst->SrcReg[2].Swizzle = SWIZZLE_0000; + inst->Opcode = OPCODE_MAD; + break; + case OPCODE_MUL: + inst->SrcReg[2].File = PROGRAM_BUILTIN; + inst->SrcReg[2].Swizzle = SWIZZLE_0000; + inst->Opcode = OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct pair_state *s, + struct prog_instruction *inst, struct pair_state_instruction *pairinst) +{ + pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; + pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; + + switch(inst->Opcode) { + case OPCODE_ADD: + case OPCODE_CMP: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_FRC: + case OPCODE_MAD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MOV: + case OPCODE_MUL: + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + pairinst->IsTranscendent = 1; + pairinst->NeedAlpha = 1; + break; + case OPCODE_DP4: + pairinst->NeedAlpha = 1; + /* fall through */ + case OPCODE_DP3: + pairinst->NeedRGB = 1; + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + case OPCODE_END: + pairinst->IsTex = 1; + break; + default: + error("Unknown opcode %d\n", inst->Opcode); + break; + } +} + + +/** + * Count which (input, temporary) register is read and written how often, + * and scan the instruction stream to find dependencies. + */ +static void scan_instructions(struct pair_state *s) +{ + struct prog_instruction *inst; + struct pair_state_instruction *pairinst; + GLuint ip; + + for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0; + inst->Opcode != OPCODE_END; + ++inst, ++pairinst, ++ip) { + final_rewrite(s, inst); + classify_instruction(s, inst, pairinst); + + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int j; + for(j = 0; j < nsrc; j++) { + struct pair_register_translation *t = + get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index); + if (!t) + continue; + + t->RefCount++; + + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + int i; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i); + if (swz >= 4) + continue; /* constant or NIL swizzle */ + if (!t->Value[swz]) + continue; /* this is an undefined read */ + + /* Do not add a dependency if this instruction + * also rewrites the value. The code below adds + * a dependency for the DstReg, which is a superset + * of the SrcReg dependency. */ + if (inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == inst->SrcReg[j].Index && + GET_BIT(inst->DstReg.WriteMask, swz)) + continue; + + struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++]; + pairinst->NumDependencies++; + t->Value[swz]->NumReaders++; + r->IP = ip; + r->Next = t->Value[swz]->Readers; + t->Value[swz]->Readers = r; + } + } + } + + int ndst = _mesa_num_inst_dst_regs(inst->Opcode); + if (ndst) { + struct pair_register_translation *t = + get_register(s, inst->DstReg.File, inst->DstReg.Index); + if (t) { + t->RefCount++; + + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + int j; + for(j = 0; j < 4; ++j) { + if (!GET_BIT(inst->DstReg.WriteMask, j)) + continue; + + struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++]; + v->IP = ip; + if (t->Value[j]) { + pairinst->NumDependencies++; + t->Value[j]->Next = v; + } + t->Value[j] = v; + pairinst->Values[j] = v; + } + } + } + } + + if (s->Verbose) + _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); + + if (!pairinst->NumDependencies) + instruction_ready(s, ip); + } + + /* Clear the PROGRAM_TEMPORARY state */ + int i, j; + for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { + for(j = 0; j < 4; ++j) + s->Temps[i].Value[j] = 0; + } +} + + +/** + * Reserve hardware temporary registers for the program inputs. + * + * @note This allocation is performed explicitly, because the order of inputs + * is determined by the RS hardware. + */ +static void allocate_input_registers(struct pair_state *s) +{ + GLuint InputsRead = s->Program->InputsRead; + int i; + GLuint hwindex = 0; + + /* Texcoords come first */ + for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); + InputsRead &= ~FRAG_BIT_WPOS; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); + InputsRead &= ~FRAG_BIT_COL1; + + /* Anything else */ + if (InputsRead) + error("Don't know how to handle inputs 0x%x\n", InputsRead); +} + + +static void decrement_dependencies(struct pair_state *s, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + ASSERT(pairinst->NumDependencies > 0); + if (!--pairinst->NumDependencies) + instruction_ready(s, ip); +} + +/** + * Update the dependency tracking state based on what the instruction + * at the given IP does. + */ +static void commit_instruction(struct pair_state *s, int ip) +{ + struct prog_instruction *inst = s->Program->Instructions + ip; + struct pair_state_instruction *pairinst = s->Instructions + ip; + + if (s->Verbose) + _mesa_printf("commit_instruction(%i)\n", ip); + + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; + deref_hw_reg(s, t->HwIndex); + + int i; + for(i = 0; i < 4; ++i) { + if (!GET_BIT(inst->DstReg.WriteMask, i)) + continue; + + t->Value[i] = pairinst->Values[i]; + if (t->Value[i]->NumReaders) { + struct reg_value_reader *r; + for(r = pairinst->Values[i]->Readers; r; r = r->Next) + decrement_dependencies(s, r->IP); + } else if (t->Value[i]->Next) { + /* This happens when the only reader writes + * the register at the same time */ + decrement_dependencies(s, t->Value[i]->Next->IP); + } + } + } + + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int i; + for(i = 0; i < nsrc; i++) { + struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (!t) + continue; + + deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); + + if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) + continue; + + int j; + for(j = 0; j < 4; ++j) { + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz >= 4) + continue; + if (!t->Value[swz]) + continue; + + /* Do not free a dependency if this instruction + * also rewrites the value. See scan_instructions. */ + if (inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == inst->SrcReg[i].Index && + GET_BIT(inst->DstReg.WriteMask, swz)) + continue; + + if (!--t->Value[swz]->NumReaders) { + if (t->Value[swz]->Next) + decrement_dependencies(s, t->Value[swz]->Next->IP); + } + } + } +} + + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + * + * In R500, we don't really know when the result of a texture instruction + * arrives. So allocate all destinations first, to make sure they do not + * arrive early and overwrite a texture coordinate we're going to use later + * in the block. + */ +static void emit_all_tex(struct pair_state *s) +{ + struct pair_state_instruction *readytex; + struct pair_state_instruction *pairinst; + + ASSERT(s->ReadyTEX); + + // Don't let the ready list change under us! + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + + // Allocate destination hardware registers in one block to avoid conflicts. + for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { + int ip = pairinst - s->Instructions; + struct prog_instruction *inst = s->Program->Instructions + ip; + if (inst->Opcode != OPCODE_KIL) + get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + } + + if (s->Debug) + _mesa_printf(" BEGIN_TEX\n"); + + if (s->Handler->BeginTexBlock) + s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); + + for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { + int ip = pairinst - s->Instructions; + struct prog_instruction *inst = s->Program->Instructions + ip; + commit_instruction(s, ip); + + if (inst->Opcode != OPCODE_KIL) + inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); + + if (s->Debug) { + _mesa_printf(" "); + _mesa_print_instruction(inst); + } + s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); + } + + if (s->Debug) + _mesa_printf(" END_TEX\n"); +} + + +static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, + struct prog_src_register src, GLboolean rgb, GLboolean alpha) +{ + int candidate = -1; + int candidate_quality = -1; + int i; + + if (!rgb && !alpha) + return 0; + + GLuint constant; + GLuint index; + + if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { + constant = 0; + index = get_hw_reg(s, src.File, src.Index); + } else { + constant = 1; + s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); + } + + for(i = 0; i < 3; ++i) { + int q = 0; + if (rgb) { + if (pair->RGB.Src[i].Used) { + if (pair->RGB.Src[i].Constant != constant || + pair->RGB.Src[i].Index != index) + continue; + q++; + } + } + if (alpha) { + if (pair->Alpha.Src[i].Used) { + if (pair->Alpha.Src[i].Constant != constant || + pair->Alpha.Src[i].Index != index) + continue; + q++; + } + } + if (q > candidate_quality) { + candidate_quality = q; + candidate = i; + } + } + + if (candidate >= 0) { + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].Constant = constant; + pair->RGB.Src[candidate].Index = index; + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].Constant = constant; + pair->Alpha.Src[candidate].Index = index; + } + } + + return candidate; +} + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. + */ +static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = s->Program->Instructions + ip; + + ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); + ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); + + if (pairinst->NeedRGB) { + if (pairinst->IsTranscendent) + pair->RGB.Opcode = OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (pairinst->NeedAlpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + int nargs = _mesa_num_inst_src_regs(inst->Opcode); + int i; + + /* Special case for DDX/DDY (MDH/MDV). */ + if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { + if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) + return GL_FALSE; + else + nargs++; + } + + for(i = 0; i < nargs; ++i) { + int source; + if (pairinst->NeedRGB && !pairinst->IsTranscendent) { + GLboolean srcrgb = GL_FALSE; + GLboolean srcalpha = GL_FALSE; + GLuint negatebase = 0; + int j; + for(j = 0; j < 3; ++j) { + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz < 3) + srcrgb = GL_TRUE; + else if (swz < 4) + srcalpha = GL_TRUE; + if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j)) + negatebase = 1; + } + source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); + if (source < 0) + return GL_FALSE; + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + } + if (pairinst->NeedAlpha) { + GLboolean srcrgb = GL_FALSE; + GLboolean srcalpha = GL_FALSE; + GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 0 : 3); + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); + if (swz < 3) + srcrgb = GL_TRUE; + else if (swz < 4) + srcalpha = GL_TRUE; + source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); + if (source < 0) + return GL_FALSE; + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = swz; + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + } + } + + return GL_TRUE; +} + + +/** + * Fill in the destination register information. + * + * This is split from filling in source registers because we want + * to avoid allocating hardware temporaries for destinations until + * we are absolutely certain that we're going to emit a certain + * instruction pairing. + */ +static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = s->Program->Instructions + ip; + + if (inst->DstReg.File == PROGRAM_OUTPUT) { + if (inst->DstReg.Index == FRAG_RESULT_COLR) { + pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else if (inst->DstReg.Index == FRAG_RESULT_DEPR) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } else { + GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + if (pairinst->NeedRGB) { + pair->RGB.DestIndex = hwindex; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + } + if (pairinst->NeedAlpha) { + pair->Alpha.DestIndex = hwindex; + pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } +} + + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_alu(struct pair_state *s) +{ + struct radeon_pair_instruction pair; + + if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { + int ip; + if (s->ReadyFullALU) { + ip = s->ReadyFullALU - s->Instructions; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + } else if (s->ReadyRGB) { + ip = s->ReadyRGB - s->Instructions; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else { + ip = s->ReadyAlpha - s->Instructions; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } + + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, ip); + fill_dest_into_pair(s, &pair, ip); + commit_instruction(s, ip); + } else { + struct pair_state_instruction **prgb; + struct pair_state_instruction **palpha; + + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + int rgbip = *prgb - s->Instructions; + int alphaip = *palpha - s->Instructions; + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, rgbip); + if (!fill_instruction_into_pair(s, &pair, alphaip)) + continue; + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + fill_dest_into_pair(s, &pair, rgbip); + fill_dest_into_pair(s, &pair, alphaip); + commit_instruction(s, rgbip); + commit_instruction(s, alphaip); + goto success; + } + } + + /* No success in pairing; just take the first RGB instruction */ + int ip = s->ReadyRGB - s->Instructions; + s->ReadyRGB = s->ReadyRGB->NextReady; + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, ip); + fill_dest_into_pair(s, &pair, ip); + commit_instruction(s, ip); + success: ; + } + + if (s->Debug) + radeonPrintPairInstruction(&pair); + + s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); +} + + +GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, + const struct radeon_pair_handler* handler, void *userdata) +{ + struct pair_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Ctx = ctx; + s.Program = program; + s.Handler = handler; + s.UserData = userdata; + s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; + s.Verbose = GL_FALSE && s.Debug; + + s.Instructions = (struct pair_state_instruction*)_mesa_calloc( + sizeof(struct pair_state_instruction)*s.Program->NumInstructions); + s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); + s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( + sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); + + if (s.Debug) + _mesa_printf("Emit paired program\n"); + + scan_instructions(&s); + allocate_input_registers(&s); + + while(!s.Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s); + + while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) + emit_alu(&s); + } + + if (s.Debug) + _mesa_printf(" END\n"); + + _mesa_free(s.Instructions); + _mesa_free(s.ValuePool); + _mesa_free(s.ReaderPool); + + return !s.Error; +} + + +static void print_pair_src(int i, struct radeon_pair_instruction_source* src) +{ + _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); +} + +static const char* opcode_string(GLuint opcode) +{ + if (opcode == OPCODE_REPL_ALPHA) + return "SOP"; + else + return _mesa_opcode_string(opcode); +} + +static int num_pairinst_args(GLuint opcode) +{ + if (opcode == OPCODE_REPL_ALPHA) + return 0; + else + return _mesa_num_inst_src_regs(opcode); +} + +static char swizzle_char(GLuint swz) +{ + switch(swz) { + case SWIZZLE_X: return 'x'; + case SWIZZLE_Y: return 'y'; + case SWIZZLE_Z: return 'z'; + case SWIZZLE_W: return 'w'; + case SWIZZLE_ZERO: return '0'; + case SWIZZLE_ONE: return '1'; + case SWIZZLE_NIL: return '_'; + default: return '?'; + } +} + +void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) +{ + int nargs; + int i; + + _mesa_printf(" RGB: "); + for(i = 0; i < 3; ++i) { + if (inst->RGB.Src[i].Used) + print_pair_src(i, inst->RGB.Src + i); + } + _mesa_printf("\n"); + _mesa_printf(" Alpha:"); + for(i = 0; i < 3; ++i) { + if (inst->Alpha.Src[i].Used) + print_pair_src(i, inst->Alpha.Src + i); + } + _mesa_printf("\n"); + + _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + _mesa_printf(" COLOR.%s%s%s", + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + nargs = num_pairinst_args(inst->RGB.Opcode); + for(i = 0; i < nargs; ++i) { + const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; + _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), + abs); + } + _mesa_printf("\n"); + + _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + _mesa_printf(" COLOR.w"); + if (inst->Alpha.DepthWriteMask) + _mesa_printf(" DEPTH.w"); + nargs = num_pairinst_args(inst->Alpha.Opcode); + for(i = 0; i < nargs; ++i) { + const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; + _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, + swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); + } + _mesa_printf("\n"); +} diff --git a/r300/radeon_program_pair.h b/r300/radeon_program_pair.h new file mode 100644 index 0000000..4624a24 --- /dev/null +++ b/r300/radeon_program_pair.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_PAIR_H_ +#define __RADEON_PROGRAM_PAIR_H_ + +#include "radeon_program.h" + + +/** + * Represents a paired instruction, as found in R300 and R500 + * fragment programs. + */ +struct radeon_pair_instruction_source { + GLuint Index:8; + GLuint Constant:1; + GLuint Used:1; +}; + +struct radeon_pair_instruction_rgb { + GLuint Opcode:8; + GLuint DestIndex:8; + GLuint WriteMask:3; + GLuint OutputWriteMask:3; + GLuint Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + GLuint Source:2; + GLuint Swizzle:9; + GLuint Abs:1; + GLuint Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction_alpha { + GLuint Opcode:8; + GLuint DestIndex:8; + GLuint WriteMask:1; + GLuint OutputWriteMask:1; + GLuint DepthWriteMask:1; + GLuint Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + GLuint Source:2; + GLuint Swizzle:3; + GLuint Abs:1; + GLuint Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction { + struct radeon_pair_instruction_rgb RGB; + struct radeon_pair_instruction_alpha Alpha; +}; + + +/** + * + */ +struct radeon_pair_handler { + /** + * Fill in the proper hardware index for the given constant register. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex); + + /** + * Write a paired instruction to the hardware. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); + + /** + * Write a texture instruction to the hardware. + * Register indices have already been rewritten to the allocated + * hardware register numbers. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitTex)(void*, struct prog_instruction*); + + /** + * Called before a block of contiguous, independent texture + * instructions is emitted. + */ + GLboolean (*BeginTexBlock)(void*); + + GLuint MaxHwTemps; +}; + +GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, + const struct radeon_pair_handler*, void *userdata); + +void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); + +#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/r300/radeon_span.c b/r300/radeon_span.c index eae09d6..3616d8b 100644 --- a/r300/radeon_span.c +++ b/r300/radeon_span.c @@ -172,6 +172,8 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) /* 16-bit depth buffer functions */ +#define VALUE_TYPE GLushort + #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; @@ -186,6 +188,8 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) * Careful: It looks like the R300 uses ZZZS byte order while the R200 * uses SZZZ for 24 bit depth, 8 bit stencil mode. */ +#define VALUE_TYPE GLuint + #ifdef COMPILE_R300 #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -282,6 +286,30 @@ static void radeonSpanRenderStart(GLcontext * ctx) #endif LOCK_HARDWARE(rmesa); radeonWaitForIdleLocked(rmesa); + + /* Read the first pixel in the frame buffer. This should + * be a noop, right? In fact without this conform fails as reading + * from the framebuffer sometimes produces old results -- the + * on-card read cache gets mixed up and doesn't notice that the + * framebuffer has been updated. + * + * Note that we should probably be reading some otherwise unused + * region of VRAM, otherwise we might get incorrect results when + * reading pixels from the top left of the screen. + * + * I found this problem on an R420 with glean's texCube test. + * Note that the R200 span code also *writes* the first pixel in the + * framebuffer, but I've found this to be unnecessary. + * -- Nicolai Hähnle, June 2008 + */ + { + int p; + driRenderbuffer *drb = + (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; + volatile int *buf = + (volatile int *)(rmesa->dri.screen->pFB + drb->offset); + p = *buf; + } } static void radeonSpanRenderFinish(GLcontext * ctx) diff --git a/r300/radeon_state.c b/r300/radeon_state.c index 82bfd95..d81318c 100644 --- a/r300/radeon_state.c +++ b/r300/radeon_state.c @@ -125,8 +125,8 @@ void radeonUpdateScissor(GLcontext* ctx) radeon->state.scissor.rect.x1 = x1; radeon->state.scissor.rect.y1 = y1; - radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width - 1; - radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height - 1; + radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width; + radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height; radeonRecalcScissorRects(radeon); } @@ -152,7 +152,7 @@ void radeonSetCliprects(radeonContextPtr radeon) GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate; GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate; - if (draw_fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) { + if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { /* Can't ignore 2d windows if we are page flipping. */ if (drawable->numBackClipRects == 0 || radeon->doPageFlip || radeon->sarea->pfCurrentPage == 1) { diff --git a/radeon/Makefile.am b/radeon/Makefile.am index ee1d008..9b2fbcf 100644 --- a/radeon/Makefile.am +++ b/radeon/Makefile.am @@ -21,4 +21,4 @@ radeon_dri_la_SOURCES = \ radeon_swtcl.c \ radeon_span.c \ radeon_maos.c \ - radeon_sanity.c + radeon_sanity.c diff --git a/radeon/radeon_chipset.h b/radeon/radeon_chipset.h index 5c17e8f..55a73ea 100644 --- a/radeon/radeon_chipset.h +++ b/radeon/radeon_chipset.h @@ -106,6 +106,7 @@ #define PCI_CHIP_RV410_564F 0x564F #define PCI_CHIP_RV410_5652 0x5652 #define PCI_CHIP_RV410_5653 0x5653 +#define PCI_CHIP_RV410_5657 0x5657 #define PCI_CHIP_RS300_5834 0x5834 #define PCI_CHIP_RS300_5835 0x5835 #define PCI_CHIP_RS480_5954 0x5954 @@ -127,7 +128,6 @@ #define PCI_CHIP_RV370_5B63 0x5B63 #define PCI_CHIP_RV370_5B64 0x5B64 #define PCI_CHIP_RV370_5B65 0x5B65 -#define PCI_CHIP_RV370_5657 0x5657 #define PCI_CHIP_RV280_5C61 0x5C61 #define PCI_CHIP_RV280_5C63 0x5C63 #define PCI_CHIP_R430_5D48 0x5D48 @@ -146,8 +146,112 @@ #define PCI_CHIP_RV410_5E4C 0x5E4C #define PCI_CHIP_RV410_5E4D 0x5E4D #define PCI_CHIP_RV410_5E4F 0x5E4F + +#define PCI_CHIP_R520_7100 0x7100 +#define PCI_CHIP_R520_7101 0x7101 +#define PCI_CHIP_R520_7102 0x7102 +#define PCI_CHIP_R520_7103 0x7103 +#define PCI_CHIP_R520_7104 0x7104 +#define PCI_CHIP_R520_7105 0x7105 +#define PCI_CHIP_R520_7106 0x7106 +#define PCI_CHIP_R520_7108 0x7108 +#define PCI_CHIP_R520_7109 0x7109 +#define PCI_CHIP_R520_710A 0x710A +#define PCI_CHIP_R520_710B 0x710B +#define PCI_CHIP_R520_710C 0x710C +#define PCI_CHIP_R520_710E 0x710E +#define PCI_CHIP_R520_710F 0x710F +#define PCI_CHIP_RV515_7140 0x7140 +#define PCI_CHIP_RV515_7141 0x7141 +#define PCI_CHIP_RV515_7142 0x7142 +#define PCI_CHIP_RV515_7143 0x7143 +#define PCI_CHIP_RV515_7144 0x7144 +#define PCI_CHIP_RV515_7145 0x7145 +#define PCI_CHIP_RV515_7146 0x7146 +#define PCI_CHIP_RV515_7147 0x7147 +#define PCI_CHIP_RV515_7149 0x7149 +#define PCI_CHIP_RV515_714A 0x714A +#define PCI_CHIP_RV515_714B 0x714B +#define PCI_CHIP_RV515_714C 0x714C +#define PCI_CHIP_RV515_714D 0x714D +#define PCI_CHIP_RV515_714E 0x714E +#define PCI_CHIP_RV515_714F 0x714F +#define PCI_CHIP_RV515_7151 0x7151 +#define PCI_CHIP_RV515_7152 0x7152 +#define PCI_CHIP_RV515_7153 0x7153 +#define PCI_CHIP_RV515_715E 0x715E +#define PCI_CHIP_RV515_715F 0x715F +#define PCI_CHIP_RV515_7180 0x7180 +#define PCI_CHIP_RV515_7181 0x7181 +#define PCI_CHIP_RV515_7183 0x7183 +#define PCI_CHIP_RV515_7186 0x7186 +#define PCI_CHIP_RV515_7187 0x7187 +#define PCI_CHIP_RV515_7188 0x7188 +#define PCI_CHIP_RV515_718A 0x718A +#define PCI_CHIP_RV515_718B 0x718B +#define PCI_CHIP_RV515_718C 0x718C +#define PCI_CHIP_RV515_718D 0x718D +#define PCI_CHIP_RV515_718F 0x718F +#define PCI_CHIP_RV515_7193 0x7193 +#define PCI_CHIP_RV515_7196 0x7196 +#define PCI_CHIP_RV515_719B 0x719B +#define PCI_CHIP_RV515_719F 0x719F +#define PCI_CHIP_RV530_71C0 0x71C0 +#define PCI_CHIP_RV530_71C1 0x71C1 +#define PCI_CHIP_RV530_71C2 0x71C2 +#define PCI_CHIP_RV530_71C3 0x71C3 +#define PCI_CHIP_RV530_71C4 0x71C4 +#define PCI_CHIP_RV530_71C5 0x71C5 +#define PCI_CHIP_RV530_71C6 0x71C6 +#define PCI_CHIP_RV530_71C7 0x71C7 +#define PCI_CHIP_RV530_71CD 0x71CD +#define PCI_CHIP_RV530_71CE 0x71CE +#define PCI_CHIP_RV530_71D2 0x71D2 +#define PCI_CHIP_RV530_71D4 0x71D4 +#define PCI_CHIP_RV530_71D5 0x71D5 +#define PCI_CHIP_RV530_71D6 0x71D6 +#define PCI_CHIP_RV530_71DA 0x71DA +#define PCI_CHIP_RV530_71DE 0x71DE +#define PCI_CHIP_RV515_7200 0x7200 +#define PCI_CHIP_RV515_7210 0x7210 +#define PCI_CHIP_RV515_7211 0x7211 +#define PCI_CHIP_R580_7240 0x7240 +#define PCI_CHIP_R580_7243 0x7243 +#define PCI_CHIP_R580_7244 0x7244 +#define PCI_CHIP_R580_7245 0x7245 +#define PCI_CHIP_R580_7246 0x7246 +#define PCI_CHIP_R580_7247 0x7247 +#define PCI_CHIP_R580_7248 0x7248 +#define PCI_CHIP_R580_7249 0x7249 +#define PCI_CHIP_R580_724A 0x724A +#define PCI_CHIP_R580_724B 0x724B +#define PCI_CHIP_R580_724C 0x724C +#define PCI_CHIP_R580_724D 0x724D +#define PCI_CHIP_R580_724E 0x724E +#define PCI_CHIP_R580_724F 0x724F +#define PCI_CHIP_RV570_7280 0x7280 +#define PCI_CHIP_RV560_7281 0x7281 +#define PCI_CHIP_RV560_7283 0x7283 +#define PCI_CHIP_R580_7284 0x7284 +#define PCI_CHIP_RV560_7287 0x7287 +#define PCI_CHIP_RV570_7288 0x7288 +#define PCI_CHIP_RV570_7289 0x7289 +#define PCI_CHIP_RV570_728B 0x728B +#define PCI_CHIP_RV570_728C 0x728C +#define PCI_CHIP_RV560_7290 0x7290 +#define PCI_CHIP_RV560_7291 0x7291 +#define PCI_CHIP_RV560_7293 0x7293 +#define PCI_CHIP_RV560_7297 0x7297 + #define PCI_CHIP_RS350_7834 0x7834 #define PCI_CHIP_RS350_7835 0x7835 +#define PCI_CHIP_RS690_791E 0x791E +#define PCI_CHIP_RS690_791F 0x791F +#define PCI_CHIP_RS740_796C 0x796C +#define PCI_CHIP_RS740_796D 0x796D +#define PCI_CHIP_RS740_796E 0x796E +#define PCI_CHIP_RS740_796F 0x796F + enum { CHIP_FAMILY_R100, @@ -166,6 +270,14 @@ enum { CHIP_FAMILY_R420, CHIP_FAMILY_RV410, CHIP_FAMILY_RS400, + CHIP_FAMILY_RS690, + CHIP_FAMILY_RS740, + CHIP_FAMILY_RV515, + CHIP_FAMILY_R520, + CHIP_FAMILY_RV530, + CHIP_FAMILY_R580, + CHIP_FAMILY_RV560, + CHIP_FAMILY_RV570, CHIP_FAMILY_LAST }; diff --git a/radeon/radeon_context.c b/radeon/radeon_context.c index 9451ec4..7aa0e3e 100644 --- a/radeon/radeon_context.c +++ b/radeon/radeon_context.c @@ -35,14 +35,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * Keith Whitwell <keith@tungstengraphics.com> */ -#include "glheader.h" -#include "api_arrayelt.h" -#include "context.h" -#include "simple_list.h" -#include "imports.h" -#include "matrix.h" -#include "extensions.h" -#include "framebuffer.h" +#include "main/glheader.h" +#include "main/api_arrayelt.h" +#include "main/context.h" +#include "main/simple_list.h" +#include "main/imports.h" +#include "main/matrix.h" +#include "main/extensions.h" +#include "main/framebuffer.h" +#include "main/state.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -232,14 +233,14 @@ radeonCreateContext( const __GLcontextModes *glVisual, "def_max_anisotropy"); if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) { - if ( sPriv->drmMinor < 13 ) + if ( sPriv->drm_version.minor < 13 ) fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " - "disabling.\n",sPriv->drmMinor ); + "disabling.\n", sPriv->drm_version.minor ); else rmesa->using_hyperz = GL_TRUE; } - if ( sPriv->drmMinor >= 15 ) + if ( sPriv->drm_version.minor >= 15 ) rmesa->texmicrotile = GL_TRUE; /* Init default driver functions then plug in our Radeon-specific functions @@ -270,7 +271,7 @@ radeonCreateContext( const __GLcontextModes *glVisual, rmesa->dri.hwContext = driContextPriv->hHWContext; rmesa->dri.hwLock = &sPriv->pSAREA->lock; rmesa->dri.fd = sPriv->fd; - rmesa->dri.drmMinor = sPriv->drmMinor; + rmesa->dri.drmMinor = sPriv->drm_version.minor; rmesa->radeonScreen = screen; rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA + @@ -423,10 +424,7 @@ radeonCreateContext( const __GLcontextModes *glVisual, rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); - rmesa->vblank_flags = (rmesa->radeonScreen->irq != 0) - ? driGetDefaultVBlankFlags(&rmesa->optionCache) : VBLANK_FLAG_NO_IRQ; - - (*dri_interface->getUST)( & rmesa->swap_ust ); + (*sPriv->systemTime->getUST)( & rmesa->swap_ust ); #if DO_DEBUG @@ -591,16 +589,18 @@ radeonMakeCurrent( __DRIcontextPrivate *driContextPriv, if (RADEON_DEBUG & DEBUG_DRI) fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx); - if ( newCtx->dri.drawable != driDrawPriv ) { - /* XXX we may need to validate the drawable here!!! */ - driDrawableInitVBlank( driDrawPriv, newCtx->vblank_flags, - &newCtx->vbl_seq ); - } - newCtx->dri.readable = driReadPriv; if ( (newCtx->dri.drawable != driDrawPriv) || newCtx->lastStamp != driDrawPriv->lastStamp ) { + if (driDrawPriv->swap_interval == (unsigned)-1) { + driDrawPriv->vblFlags = (newCtx->radeonScreen->irq != 0) + ? driGetDefaultVBlankFlags(&newCtx->optionCache) + : VBLANK_FLAG_NO_IRQ; + + driDrawableInitVBlank( driDrawPriv ); + } + newCtx->dri.drawable = driDrawPriv; radeonSetCliprects(newCtx); diff --git a/radeon/radeon_context.h b/radeon/radeon_context.h index 8dedd66..bc43fc5 100644 --- a/radeon/radeon_context.h +++ b/radeon/radeon_context.h @@ -161,6 +161,8 @@ struct radeon_tex_obj { drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; /* Six, for the cube faces */ + GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ + GLuint pp_txfilter; /* hardware register values */ GLuint pp_txformat; GLuint pp_txoffset; /* Image location in texmem. @@ -667,9 +669,6 @@ struct radeon_context { /* VBI */ - GLuint vbl_seq; - GLuint vblank_flags; - int64_t swap_ust; int64_t swap_missed_ust; @@ -708,9 +707,9 @@ struct radeon_context { #define RADEON_CONTEXT(ctx) ((radeonContextPtr)(ctx->DriverCtx)) -static __inline GLuint radeonPackColor(GLuint cpp, - GLubyte r, GLubyte g, - GLubyte b, GLubyte a) +static INLINE GLuint radeonPackColor(GLuint cpp, + GLubyte r, GLubyte g, + GLubyte b, GLubyte a) { switch (cpp) { case 2: diff --git a/radeon/radeon_ioctl.c b/radeon/radeon_ioctl.c index 4c64bc2..446025b 100644 --- a/radeon/radeon_ioctl.c +++ b/radeon/radeon_ioctl.c @@ -863,13 +863,14 @@ static void radeonWaitForFrameCompletion( radeonContextPtr rmesa ) /* Copy the back color buffer to the front color buffer. */ -void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv, +void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, const drm_clip_rect_t *rect) { radeonContextPtr rmesa; GLint nbox, i, ret; GLboolean missed_target; int64_t ust; + __DRIscreenPrivate *psp; assert(dPriv); assert(dPriv->driContextPriv); @@ -891,7 +892,7 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv, if (!rect) { UNLOCK_HARDWARE( rmesa ); - driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target ); + driWaitForVBlank( dPriv, & missed_target ); LOCK_HARDWARE( rmesa ); } @@ -918,16 +919,18 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv, if (rect->y2 < b->y2) b->y2 = rect->y2; - if (b->x1 < b->x2 && b->y1 < b->y2) - b++; + if (b->x1 >= b->x2 || b->y1 >= b->y2) + continue; } - else - b++; + b++; n++; } rmesa->sarea->nbox = n; + if (!n) + continue; + ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); if ( ret ) { @@ -940,8 +943,9 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv, UNLOCK_HARDWARE( rmesa ); if (!rect) { + psp = dPriv->driScreenPriv; rmesa->swap_count++; - (*dri_interface->getUST)( & ust ); + (*psp->systemTime->getUST)( & ust ); if ( missed_target ) { rmesa->swap_missed_count++; rmesa->swap_missed_ust = ust - rmesa->swap_ust; @@ -952,17 +956,19 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv, } } -void radeonPageFlip( const __DRIdrawablePrivate *dPriv ) +void radeonPageFlip( __DRIdrawablePrivate *dPriv ) { radeonContextPtr rmesa; GLint ret; GLboolean missed_target; + __DRIscreenPrivate *psp; assert(dPriv); assert(dPriv->driContextPriv); assert(dPriv->driContextPriv->driverPrivate); rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + psp = dPriv->driScreenPriv; if ( RADEON_DEBUG & DEBUG_IOCTL ) { fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, @@ -987,10 +993,10 @@ void radeonPageFlip( const __DRIdrawablePrivate *dPriv ) */ radeonWaitForFrameCompletion( rmesa ); UNLOCK_HARDWARE( rmesa ); - driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target ); + driWaitForVBlank( dPriv, & missed_target ); if ( missed_target ) { rmesa->swap_missed_count++; - (void) (*dri_interface->getUST)( & rmesa->swap_missed_ust ); + (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust ); } LOCK_HARDWARE( rmesa ); @@ -1004,7 +1010,7 @@ void radeonPageFlip( const __DRIdrawablePrivate *dPriv ) } rmesa->swap_count++; - (void) (*dri_interface->getUST)( & rmesa->swap_ust ); + (void) (*psp->systemTime->getUST)( & rmesa->swap_ust ); /* Get ready for drawing next frame. Update the renderbuffers' * flippedOffset/Pitch fields so we draw into the right place. diff --git a/radeon/radeon_ioctl.h b/radeon/radeon_ioctl.h index 11a7d02..b3c287f 100644 --- a/radeon/radeon_ioctl.h +++ b/radeon/radeon_ioctl.h @@ -87,9 +87,9 @@ extern void radeonReleaseDmaRegion( radeonContextPtr rmesa, struct radeon_dma_region *region, const char *caller ); -extern void radeonCopyBuffer( const __DRIdrawablePrivate *drawable, +extern void radeonCopyBuffer( __DRIdrawablePrivate *drawable, const drm_clip_rect_t *rect); -extern void radeonPageFlip( const __DRIdrawablePrivate *drawable ); +extern void radeonPageFlip( __DRIdrawablePrivate *drawable ); extern void radeonFlush( GLcontext *ctx ); extern void radeonFinish( GLcontext *ctx ); extern void radeonWaitForIdleLocked( radeonContextPtr rmesa ); @@ -124,7 +124,7 @@ do { \ memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \ rmesa->hw.ATOM.cmd_size * 4) -static __inline int RADEON_DB_STATECHANGE( +static INLINE int RADEON_DB_STATECHANGE( radeonContextPtr rmesa, struct radeon_state_atom *atom ) { @@ -177,7 +177,7 @@ do { \ * and hang on to the lock until the critical section is finished and we flush * the buffer again and unlock. */ -static __inline void radeonEnsureCmdBufSpace( radeonContextPtr rmesa, +static INLINE void radeonEnsureCmdBufSpace( radeonContextPtr rmesa, int bytes ) { if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ) @@ -187,7 +187,7 @@ static __inline void radeonEnsureCmdBufSpace( radeonContextPtr rmesa, /* Alloc space in the command buffer */ -static __inline char *radeonAllocCmdBuf( radeonContextPtr rmesa, +static INLINE char *radeonAllocCmdBuf( radeonContextPtr rmesa, int bytes, const char *where ) { if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ) diff --git a/radeon/radeon_screen.c b/radeon/radeon_screen.c index 907a987..84b5c46 100644 --- a/radeon/radeon_screen.c +++ b/radeon/radeon_screen.c @@ -49,6 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #if !RADEON_COMMON #include "radeon_context.h" #include "radeon_span.h" +#include "radeon_tex.h" #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) #include "r200_context.h" #include "r200_ioctl.h" @@ -90,7 +91,7 @@ DRI_CONF_BEGIN DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_ALLOW_LARGE_TEXTURES(2) DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG DRI_CONF_NO_RAST(false) @@ -117,7 +118,7 @@ DRI_CONF_BEGIN DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_ALLOW_LARGE_TEXTURES(2) DRI_CONF_TEXTURE_BLEND_QUALITY(1.0,"0.0:1.0") DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG @@ -179,7 +180,7 @@ DRI_CONF_OPT_BEGIN_V(fp_optimization,enum,def,"0:1") \ DRI_CONF_DESC_END \ DRI_CONF_OPT_END -const char __driConfigOptions[] = +PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN) @@ -188,14 +189,13 @@ DRI_CONF_BEGIN DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(8, 2, 8) DRI_CONF_MAX_TEXTURE_COORD_UNITS(8, 2, 8) DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32) - DRI_CONF_DISABLE_FALLBACK(false) + DRI_CONF_DISABLE_FALLBACK(true) DRI_CONF_DISABLE_DOUBLE_SIDE_STENCIL(false) DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) DRI_CONF_DEF_MAX_ANISOTROPY(1.0, "1.0,2.0,4.0,8.0,16.0") - DRI_CONF_NO_NEG_LOD_BIAS(false) - DRI_CONF_FORCE_S3TC_ENABLE(false) + DRI_CONF_FORCE_S3TC_ENABLE(false) DRI_CONF_DISABLE_S3TC(false) DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) @@ -206,7 +206,7 @@ DRI_CONF_BEGIN DRI_CONF_NO_RAST(false) DRI_CONF_SECTION_END DRI_CONF_END; -static const GLuint __driNConfigOptions = 18; +static const GLuint __driNConfigOptions = 17; #ifndef RADEON_DEBUG int RADEON_DEBUG = 0; @@ -244,25 +244,26 @@ radeonGetParam(int fd, int param, void *value) { int ret; drm_radeon_getparam_t gp; - + gp.param = param; gp.value = value; - + ret = drmCommandWriteRead( fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); return ret; } -static __GLcontextModes * -radeonFillInModes( unsigned pixel_bits, unsigned depth_bits, - unsigned stencil_bits, GLboolean have_back_buffer ) +static const __DRIconfig ** +radeonFillInModes( __DRIscreenPrivate *psp, + unsigned pixel_bits, unsigned depth_bits, + unsigned stencil_bits, GLboolean have_back_buffer ) { - __GLcontextModes * modes; - __GLcontextModes * m; - unsigned num_modes; + __DRIconfig **configs; + __GLcontextModes *m; unsigned depth_buffer_factor; unsigned back_buffer_factor; GLenum fb_format; GLenum fb_type; + int i; /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy * enough to add support. Basically, if a context is created with an @@ -279,7 +280,7 @@ radeonFillInModes( unsigned pixel_bits, unsigned depth_bits, depth_bits_array[0] = depth_bits; depth_bits_array[1] = depth_bits; - + /* Just like with the accumulation buffer, always provide some modes * with a stencil buffer. It will be a sw fallback, but some apps won't * care about that. @@ -290,8 +291,6 @@ radeonFillInModes( unsigned pixel_bits, unsigned depth_bits, depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1; back_buffer_factor = (have_back_buffer) ? 2 : 1; - num_modes = depth_buffer_factor * back_buffer_factor * 4; - if ( pixel_bits == 16 ) { fb_format = GL_RGB; fb_type = GL_UNSIGNED_SHORT_5_6_5; @@ -301,21 +300,11 @@ radeonFillInModes( unsigned pixel_bits, unsigned depth_bits, fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; } - modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) ); - m = modes; - if ( ! driFillInModes( & m, fb_format, fb_type, - depth_bits_array, stencil_bits_array, depth_buffer_factor, - back_buffer_modes, back_buffer_factor, - GLX_TRUE_COLOR ) ) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - - if ( ! driFillInModes( & m, fb_format, fb_type, - depth_bits_array, stencil_bits_array, depth_buffer_factor, - back_buffer_modes, back_buffer_factor, - GLX_DIRECT_COLOR ) ) { + configs = driCreateConfigs(fb_format, fb_type, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, back_buffer_factor); + if (configs == NULL) { fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__ ); return NULL; @@ -323,15 +312,43 @@ radeonFillInModes( unsigned pixel_bits, unsigned depth_bits, /* Mark the visual as slow if there are "fake" stencil bits. */ - for ( m = modes ; m != NULL ; m = m->next ) { - if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) { + for (i = 0; configs[i]; i++) { + m = &configs[i]->modes; + if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) { m->visualRating = GLX_SLOW_CONFIG; } } - return modes; + return (const __DRIconfig **) configs; } +#if !RADEON_COMMON +static const __DRItexOffsetExtension radeonTexOffsetExtension = { + { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, + radeonSetTexOffset, +}; +#endif + +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +static const __DRIallocateExtension r200AllocateExtension = { + { __DRI_ALLOCATE, __DRI_ALLOCATE_VERSION }, + r200AllocateMemoryMESA, + r200FreeMemoryMESA, + r200GetMemoryOffsetMESA +}; + +static const __DRItexOffsetExtension r200texOffsetExtension = { + { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, + r200SetTexOffset, +}; +#endif + +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +static const __DRItexOffsetExtension r300texOffsetExtension = { + { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, + r300SetTexOffset, +}; +#endif /* Create the device specific screen private data struct. */ @@ -341,9 +358,9 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) radeonScreenPtr screen; RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv; unsigned char *RADEONMMIO; - PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension = - (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension")); - void * const psc = sPriv->psc->screenConfigs; + int i; + int ret; + uint32_t temp; if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) { fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n"); @@ -374,7 +391,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) int ret; ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET, &screen->gart_buffer_offset); - + if (ret) { FREE( screen ); fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret); @@ -396,13 +413,13 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret); return NULL; } - screen->drmSupportsCubeMapsR200 = (sPriv->drmMinor >= 7); - screen->drmSupportsBlendColor = (sPriv->drmMinor >= 11); - screen->drmSupportsTriPerf = (sPriv->drmMinor >= 16); - screen->drmSupportsFragShader = (sPriv->drmMinor >= 18); - screen->drmSupportsPointSprites = (sPriv->drmMinor >= 13); - screen->drmSupportsCubeMapsR100 = (sPriv->drmMinor >= 15); - screen->drmSupportsVertexProgram = (sPriv->drmMinor >= 25); + screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7); + screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11); + screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16); + screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18); + screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13); + screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15); + screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); } screen->mmio.handle = dri_priv->registerHandle; @@ -536,7 +553,11 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->chip_family = CHIP_FAMILY_RS300; break; + /* 9500 with 1 pipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */ case PCI_CHIP_R300_AD: + screen->chip_family = CHIP_FAMILY_RV350; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; case PCI_CHIP_R300_AE: case PCI_CHIP_R300_AF: case PCI_CHIP_R300_AG: @@ -585,7 +606,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV370_5B63: case PCI_CHIP_RV370_5B64: case PCI_CHIP_RV370_5B65: - case PCI_CHIP_RV370_5657: case PCI_CHIP_RV380_3150: case PCI_CHIP_RV380_3152: case PCI_CHIP_RV380_3154: @@ -635,18 +655,14 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->chip_flags = RADEON_CHIPSET_TCL; break; - /* RV410 SE chips have half the pipes of regular RV410 */ case PCI_CHIP_RV410_5E4C: case PCI_CHIP_RV410_5E4F: - screen->chip_family = CHIP_FAMILY_RV380; - screen->chip_flags = RADEON_CHIPSET_TCL; - break; - case PCI_CHIP_RV410_564A: case PCI_CHIP_RV410_564B: case PCI_CHIP_RV410_564F: case PCI_CHIP_RV410_5652: case PCI_CHIP_RV410_5653: + case PCI_CHIP_RV410_5657: case PCI_CHIP_RV410_5E48: case PCI_CHIP_RV410_5E4A: case PCI_CHIP_RV410_5E4B: @@ -664,7 +680,132 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RC410_5A61: case PCI_CHIP_RC410_5A62: screen->chip_family = CHIP_FAMILY_RS400; - fprintf(stderr, "Warning, xpress200 detected.\n"); + break; + + case PCI_CHIP_RS690_791E: + case PCI_CHIP_RS690_791F: + screen->chip_family = CHIP_FAMILY_RS690; + break; + case PCI_CHIP_RS740_796C: + case PCI_CHIP_RS740_796D: + case PCI_CHIP_RS740_796E: + case PCI_CHIP_RS740_796F: + screen->chip_family = CHIP_FAMILY_RS740; + break; + + case PCI_CHIP_R520_7100: + case PCI_CHIP_R520_7101: + case PCI_CHIP_R520_7102: + case PCI_CHIP_R520_7103: + case PCI_CHIP_R520_7104: + case PCI_CHIP_R520_7105: + case PCI_CHIP_R520_7106: + case PCI_CHIP_R520_7108: + case PCI_CHIP_R520_7109: + case PCI_CHIP_R520_710A: + case PCI_CHIP_R520_710B: + case PCI_CHIP_R520_710C: + case PCI_CHIP_R520_710E: + case PCI_CHIP_R520_710F: + screen->chip_family = CHIP_FAMILY_R520; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV515_7140: + case PCI_CHIP_RV515_7141: + case PCI_CHIP_RV515_7142: + case PCI_CHIP_RV515_7143: + case PCI_CHIP_RV515_7144: + case PCI_CHIP_RV515_7145: + case PCI_CHIP_RV515_7146: + case PCI_CHIP_RV515_7147: + case PCI_CHIP_RV515_7149: + case PCI_CHIP_RV515_714A: + case PCI_CHIP_RV515_714B: + case PCI_CHIP_RV515_714C: + case PCI_CHIP_RV515_714D: + case PCI_CHIP_RV515_714E: + case PCI_CHIP_RV515_714F: + case PCI_CHIP_RV515_7151: + case PCI_CHIP_RV515_7152: + case PCI_CHIP_RV515_7153: + case PCI_CHIP_RV515_715E: + case PCI_CHIP_RV515_715F: + case PCI_CHIP_RV515_7180: + case PCI_CHIP_RV515_7181: + case PCI_CHIP_RV515_7183: + case PCI_CHIP_RV515_7186: + case PCI_CHIP_RV515_7187: + case PCI_CHIP_RV515_7188: + case PCI_CHIP_RV515_718A: + case PCI_CHIP_RV515_718B: + case PCI_CHIP_RV515_718C: + case PCI_CHIP_RV515_718D: + case PCI_CHIP_RV515_718F: + case PCI_CHIP_RV515_7193: + case PCI_CHIP_RV515_7196: + case PCI_CHIP_RV515_719B: + case PCI_CHIP_RV515_719F: + case PCI_CHIP_RV515_7200: + case PCI_CHIP_RV515_7210: + case PCI_CHIP_RV515_7211: + screen->chip_family = CHIP_FAMILY_RV515; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV530_71C0: + case PCI_CHIP_RV530_71C1: + case PCI_CHIP_RV530_71C2: + case PCI_CHIP_RV530_71C3: + case PCI_CHIP_RV530_71C4: + case PCI_CHIP_RV530_71C5: + case PCI_CHIP_RV530_71C6: + case PCI_CHIP_RV530_71C7: + case PCI_CHIP_RV530_71CD: + case PCI_CHIP_RV530_71CE: + case PCI_CHIP_RV530_71D2: + case PCI_CHIP_RV530_71D4: + case PCI_CHIP_RV530_71D5: + case PCI_CHIP_RV530_71D6: + case PCI_CHIP_RV530_71DA: + case PCI_CHIP_RV530_71DE: + screen->chip_family = CHIP_FAMILY_RV530; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_R580_7240: + case PCI_CHIP_R580_7243: + case PCI_CHIP_R580_7244: + case PCI_CHIP_R580_7245: + case PCI_CHIP_R580_7246: + case PCI_CHIP_R580_7247: + case PCI_CHIP_R580_7248: + case PCI_CHIP_R580_7249: + case PCI_CHIP_R580_724A: + case PCI_CHIP_R580_724B: + case PCI_CHIP_R580_724C: + case PCI_CHIP_R580_724D: + case PCI_CHIP_R580_724E: + case PCI_CHIP_R580_724F: + case PCI_CHIP_R580_7284: + screen->chip_family = CHIP_FAMILY_R580; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV570_7280: + case PCI_CHIP_RV560_7281: + case PCI_CHIP_RV560_7283: + case PCI_CHIP_RV560_7287: + case PCI_CHIP_RV570_7288: + case PCI_CHIP_RV570_7289: + case PCI_CHIP_RV570_728B: + case PCI_CHIP_RV570_728C: + case PCI_CHIP_RV560_7290: + case PCI_CHIP_RV560_7291: + case PCI_CHIP_RV560_7293: + case PCI_CHIP_RV560_7297: + screen->chip_family = CHIP_FAMILY_RV560; + screen->chip_flags = RADEON_CHIPSET_TCL; break; default: @@ -673,11 +814,19 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) return NULL; } if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) && - sPriv->ddxMinor < 2) { + sPriv->ddx_version.minor < 2) { fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n"); return NULL; } + if ((sPriv->drm_version.minor < 29) && (screen->chip_family >= CHIP_FAMILY_RV515)) { + fprintf(stderr, "R500 support requires a newer drm.\n"); + return NULL; + } + + if (getenv("R300_NO_TCL")) + screen->chip_flags &= ~RADEON_CHIPSET_TCL; + if (screen->chip_family <= CHIP_FAMILY_RS200) screen->chip_flags |= RADEON_CLASS_R100; else if (screen->chip_family <= CHIP_FAMILY_RV280) @@ -688,9 +837,51 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->cpp = dri_priv->bpp / 8; screen->AGPMode = dri_priv->AGPMode; - screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff ) << 16; + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION, + &temp); + if (ret) { + if (screen->chip_family < CHIP_FAMILY_RS690) + screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16; + else { + FREE( screen ); + fprintf(stderr, "Unable to get fb location need newer drm\n"); + return NULL; + } + } else { + screen->fbLocation = (temp & 0xffff) << 16; + } + + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES, + &temp); + if (ret) { + fprintf(stderr, "Unable to get num_pipes, need newer drm\n"); + switch (screen->chip_family) { + case CHIP_FAMILY_R300: + case CHIP_FAMILY_R350: + screen->num_gb_pipes = 2; + break; + case CHIP_FAMILY_R420: + case CHIP_FAMILY_R520: + case CHIP_FAMILY_R580: + case CHIP_FAMILY_RV560: + case CHIP_FAMILY_RV570: + screen->num_gb_pipes = 4; + break; + case CHIP_FAMILY_RV350: + case CHIP_FAMILY_RV515: + case CHIP_FAMILY_RV530: + case CHIP_FAMILY_RV410: + default: + screen->num_gb_pipes = 1; + break; + } + } else { + screen->num_gb_pipes = temp; + } + } - if ( sPriv->drmMinor >= 10 ) { + if ( sPriv->drm_version.minor >= 10 ) { drm_radeon_setparam_t sp; sp.param = RADEON_SETPARAM_FB_LOCATION; @@ -708,7 +899,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->depthPitch = dri_priv->depthPitch; /* Check if ddx has set up a surface reg to cover depth buffer */ - screen->depthHasSurface = (sPriv->ddxMajor > 4) || + screen->depthHasSurface = (sPriv->ddx_version.major > 4) || /* these chips don't use tiled z without hyperz. So always pretend we have set up a surface which will cause linear reads/writes */ ((screen->chip_family & RADEON_CLASS_R100) && @@ -741,29 +932,34 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) dri_priv->log2GARTTexGran; } - if ( glx_enable_extension != NULL ) { - if ( screen->irq != 0 ) { - (*glx_enable_extension)( psc, "GLX_SGI_swap_control" ); - (*glx_enable_extension)( psc, "GLX_SGI_video_sync" ); - (*glx_enable_extension)( psc, "GLX_MESA_swap_control" ); - } + i = 0; + screen->extensions[i++] = &driCopySubBufferExtension.base; + screen->extensions[i++] = &driFrameTrackingExtension.base; + screen->extensions[i++] = &driReadDrawableExtension; - (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" ); - if (IS_R200_CLASS(screen)) - (*glx_enable_extension)( psc, "GLX_MESA_allocate_memory" ); - - (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" ); - (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" ); + if ( screen->irq != 0 ) { + screen->extensions[i++] = &driSwapControlExtension.base; + screen->extensions[i++] = &driMediaStreamCounterExtension.base; } +#if !RADEON_COMMON + screen->extensions[i++] = &radeonTexOffsetExtension.base; +#endif + #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - if (IS_R200_CLASS(screen)) { - sPriv->psc->allocateMemory = (void *) r200AllocateMemoryMESA; - sPriv->psc->freeMemory = (void *) r200FreeMemoryMESA; - sPriv->psc->memoryOffset = (void *) r200GetMemoryOffsetMESA; - } + if (IS_R200_CLASS(screen)) + screen->extensions[i++] = &r200AllocateExtension.base; + + screen->extensions[i++] = &r200texOffsetExtension.base; #endif +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) + screen->extensions[i++] = &r300texOffsetExtension.base; +#endif + + screen->extensions[i++] = NULL; + sPriv->extensions = screen->extensions; + screen->driScreen = sPriv; screen->sarea_priv_offset = dri_priv->sarea_priv_offset; return screen; @@ -946,72 +1142,16 @@ static void radeonDestroyContext(__DRIcontextPrivate * driContextPriv) #endif -#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)) -static struct __DriverAPIRec radeonAPI = { - .InitDriver = radeonInitDriver, - .DestroyScreen = radeonDestroyScreen, - .CreateContext = radeonCreateContext, - .DestroyContext = radeonDestroyContext, - .CreateBuffer = radeonCreateBuffer, - .DestroyBuffer = radeonDestroyBuffer, - .SwapBuffers = radeonSwapBuffers, - .MakeCurrent = radeonMakeCurrent, - .UnbindContext = radeonUnbindContext, - .GetSwapInfo = getSwapInfo, - .GetMSC = driGetMSC32, - .WaitForMSC = driWaitForMSC32, - .WaitForSBC = NULL, - .SwapBuffersMSC = NULL, - .CopySubBuffer = radeonCopySubBuffer, -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) - .setTexOffset = r300SetTexOffset, -#endif -}; -#else -static const struct __DriverAPIRec r200API = { - .InitDriver = radeonInitDriver, - .DestroyScreen = radeonDestroyScreen, - .CreateContext = r200CreateContext, - .DestroyContext = r200DestroyContext, - .CreateBuffer = radeonCreateBuffer, - .DestroyBuffer = radeonDestroyBuffer, - .SwapBuffers = r200SwapBuffers, - .MakeCurrent = r200MakeCurrent, - .UnbindContext = r200UnbindContext, - .GetSwapInfo = getSwapInfo, - .GetMSC = driGetMSC32, - .WaitForMSC = driWaitForMSC32, - .WaitForSBC = NULL, - .SwapBuffersMSC = NULL, - .CopySubBuffer = r200CopySubBuffer, - .setTexOffset = r200SetTexOffset -}; -#endif - /** - * This is the bootstrap function for the driver. libGL supplies all of the - * requisite information about the system, and the driver initializes itself. - * This routine also fills in the linked list pointed to by \c driver_modes - * with the \c __GLcontextModes that the driver can support for windows or - * pbuffers. + * This is the driver specific part of the createNewScreen entry point. * - * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on - * failure. + * \todo maybe fold this into intelInitDriver + * + * \return the __GLcontextModes supported by this driver */ -PUBLIC void * -__driCreateNewScreen_20050727( __DRInativeDisplay *dpy, - int scrn, __DRIscreen *psc, - const __GLcontextModes * modes, - const __DRIversion * ddx_version, - const __DRIversion * dri_version, - const __DRIversion * drm_version, - const __DRIframebuffer * frame_buffer, - drmAddress pSAREA, int fd, - int internal_api_version, - const __DRIinterfaceMethods * interface, - __GLcontextModes ** driver_modes ) +static const __DRIconfig ** +radeonInitScreen(__DRIscreenPrivate *psp) { - __DRIscreenPrivate *psp; #if !RADEON_COMMON static const char *driver_name = "Radeon"; static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; @@ -1028,57 +1168,42 @@ __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 1, 24, 0 }; #endif - - dri_interface = interface; + RADEONDRIPtr dri_priv = (RADEONDRIPtr) psp->pDevPriv; if ( ! driCheckDriDdxDrmVersions3( driver_name, - dri_version, & dri_expected, - ddx_version, & ddx_expected, - drm_version, & drm_expected ) ) { + &psp->dri_version, & dri_expected, + &psp->ddx_version, & ddx_expected, + &psp->drm_version, & drm_expected ) ) { return NULL; } -#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)) - psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, - ddx_version, dri_version, drm_version, - frame_buffer, pSAREA, fd, - internal_api_version, &radeonAPI); -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, - ddx_version, dri_version, drm_version, - frame_buffer, pSAREA, fd, - internal_api_version, &r200API); -#endif - - if ( psp != NULL ) { - RADEONDRIPtr dri_priv = (RADEONDRIPtr) psp->pDevPriv; - if (driver_modes) { - *driver_modes = radeonFillInModes( dri_priv->bpp, - (dri_priv->bpp == 16) ? 16 : 24, - (dri_priv->bpp == 16) ? 0 : 8, - (dri_priv->backOffset != dri_priv->depthOffset) ); - } - /* Calling driInitExtensions here, with a NULL context pointer, - * does not actually enable the extensions. It just makes sure - * that all the dispatch offsets for all the extensions that - * *might* be enables are known. This is needed because the - * dispatch offsets need to be known when _mesa_context_create - * is called, but we can't enable the extensions until we have a - * context pointer. - * - * Hello chicken. Hello egg. How are you two today? - */ - driInitExtensions( NULL, card_extensions, GL_FALSE ); + /* Calling driInitExtensions here, with a NULL context pointer, + * does not actually enable the extensions. It just makes sure + * that all the dispatch offsets for all the extensions that + * *might* be enables are known. This is needed because the + * dispatch offsets need to be known when _mesa_context_create + * is called, but we can't enable the extensions until we have a + * context pointer. + * + * Hello chicken. Hello egg. How are you two today? + */ + driInitExtensions( NULL, card_extensions, GL_FALSE ); #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - driInitExtensions( NULL, blend_extensions, GL_FALSE ); - driInitSingleExtension( NULL, ARB_vp_extension ); - driInitSingleExtension( NULL, NV_vp_extension ); - driInitSingleExtension( NULL, ATI_fs_extension ); - driInitExtensions( NULL, point_extensions, GL_FALSE ); + driInitExtensions( NULL, blend_extensions, GL_FALSE ); + driInitSingleExtension( NULL, ARB_vp_extension ); + driInitSingleExtension( NULL, NV_vp_extension ); + driInitSingleExtension( NULL, ATI_fs_extension ); + driInitExtensions( NULL, point_extensions, GL_FALSE ); #endif - } - return (void *) psp; + if (!radeonInitDriver(psp)) + return NULL; + + return radeonFillInModes( psp, + dri_priv->bpp, + (dri_priv->bpp == 16) ? 16 : 24, + (dri_priv->bpp == 16) ? 0 : 8, + (dri_priv->backOffset != dri_priv->depthOffset) ); } @@ -1111,3 +1236,41 @@ getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) return 0; } + +#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)) +const struct __DriverAPIRec driDriverAPI = { + .InitScreen = radeonInitScreen, + .DestroyScreen = radeonDestroyScreen, + .CreateContext = radeonCreateContext, + .DestroyContext = radeonDestroyContext, + .CreateBuffer = radeonCreateBuffer, + .DestroyBuffer = radeonDestroyBuffer, + .SwapBuffers = radeonSwapBuffers, + .MakeCurrent = radeonMakeCurrent, + .UnbindContext = radeonUnbindContext, + .GetSwapInfo = getSwapInfo, + .GetDrawableMSC = driDrawableGetMSC32, + .WaitForMSC = driWaitForMSC32, + .WaitForSBC = NULL, + .SwapBuffersMSC = NULL, + .CopySubBuffer = radeonCopySubBuffer, +}; +#else +const struct __DriverAPIRec driDriverAPI = { + .InitScreen = radeonInitScreen, + .DestroyScreen = radeonDestroyScreen, + .CreateContext = r200CreateContext, + .DestroyContext = r200DestroyContext, + .CreateBuffer = radeonCreateBuffer, + .DestroyBuffer = radeonDestroyBuffer, + .SwapBuffers = r200SwapBuffers, + .MakeCurrent = r200MakeCurrent, + .UnbindContext = r200UnbindContext, + .GetSwapInfo = getSwapInfo, + .GetDrawableMSC = driDrawableGetMSC32, + .WaitForMSC = driWaitForMSC32, + .WaitForSBC = NULL, + .SwapBuffersMSC = NULL, + .CopySubBuffer = r200CopySubBuffer, +}; +#endif diff --git a/radeon/radeon_screen.h b/radeon/radeon_screen.h index 25e6fcf..ab859d5 100644 --- a/radeon/radeon_screen.h +++ b/radeon/radeon_screen.h @@ -103,6 +103,10 @@ typedef struct { /* Configuration cache with default values for all contexts */ driOptionCache optionCache; + + const __DRIextension *extensions[8]; + + int num_gb_pipes; } radeonScreenRec, *radeonScreenPtr; #define IS_R100_CLASS(screen) \ diff --git a/radeon/radeon_span.c b/radeon/radeon_span.c index 732a85e..e7ab367 100644 --- a/radeon/radeon_span.c +++ b/radeon/radeon_span.c @@ -173,6 +173,8 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) /* 16-bit depth buffer functions */ +#define VALUE_TYPE GLushort + #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; @@ -187,6 +189,8 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) * Careful: It looks like the R300 uses ZZZS byte order while the R200 * uses SZZZ for 24 bit depth, 8 bit stencil mode. */ +#define VALUE_TYPE GLuint + #ifdef COMPILE_R300 #define WRITE_DEPTH( _x, _y, d ) \ do { \ diff --git a/radeon/radeon_state.c b/radeon/radeon_state.c index 4de05c7..ae83c91 100644 --- a/radeon/radeon_state.c +++ b/radeon/radeon_state.c @@ -1640,8 +1640,7 @@ void radeonSetCliprects( radeonContextPtr rmesa ) GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate; GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate; - if (draw_fb->_ColorDrawBufferMask[0] - == BUFFER_BIT_BACK_LEFT) { + if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { /* Can't ignore 2d windows if we are page flipping. */ if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) { @@ -1693,17 +1692,18 @@ static void radeonDrawBuffer( GLcontext *ctx, GLenum mode ) RADEON_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */ - /* - * _ColorDrawBufferMask is easier to cope with than <mode>. - * Check for software fallback, update cliprects. - */ - switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) { - case BUFFER_BIT_FRONT_LEFT: - case BUFFER_BIT_BACK_LEFT: + if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) { + /* 0 (GL_NONE) buffers or multiple color drawing buffers */ + FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE ); + return; + } + + switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) { + case BUFFER_FRONT_LEFT: + case BUFFER_BACK_LEFT: FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE ); break; default: - /* 0 (GL_NONE) buffers or multiple color drawing buffers */ FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE ); return; } @@ -2222,11 +2222,11 @@ radeonUpdateDrawBuffer(GLcontext *ctx) struct gl_framebuffer *fb = ctx->DrawBuffer; driRenderbuffer *drb; - if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) { + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { /* draw to front */ drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; } - else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) { + else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { /* draw to back */ drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; } diff --git a/radeon/radeon_tex.h b/radeon/radeon_tex.h index a806981..0b955ed 100644 --- a/radeon/radeon_tex.h +++ b/radeon/radeon_tex.h @@ -38,6 +38,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef __RADEON_TEX_H__ #define __RADEON_TEX_H__ +extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname, + unsigned long long offset, GLint depth, + GLuint pitch); + extern void radeonUpdateTextureState( GLcontext *ctx ); extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, diff --git a/radeon/radeon_texmem.c b/radeon/radeon_texmem.c index 20f25dd..0a3d745 100644 --- a/radeon/radeon_texmem.c +++ b/radeon/radeon_texmem.c @@ -334,7 +334,7 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint fac { int numLevels; - if ( !t || t->base.totalSize == 0 ) + if ( !t || t->base.totalSize == 0 || t->image_override ) return 0; if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { diff --git a/radeon/radeon_texstate.c b/radeon/radeon_texstate.c index 37bb749..ecd3754 100644 --- a/radeon/radeon_texstate.c +++ b/radeon/radeon_texstate.c @@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "context.h" #include "macros.h" #include "texformat.h" +#include "texobj.h" #include "enums.h" #include "radeon_context.h" @@ -84,7 +85,7 @@ tx_table[] = _ALPHA_REV(RGBA8888), _ALPHA(ARGB8888), _ALPHA_REV(ARGB8888), - _INVALID(RGB888), + [ MESA_FORMAT_RGB888 ] = { RADEON_TXFORMAT_ARGB8888, 0 }, _COLOR(RGB565), _COLOR_REV(RGB565), _ALPHA(ARGB4444), @@ -134,18 +135,19 @@ static void radeonSetTexImages( radeonContextPtr rmesa, /* Set the hardware texture format */ - - t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK | - RADEON_TXFORMAT_ALPHA_IN_MAP); - t->pp_txfilter &= ~RADEON_YUV_TO_RGB; - - if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) { - t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format; - t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter; - } - else { - _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); - return; + if ( !t->image_override ) { + t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK | + RADEON_TXFORMAT_ALPHA_IN_MAP); + t->pp_txfilter &= ~RADEON_YUV_TO_RGB; + + if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) { + t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format; + t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter; + } + else { + _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); + return; + } } texelBytes = baseImage->TexFormat->TexelBytes; @@ -341,11 +343,13 @@ static void radeonSetTexImages( radeonContextPtr rmesa, * requires 64-byte aligned pitches, and we may/may not need the * blitter. NPOT only! */ - if (baseImage->IsCompressed) - t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); - else - t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); - t->pp_txpitch -= 32; + if ( !t->image_override ) { + if (baseImage->IsCompressed) + t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); + else + t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); + t->pp_txpitch -= 32; + } t->dirty_state = TEX_ALL; @@ -840,6 +844,44 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit ) return GL_TRUE; } +void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch) +{ + radeonContextPtr rmesa = pDRICtx->driverPrivate; + struct gl_texture_object *tObj = + _mesa_lookup_texture(rmesa->glCtx, texname); + radeonTexObjPtr t; + + if (tObj == NULL) + return; + + t = (radeonTexObjPtr) tObj->DriverData; + + t->image_override = GL_TRUE; + + if (!offset) + return; + + t->pp_txoffset = offset; + t->pp_txpitch = pitch - 32; + + switch (depth) { + case 32: + t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format; + t->pp_txfilter |= tx_table[MESA_FORMAT_ARGB8888].filter; + break; + case 24: + default: + t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format; + t->pp_txfilter |= tx_table[MESA_FORMAT_RGB888].filter; + break; + case 16: + t->pp_txformat = tx_table[MESA_FORMAT_RGB565].format; + t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter; + break; + } +} + #define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK | \ RADEON_MIN_FILTER_MASK | \ RADEON_MAG_FILTER_MASK | \ @@ -1136,7 +1178,7 @@ static GLboolean enable_tex_2d( GLcontext *ctx, int unit ) RADEON_FIREVERTICES( rmesa ); radeonSetTexImages( rmesa, tObj ); radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 ); - if ( !t->base.memBlock ) + if ( !t->base.memBlock && !t->image_override ) return GL_FALSE; } @@ -1203,7 +1245,8 @@ static GLboolean enable_tex_rect( GLcontext *ctx, int unit ) RADEON_FIREVERTICES( rmesa ); radeonSetTexImages( rmesa, tObj ); radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 ); - if ( !t->base.memBlock /* && !rmesa->prefer_gart_client_texturing FIXME */ ) { + if ( !t->base.memBlock && + !t->image_override /* && !rmesa->prefer_gart_client_texturing FIXME */ ) { fprintf(stderr, "%s: upload failed\n", __FUNCTION__); return GL_FALSE; } |