1 files changed, 542 insertions, 0 deletions
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_fastpath.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_fastpath.c
new file mode 100644
index 000000000..1ff701435
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_fastpath.c
@@ -0,0 +1,542 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_fastpath.c,v 1.1 2001/01/08 01:07:27 martin Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keithw@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#include "radeon_state.h"
+#include "radeon_vb.h"
+#include "radeon_pipeline.h"
+#include "radeon_ioctl.h"
+#include "radeon_tris.h"
+
+#include "mmath.h"
+#include "cva.h"
+#include "vertices.h"
+
+
+struct radeon_fast_tab {   /* fast-path hooks for one vertex format; stored in radeonFastTab[] */
+   void (*build_vertices)( struct vertex_buffer *vb, GLuint cliptest );
+   void (*interp)( GLfloat t, GLfloat *dst, const GLfloat *a, const GLfloat *b );
+};
+
+#define POINT(v)     radeon_draw_point( rmesa, &vert[v], psize )	/* needs rmesa, vert, psize in scope */
+#define LINE(a,b)    radeon_draw_line( rmesa, &vert[a], &vert[b], lwidth )	/* needs lwidth in scope */
+#define TRI(a,b,c)   radeon_draw_triangle( rmesa, &vert[a], &vert[b], &vert[c] )
+
+
+/* Direct element rendering, no clipping required.  The clip funcs have
+ * not been written yet, so this is only useful for the fast path.
+ */
+#define RENDER_POINTS( start, count )					\
+do {									\
+   GLuint e;								\
+   for ( e = start ; e < count ; e++ )					\
+      POINT( elt[e] );		/* look the vertex up through elt[] */	\
+} while (0)
+
+#define RENDER_LINE( i1, i )						\
+do {									\
+   GLuint e1 = elt[i1], e = elt[i];					\
+   LINE( e1, e );							\
+} while (0)
+
+#define RENDER_TRI( i2, i1, i, pv, parity )				\
+do {									\
+   GLuint e2 = elt[i2], e1 = elt[i1], e = elt[i];			\
+   if ( parity ) {		/* swap the first two indices */	\
+      GLuint tmp = e2;							\
+      e2 = e1;								\
+      e1 = tmp;								\
+   }									\
+   TRI( e2, e1, e );							\
+} while (0)
+
+#define RENDER_QUAD( i3, i2, i1, i, pv )	/* two triangles sharing e */	\
+do {									\
+   GLuint e3 = elt[i3], e2 = elt[i2], e1 = elt[i1], e = elt[i];		\
+   TRI( e3, e2, e );							\
+   TRI( e2, e1, e );							\
+} while (0)
+
+#define LOCAL_VARS	/* locals that POINT/LINE/TRI and RENDER_* refer to */	\
+   radeonVertexPtr vert = RADEON_DRIVER_DATA(VB)->verts;		\
+   const GLuint *elt = VB->EltPtr->data;				\
+   GLcontext *ctx = VB->ctx;						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
+   const GLfloat lwidth = ctx->Line.Width;				\
+   const GLfloat psize = ctx->Point.Size;				\
+   (void) lwidth; (void) psize; (void) vert;	/* mark as used; not every primitive needs all three */
+
+#define TAG(x) radeon_##x##_smooth_indirect	/* e.g. radeon_render_tab_smooth_indirect, used further down */
+#include "render_tmp.h"	/* NOTE(review): presumably expands the macros above into the tagged render functions -- confirm */
+
+
+
+#define NEGATIVE( f )		((f) < 0)			/* nonzero when f is below zero */
+#define DIFFERENT_SIGNS( a, b )	(((a) * (b)) < 0)	/* nonzero when the product a*b is negative */
+#define LINTERP( T, A, B )	((A) + (T) * ((B) - (A)))	/* yields A at T=0 and B at T=1 */
+
+
+#define INTERP_RGBA( t, out, a, b )	/* out[0..3] = LINTERP of a[] and b[] at t */ \
+do {									\
+   int i;								\
+   for ( i = 0 ; i < 4 ; i++ ) {	/* four ubyte components */	\
+      GLfloat fa = UBYTE_COLOR_TO_FLOAT_COLOR( a[i] );			\
+      GLfloat fb = UBYTE_COLOR_TO_FLOAT_COLOR( b[i] );			\
+      GLfloat fo = LINTERP( t, fa, fb );	/* blend as floats */	\
+      FLOAT_COLOR_TO_UBYTE_COLOR( out[i], fo );				\
+   }									\
+} while (0)
+
+
+#define CLIP( SGN, V, PLANE )	/* keep the side where (SGN v[V]) + v[3] >= 0 */ \
+do {									\
+   if ( mask & PLANE ) {						\
+      GLuint *indata = inlist[in];					\
+      GLuint *outdata = inlist[in ^= 1];	/* flip to other list */ \
+      GLuint nr = n;							\
+      GLfloat *J = verts[indata[nr-1]].f;	/* previous vertex */	\
+      GLfloat dpJ = (SGN J[V]) + J[3];					\
+									\
+      inlist[0] = vlist1;	/* after pass one, stop reading elts */	\
+      for ( i = n = 0 ; i < nr ; i++ ) {				\
+	 GLuint elt_i = indata[i];					\
+	 GLfloat *I = verts[elt_i].f;					\
+	 GLfloat dpI = (SGN I[V]) + I[3];				\
+									\
+	 if ( DIFFERENT_SIGNS( dpI, dpJ ) ) {	/* edge J-I crosses */	\
+	    GLfloat *O = verts[next_vert].f;				\
+	    GLfloat t, *in, *out;					\
+									\
+	    if ( NEGATIVE( dpI ) ) {					\
+	       t = dpI / (dpI - dpJ);					\
+	       in = I;							\
+	       out = J;							\
+	    } else {							\
+	       t = dpJ / (dpJ - dpI);					\
+	       in = J;							\
+	       out = I;							\
+	    }								\
+									\
+	    interp( t, O, in, out );	/* fill the new vertex O */	\
+									\
+	    clipmask[next_vert] = 0;					\
+	    outdata[n++] = next_vert++;					\
+	 }								\
+									\
+	 clipmask[elt_i] |= PLANE;      /* don't set up */		\
+									\
+	 if ( !NEGATIVE( dpI ) ) {	/* I is on the kept side */	\
+	    outdata[n++] = elt_i;					\
+	    clipmask[elt_i] &= ~PLANE; /* set up after all */		\
+	 }								\
+									\
+	 J = I;								\
+	 dpJ = dpI;							\
+      }									\
+									\
+      if ( n < 3 ) return;	/* fewer than 3 vertices remain */	\
+   }									\
+} while (0)
+
+#define LINE_CLIP( x, y, z, w, PLANE )	/* clip segment I-J to plane (x,y,z,w) */ \
+do {									\
+   if ( mask & PLANE ) {						\
+      GLfloat dpI = DOT4V( I, x, y, z, w);				\
+      GLfloat dpJ = DOT4V( J, x, y, z, w);				\
+									\
+      if ( DIFFERENT_SIGNS( dpI, dpJ ) ) {	/* segment crosses */	\
+	 GLfloat *O = verts[next_vert].f;				\
+	 GLfloat t = dpI / (dpI - dpJ);					\
+									\
+	 interp( t, O, I, J );		/* fill the new vertex O */	\
+									\
+	 clipmask[next_vert] = 0;					\
+									\
+	 if ( NEGATIVE( dpI ) ) {	/* replace endpoint I by O */	\
+	    clipmask[elts[0]] |= PLANE;					\
+	    I = O;							\
+	    elts[0] = next_vert++;					\
+	 } else {			/* replace endpoint J by O */	\
+	    clipmask[elts[1]] |= PLANE;					\
+	    J = O;							\
+	    elts[1] = next_vert++;					\
+	 }								\
+      } else if ( NEGATIVE( dpI ) ) return;	/* same sign, negative: give up */ \
+   }									\
+} while (0)
+
+
+static __inline void radeon_tri_clip( GLuint **p_elts,		/* in/out: cursor at the triangle's 3 elts */
+				      radeonVertexPtr verts,	/* vertex store; new vertices are written here */
+				      GLubyte *clipmask,	/* per-vertex clip bits, updated by CLIP */
+				      GLuint *p_next_vert,	/* in/out: index of the next free vertex */
+				      GLubyte mask,		/* planes to clip against */
+				      radeon_interp_func interp )	/* builds a vertex along an edge */
+{
+   GLuint *elts = *p_elts;
+   GLuint next_vert = *p_next_vert;
+   GLuint in = 0;
+   GLuint n = 3;
+   GLuint vlist1[VB_MAX_CLIPPED_VERTS];
+   GLuint vlist2[VB_MAX_CLIPPED_VERTS];
+   GLuint *inlist[2];
+   GLuint *out;
+   GLuint i;
+
+   inlist[0] = elts;
+   inlist[1] = vlist2;
+   /* The first CLIP pass reads elts; CLIP then repoints inlist[0] at vlist1. */
+   CLIP( -, 0, CLIP_RIGHT_BIT );
+   CLIP( +, 0, CLIP_LEFT_BIT );
+   CLIP( -, 1, CLIP_TOP_BIT );
+   CLIP( +, 1, CLIP_BOTTOM_BIT );
+   CLIP( -, 2, CLIP_FAR_BIT );
+   CLIP( +, 2, CLIP_NEAR_BIT );
+
+   /* Convert the clipped polygon to a fan of triangles around out[0] */
+   out = inlist[in];
+
+   for ( i = 2 ; i < n ; i++ ) {
+      elts[0] = out[0];
+      elts[1] = out[i-1];
+      elts[2] = out[i];
+      elts += 3;
+   }
+   /* Publish both cursors; this is skipped when CLIP returned early. */
+   *p_next_vert = next_vert;
+   *p_elts = elts;
+}
+
+
+static __inline void radeon_line_clip( GLuint **p_elts,		/* in/out: cursor at the line's 2 elts */
+				       radeonVertexPtr verts,	/* vertex store; new vertices are written here */
+				       GLubyte *clipmask,	/* per-vertex clip bits, updated by LINE_CLIP */
+				       GLuint *p_next_vert,	/* in/out: index of the next free vertex */
+				       GLubyte mask,		/* planes to clip against */
+				       radeon_interp_func interp )	/* builds a vertex along an edge */
+{
+   GLuint *elts = *p_elts;
+   GLfloat *I = verts[elts[0]].f;
+   GLfloat *J = verts[elts[1]].f;
+   GLuint next_vert = *p_next_vert;
+   /* Each plane keeps (a,b,c,d).(x,y,z,w) >= 0, the same sides radeon_tri_clip keeps: +/-coord + w. */
+   LINE_CLIP(  1,  0,  0, 1, CLIP_LEFT_BIT );	/*  x + w >= 0 */
+   LINE_CLIP( -1,  0,  0, 1, CLIP_RIGHT_BIT );	/* -x + w >= 0 */
+   LINE_CLIP(  0, -1,  0, 1, CLIP_TOP_BIT );	/* -y + w >= 0 */
+   LINE_CLIP(  0,  1,  0, 1, CLIP_BOTTOM_BIT );	/*  y + w >= 0 */
+   LINE_CLIP(  0,  0, -1, 1, CLIP_FAR_BIT );	/* -z + w >= 0 */
+   LINE_CLIP(  0,  0,  1, 1, CLIP_NEAR_BIT );	/*  z + w >= 0 */
+   /* Reached only when LINE_CLIP did not return early: keep the two (possibly replaced) elts. */
+   *p_next_vert = next_vert;
+   *p_elts += 2;
+}
+
+
+
+#define CLIP_POINT( e )	/* keep a point only when none of its clip bits is set */ \
+do {									\
+   if ( !mask[e] ) *out++ = e;	/* points with clip bits are dropped */	\
+} while (0)
+
+#define CLIP_LINE( e1, e0 )	/* append line e1-e0, clipping it if needed */	\
+do {									\
+   GLubyte ormask = mask[e0] | mask[e1];				\
+   out[0] = e1;			/* write the line first ... */		\
+   out[1] = e0;								\
+   out += 2;								\
+   if ( ormask ) {		/* ... some clip bit is set */		\
+      out-=2;			/* take it back */			\
+      if ( !(mask[e0] & mask[e1]) ) {	/* no clip bit common to both */ \
+	 radeon_line_clip( &out, verts, mask,				\
+			   &next_vert, ormask, interp );		\
+      }									\
+   }									\
+} while (0)
+
+#define CLIP_TRIANGLE( e2, e1, e0 )	/* append triangle, clipping it if needed */ \
+do {									\
+   GLubyte ormask;							\
+   out[0] = e2;			/* write the triangle first ... */	\
+   out[1] = e1;								\
+   out[2] = e0;								\
+   out += 3;								\
+   ormask = mask[e2] | mask[e1] | mask[e0];				\
+   if ( ormask ) {		/* ... some clip bit is set */		\
+      out -= 3;			/* take it back */			\
+      if ( !(mask[e2] & mask[e1] & mask[e0]) ) {	/* no bit common to all 3 */ \
+	 radeon_tri_clip( &out, verts, mask,				\
+			  &next_vert, ormask, interp );			\
+      }									\
+   }									\
+} while (0)
+
+
+
+/* Build a table of functions to clip each primitive type.  These
+ * produce a list of elements in the appropriate 'reduced' primitive,
+ * i.e. (points, lines, triangles), containing all the clipped and
+ * unclipped primitives from the original list.
+ */
+#define LOCAL_VARS	/* locals that CLIP_POINT/CLIP_LINE/CLIP_TRIANGLE refer to */ \
+   radeonContextPtr rmesa = RADEON_CONTEXT(VB->ctx);			\
+   radeonVertexBufferPtr rvb = RADEON_DRIVER_DATA(VB);			\
+   GLuint *elt = VB->EltPtr->data;					\
+   radeonVertexPtr verts = rvb->verts;					\
+   GLuint next_vert = rvb->last_vert;	/* new vertices start here */	\
+   GLuint *out = rvb->clipped_elements.data;	/* output cursor */	\
+   GLubyte *mask = VB->ClipMask;					\
+   radeon_interp_func interp = rmesa->interp;				\
+   (void) interp; (void) verts;	/* mark as used; the point path needs neither */
+
+#define POSTFIX		/* write the element count and vertex cursor back */ \
+   rvb->clipped_elements.count = out - rvb->clipped_elements.data;	\
+   rvb->last_vert = next_vert;
+
+
+#define INIT( x )	/* expands to nothing */
+
+#define RENDER_POINTS( start, count )	/* filter points through CLIP_POINT */	\
+do {									\
+   GLuint i;								\
+   for ( i = start; i < count ; i++ )					\
+      CLIP_POINT( elt[i] );						\
+} while (0)
+
+#define RENDER_LINE( i1, i0 )						\
+do {									\
+   CLIP_LINE( elt[i1], elt[i0] );					\
+} while (0)
+
+#define RENDER_TRI( i2, i1, i0, pv, parity )				\
+do {									\
+   GLuint e2 = elt[i2], e1 = elt[i1], e0 = elt[i0];			\
+   if ( parity ) e2 = elt[i1], e1 = elt[i2];	/* swap first two */	\
+   CLIP_TRIANGLE( e2, e1, e0 );						\
+} while (0)
+
+#define RENDER_QUAD( i3, i2, i1, i0, pv )	/* two triangles sharing elt[i0] */ \
+do {									\
+   CLIP_TRIANGLE( elt[i3], elt[i2], elt[i0] );				\
+   CLIP_TRIANGLE( elt[i2], elt[i1], elt[i0] );				\
+} while (0)
+
+#define TAG(x) radeon_##x##_clip_elt	/* e.g. radeon_render_tab_clip_elt, used further down */
+#include "render_tmp.h"	/* NOTE(review): presumably expands the macros above into the tagged clip functions -- confirm */
+
+
+
+/* Pack rgba and/or texture into the remaining half of a 32 byte vertex.
+ * NOTE(review): presumably slot indices read by radeon_fasttmp.h -- confirm. */
+#define CLIP_UBYTE_COLOR	4	/* slot of the packed ubyte colour */
+#define CLIP_UBYTE_R		0	/* component positions inside it (presumably bytes) */
+#define CLIP_UBYTE_G		1
+#define CLIP_UBYTE_B		2
+#define CLIP_UBYTE_A		3
+#define CLIP_S0			6	/* texture unit 0 coordinates */
+#define CLIP_T0			7
+#define CLIP_S1			8	/* texture unit 1 coordinates */
+#define CLIP_T1			9
+
+#define TYPE (0)	/* neither colour nor texture bits */
+#define TAG(x) x
+#include "radeon_fasttmp.h"	/* included once per TYPE/TAG pair; names get the TAG suffix */
+
+#define TYPE (RADEON_RGBA_BIT)
+#define TAG(x) x##_RGBA
+#include "radeon_fasttmp.h"
+
+#define TYPE (RADEON_TEX0_BIT)
+#define TAG(x) x##_TEX0
+#include "radeon_fasttmp.h"
+
+#define TYPE (RADEON_RGBA_BIT | RADEON_TEX0_BIT)
+#define TAG(x) x##_RGBA_TEX0
+#include "radeon_fasttmp.h"
+
+#define TYPE (RADEON_RGBA_BIT | RADEON_TEX0_BIT | RADEON_TEX1_BIT)
+#define TAG(x) x##_RGBA_TEX0_TEX1
+#include "radeon_fasttmp.h"
+
+/* This one *could* get away with sneaking TEX1 into the color and
+ * specular slots, thus fitting inside a cache line.  It would be even
+ * better to switch to a smaller vertex.
+ */
+#define TYPE (RADEON_TEX0_BIT | RADEON_TEX1_BIT)
+#define TAG(x) x##_TEX0_TEX1
+#include "radeon_fasttmp.h"
+
+
+
+static void radeon_render_elements_direct( struct vertex_buffer *VB )
+{
+   GLcontext *gl = VB->ctx;
+   radeonContextPtr radeon = RADEON_CONTEXT(gl);
+   const GLuint count = VB->EltPtr->count;
+   render_func draw = radeon_render_tab_smooth_indirect[gl->CVA.elt_mode];
+   GLuint pass = 0;
+
+   /* Let the driver handle any pending state change before drawing. */
+   if ( radeon->new_state ) radeonDDUpdateHWState( gl );
+
+   for ( ;; ) {   /* draw once, then again for as long as MultipassFunc asks */
+      draw( VB, 0, count, 0 );
+      if ( !gl->Driver.MultipassFunc ) break;
+      if ( !gl->Driver.MultipassFunc( VB, ++pass ) ) break;
+   }
+}
+
+/* GH: Ideally this goes away on the Radeon: disable the viewport
+ * mapping entirely in Mesa and let the hardware do it in all cases.
+ * Until then, project the built vertices (clip->device space).
+ */
+static void radeon_project_vertices( struct vertex_buffer *VB )
+{
+   radeonVertexBufferPtr rvb = RADEON_DRIVER_DATA(VB);
+   radeonContextPtr rmesa = RADEON_CONTEXT(VB->ctx);
+   const GLfloat *src = VB->ctx->Viewport.WindowMap.m;
+   GLfloat *dst = rmesa->tmp_matrix;
+   GLfloat *first = rvb->verts[VB->CopyStart].f;
+   GLfloat *last = rvb->verts[rvb->last_vert].f;
+
+   /* Copy the viewport scale/translate terms, negating the Y pair. */
+   dst[MAT_SX] =  src[MAT_SX];
+   dst[MAT_SY] = -src[MAT_SY];
+   dst[MAT_SZ] =  src[MAT_SZ];
+   dst[MAT_TX] =  src[MAT_TX];
+   dst[MAT_TY] = -src[MAT_TY];
+   dst[MAT_TZ] =  src[MAT_TZ];
+
+   gl_project_v16( first, last, dst,
+		   16 * 4 );
+}
+
+static void radeon_project_clipped_vertices( struct vertex_buffer *VB )
+{
+   radeonVertexBufferPtr rvb = RADEON_DRIVER_DATA(VB);
+   radeonContextPtr rmesa = RADEON_CONTEXT(VB->ctx);
+   const GLfloat *src = VB->ctx->Viewport.WindowMap.m;
+   GLfloat *dst = rmesa->tmp_matrix;
+   GLfloat *first = rvb->verts[VB->CopyStart].f;
+   GLfloat *last = rvb->verts[rvb->last_vert].f;
+
+   /* Copy the viewport scale/translate terms, negating the Y pair. */
+   dst[MAT_SX] =  src[MAT_SX];
+   dst[MAT_SY] = -src[MAT_SY];
+   dst[MAT_SZ] =  src[MAT_SZ];
+   dst[MAT_TX] =  src[MAT_TX];
+   dst[MAT_TY] = -src[MAT_TY];
+   dst[MAT_TZ] =  src[MAT_TZ];
+
+   /* The clip mask is offset by CopyStart so it lines up with 'first'. */
+   gl_project_clipped_v16( first, last, dst, 16 * 4,
+			   VB->ClipMask + VB->CopyStart );
+}
+
+static struct radeon_fast_tab radeonFastTab[RADEON_MAX_SETUPFUNC];   /* indexed by setup bits */
+
+void radeonDDFastPathInit( void )
+{
+   struct radeon_fast_tab *const tab = radeonFastTab;
+
+   /* Init hooks named after the two render_tmp.h TAG patterns used in this file. */
+   radeon_render_init_clip_elt();
+   radeon_render_init_smooth_indirect();
+
+   /* One init call per radeon_fasttmp.h TYPE/TAG pair, each given its own slot. */
+   radeon_init_fastpath( &tab[0] );
+   radeon_init_fastpath_RGBA( &tab[RADEON_RGBA_BIT] );
+   radeon_init_fastpath_TEX0( &tab[RADEON_TEX0_BIT] );
+   radeon_init_fastpath_RGBA_TEX0( &tab[RADEON_RGBA_BIT | RADEON_TEX0_BIT] );
+   radeon_init_fastpath_TEX0_TEX1( &tab[RADEON_TEX0_BIT | RADEON_TEX1_BIT] );
+   radeon_init_fastpath_RGBA_TEX0_TEX1( &tab[RADEON_RGBA_BIT | RADEON_TEX0_BIT | RADEON_TEX1_BIT] );
+}
+
+#define VALID_SETUP (RADEON_RGBA_BIT | RADEON_TEX0_BIT | RADEON_TEX1_BIT)
+
+void radeonDDFastPath( struct vertex_buffer *VB )
+{
+   GLcontext *gl = VB->ctx;
+   const GLenum mode = gl->CVA.elt_mode;
+   radeonContextPtr radeon = RADEON_CONTEXT( gl );
+   struct radeon_fast_tab *fast =
+      &radeonFastTab[radeon->SetupIndex & VALID_SETUP];
+   GLuint cliptest = 1;
+
+   gl_prepare_arrays_cva( VB );   /* still need this */
+
+   /* Small triangle-type batches with a general or perspective matrix
+    * are handed to the element path instead.
+    */
+   if ( gl_reduce_prim[mode] == GL_TRIANGLES &&
+	VB->Count < (RADEON_BUFFER_SIZE / (10 * sizeof(GLuint))) &&
+	(gl->ModelProjectMatrix.flags & (MAT_FLAG_GENERAL | MAT_FLAG_PERSPECTIVE)) ) {
+      radeonDDEltPath( VB );
+      return;
+   }
+
+   /* Reserve enough space for the pathological case: 12 slots per
+    * element.
+    */
+   if ( RADEON_DRIVER_DATA(VB)->size < VB->EltPtr->count * 12 ) {
+      radeonDDResizeVB( VB, VB->EltPtr->count * 12 );
+      cliptest = 1;   /* NOTE(review): already 1, so the cliptest is never skipped */
+   }
+
+   fast->build_vertices( VB, cliptest );	/* object->clip space */
+
+   if ( radeon->new_state )
+      radeonDDUpdateHWState( gl );
+
+   if ( !VB->ClipOrMask ) {
+      /* No clip bits set: project and draw the original element list. */
+      radeon_project_vertices( VB );		/* clip->device space */
+      radeon_render_elements_direct( VB );
+   } else if ( !VB->ClipAndMask ) {
+      /* Some clip bits set: build a clipped element list and draw that. */
+      radeon->interp = fast->interp;
+      radeon_render_tab_clip_elt[mode]( VB, 0, VB->EltPtr->count, 0 );
+
+      gl->CVA.elt_mode = gl_reduce_prim[mode];
+      VB->EltPtr = &(RADEON_DRIVER_DATA(VB)->clipped_elements);
+
+      radeon_project_clipped_vertices( VB );	/* clip->device space */
+      radeon_render_elements_direct( VB );
+   }
+
+   /* This indicates that there is no cached data to reuse */
+   VB->pipeline->data_valid = 0;
+   VB->pipeline->new_state = 0;
+}