summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuc Verhaegen <libv@skynet.be>2010-03-14 22:15:41 +0000
committerLuc Verhaegen <libv@skynet.be>2010-03-14 22:15:41 +0000
commit534eb0f6eea95ff5851d3cb74663679fcd375572 (patch)
treefe6b3c1c482725e17e0fed3ad9d9004e8870c988
parent0c8469d1892b441c38d1cb09d6bbf85692c89e92 (diff)
Import radeon, r200, r300 and r600 dri drivers from mesa 7.7.0.7.7.0
-rw-r--r--configure.ac9
-rw-r--r--r200/Makefile.am7
-rw-r--r--r200/r200_context.c20
-rw-r--r--r200/r200_state.c22
-rw-r--r--r200/r200_state.h2
-rw-r--r--r200/r200_state_init.c27
-rw-r--r--r200/r200_tcl.c15
-rw-r--r--r200/r200_tex.c22
-rw-r--r--r200/r200_texstate.c36
-rw-r--r--r300/Makefile.am8
-rw-r--r--r300/compiler/Makefile.am9
-rw-r--r--r300/compiler/SConscript37
-rw-r--r--r300/compiler/r300_fragprog.c170
-rw-r--r--r300/compiler/r300_fragprog.h5
-rw-r--r--r300/compiler/r300_fragprog_emit.c162
-rw-r--r--r300/compiler/r300_fragprog_swizzle.c107
-rw-r--r--r300/compiler/r300_fragprog_swizzle.h12
-rw-r--r--r300/compiler/r3xx_fragprog.c122
-rw-r--r--r300/compiler/r3xx_vertprog.c413
-rw-r--r--r300/compiler/r3xx_vertprog_dump.c2
-rw-r--r--r300/compiler/r500_fragprog.c250
-rw-r--r--r300/compiler/r500_fragprog.h14
-rw-r--r--r300/compiler/r500_fragprog_emit.c322
-rw-r--r--r300/compiler/radeon_code.c18
-rw-r--r--r300/compiler/radeon_code.h25
-rw-r--r--r300/compiler/radeon_compiler.c175
-rw-r--r--r300/compiler/radeon_compiler.h57
-rw-r--r--r300/compiler/radeon_dataflow.c170
-rw-r--r--r300/compiler/radeon_dataflow.h59
-rw-r--r--r300/compiler/radeon_dataflow_deadcode.c299
-rw-r--r--r300/compiler/radeon_dataflow_swizzles.c102
-rw-r--r--r300/compiler/radeon_nqssadce.c294
-rw-r--r--r300/compiler/radeon_nqssadce.h91
-rw-r--r--r300/compiler/radeon_opcodes.c431
-rw-r--r--r300/compiler/radeon_opcodes.h235
-rw-r--r--r300/compiler/radeon_pair_regalloc.c359
-rw-r--r--r300/compiler/radeon_pair_schedule.c508
-rw-r--r--r300/compiler/radeon_pair_translate.c255
-rw-r--r--r300/compiler/radeon_program.c154
-rw-r--r--r300/compiler/radeon_program.h166
-rw-r--r--r300/compiler/radeon_program_alu.c643
-rw-r--r--r300/compiler/radeon_program_alu.h10
-rw-r--r--r300/compiler/radeon_program_constants.h147
-rw-r--r--r300/compiler/radeon_program_pair.c896
-rw-r--r--r300/compiler/radeon_program_pair.h129
-rw-r--r--r300/compiler/radeon_program_print.c301
-rw-r--r--r300/compiler/radeon_swizzle.h57
-rw-r--r--r300/r300_cmdbuf.c43
-rw-r--r--r300/r300_context.c36
-rw-r--r--r300/r300_context.h5
-rw-r--r--r300/r300_draw.c2
-rw-r--r--r300/r300_emit.h1
-rw-r--r--r300/r300_fragprog_common.c20
-rw-r--r--r300/r300_reg.h12
-rw-r--r--r300/r300_render.c2
-rw-r--r--r300/r300_state.c44
-rw-r--r--r300/r300_tex.c31
-rw-r--r--r300/r300_texstate.c61
-rw-r--r--r300/r300_vertprog.c38
-rw-r--r--r300/radeon_context.h14
-rw-r--r--r300/radeon_mesa_to_rc.c223
-rw-r--r--r300/radeon_mesa_to_rc.h36
-rw-r--r--r600/Makefile.am8
-rw-r--r--r600/r600_cmdbuf.c234
-rw-r--r--r600/r600_cmdbuf.h16
-rw-r--r--r600/r600_context.c278
-rw-r--r--r600/r600_context.h60
-rw-r--r--r600/r600_reg_r6xx.h6
-rw-r--r--r600/r600_reg_r7xx.h2
-rw-r--r--r600/r600_tex.c24
-rw-r--r--r600/r600_texstate.c132
-rw-r--r--r600/r700_assembler.c596
-rw-r--r--r600/r700_assembler.h12
-rw-r--r--r600/r700_chip.c164
-rw-r--r--r600/r700_fragprog.c69
-rw-r--r--r600/r700_oglprog.c55
-rw-r--r--r600/r700_render.c811
-rw-r--r--r600/r700_shader.c90
-rw-r--r--r600/r700_shader.h2
-rw-r--r--r600/r700_state.c151
-rw-r--r--r600/r700_state.h1
-rw-r--r--r600/r700_vertprog.c230
-rw-r--r--r600/r700_vertprog.h15
-rw-r--r--radeon/Makefile.am6
-rw-r--r--radeon/radeon_bo.c110
-rw-r--r--radeon/radeon_bo_drm.h198
-rw-r--r--radeon/radeon_bo_int_drm.h45
-rw-r--r--radeon/radeon_bo_legacy.c85
-rw-r--r--radeon/radeon_bocs_wrapper.h3
-rw-r--r--radeon/radeon_buffer_objects.c5
-rw-r--r--radeon/radeon_common.c39
-rw-r--r--radeon/radeon_common.h3
-rw-r--r--radeon/radeon_common_context.c19
-rw-r--r--radeon/radeon_common_context.h8
-rw-r--r--radeon/radeon_context.c6
-rw-r--r--radeon/radeon_context.h4
-rw-r--r--radeon/radeon_cs.c95
-rw-r--r--radeon/radeon_cs_drm.h215
-rw-r--r--radeon/radeon_cs_int_drm.h66
-rw-r--r--radeon/radeon_cs_legacy.c72
-rw-r--r--radeon/radeon_cs_space_drm.c66
-rw-r--r--radeon/radeon_dma.c8
-rw-r--r--radeon/radeon_fbo.c189
-rw-r--r--radeon/radeon_lock.c9
-rw-r--r--radeon/radeon_mipmap_tree.c545
-rw-r--r--radeon/radeon_mipmap_tree.h31
-rw-r--r--radeon/radeon_queryobj.c56
-rw-r--r--radeon/radeon_screen.c133
-rw-r--r--radeon/radeon_span.c247
-rw-r--r--radeon/radeon_state.c25
-rw-r--r--radeon/radeon_state_init.c14
-rw-r--r--radeon/radeon_tex.c19
-rw-r--r--radeon/radeon_texstate.c69
-rw-r--r--radeon/radeon_texture.c643
-rw-r--r--radeon/radeon_texture.h23
115 files changed, 9048 insertions, 4906 deletions
diff --git a/configure.ac b/configure.ac
index d9be032..5cd936f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
# Process this file with autoconf to produce a configure script
AC_PREREQ(2.57)
-AC_INIT([mesa-dri-radeon], 7.6.0, [], mesa-dri-radeon)
+AC_INIT([mesa-dri-radeon], 7.7.0, [], mesa-dri-radeon)
AM_INIT_AUTOMAKE([dist-bzip2])
@@ -16,12 +16,11 @@ AC_PROG_CC
AC_HEADER_STDC
PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0])
-# we now need dri_metaops.h
-PKG_CHECK_MODULES([DRI], [libmesadri >= 7.6.0 libmesadri < 7.7.0
- libmesadricommon >= 7.6.0 libmesadricommon < 7.7.0])
+PKG_CHECK_MODULES([DRI], [libmesadri >= 7.7.0 libmesadri < 7.8.0
+ libmesadricommon >= 7.7.0 libmesadricommon < 7.8.0])
# libdrm 2.4.17 changed the api significantly.
-PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon libdrm <= 2.4.16],
+PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon libdrm >= 2.4.17],
HAVE_LIBDRM_RADEON=yes, HAVE_LIBDRM_RADEON=no)
AM_CONDITIONAL(HAVE_LIBDRM_RADEON, test "x$HAVE_LIBDRM_RADEON" = xyes)
diff --git a/r200/Makefile.am b/r200/Makefile.am
index 804b285..7cdc634 100644
--- a/r200/Makefile.am
+++ b/r200/Makefile.am
@@ -1,7 +1,6 @@
AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
-R200_CFLAGS = -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R200
-R200_CFLAGS += -I../radeon -I../radeon/server
+R200_CFLAGS = -DRADEON_R200 -I../radeon -I../radeon/server
r200_dri_la_LTLIBRARIES = r200_dri.la
r200_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R200_CFLAGS)
@@ -41,5 +40,7 @@ if HAVE_LIBDRM_RADEON
r200_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
r200_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
r200_dri_la_SOURCES += \
- ../radeon/radeon_cs_space_drm.c
+ ../radeon/radeon_cs_space_drm.c \
+ ../radeon/radeon_bo.c \
+ ../radeon/radeon_cs.c
endif
diff --git a/r200/r200_context.c b/r200/r200_context.c
index 3ddb5bf..5f985d6 100644
--- a/r200/r200_context.c
+++ b/r200/r200_context.c
@@ -75,7 +75,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define need_GL_NV_vertex_program
#define need_GL_ARB_point_parameters
#define need_GL_EXT_framebuffer_object
-#include "extension_helper.h"
+#include "main/remap_helper.h"
#define DRIVER_DATE "20060602"
@@ -115,7 +115,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
/* Extension strings exported by the R200 driver.
*/
-const struct dri_extension card_extensions[] =
+static const struct dri_extension card_extensions[] =
{
{ "GL_ARB_multitexture", NULL },
{ "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
@@ -146,31 +146,31 @@ const struct dri_extension card_extensions[] =
{ NULL, NULL }
};
-const struct dri_extension blend_extensions[] = {
+static const struct dri_extension blend_extensions[] = {
{ "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions },
{ "GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions },
{ NULL, NULL }
};
-const struct dri_extension ARB_vp_extension[] = {
+static const struct dri_extension ARB_vp_extension[] = {
{ "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }
};
-const struct dri_extension NV_vp_extension[] = {
+static const struct dri_extension NV_vp_extension[] = {
{ "GL_NV_vertex_program", GL_NV_vertex_program_functions }
};
-const struct dri_extension ATI_fs_extension[] = {
+static const struct dri_extension ATI_fs_extension[] = {
{ "GL_ATI_fragment_shader", GL_ATI_fragment_shader_functions }
};
-const struct dri_extension point_extensions[] = {
+static const struct dri_extension point_extensions[] = {
{ "GL_ARB_point_sprite", NULL },
{ "GL_ARB_point_parameters", GL_ARB_point_parameters_functions },
{ NULL, NULL }
};
-const struct dri_extension mm_extensions[] = {
+static const struct dri_extension mm_extensions[] = {
{ "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
{ NULL, NULL }
};
@@ -325,9 +325,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
_mesa_init_driver_functions(&functions);
r200InitDriverFuncs(&functions);
r200InitIoctlFuncs(&functions);
- r200InitStateFuncs(&functions, screen->kernel_mm);
+ r200InitStateFuncs(&functions);
r200InitTextureFuncs(&functions);
- r200InitShaderFuncs(&functions);
+ r200InitShaderFuncs(&functions);
radeonInitQueryObjFunctions(&functions);
if (!radeonInitContext(&rmesa->radeon, &functions,
diff --git a/r200/r200_state.c b/r200/r200_state.c
index 76852e3..6d99c03 100644
--- a/r200/r200_state.c
+++ b/r200/r200_state.c
@@ -1578,13 +1578,6 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
* Window position and viewport transformation
*/
-/*
- * To correctly position primitives:
- */
-#define SUBPIXEL_X 0.125
-#define SUBPIXEL_Y 0.125
-
-
/**
* Called when window size or position changes or viewport or depth range
* state is changed. We update the hardware viewport state here.
@@ -1609,9 +1602,9 @@ void r200UpdateWindow( GLcontext *ctx )
}
float_ui32_type sx = { v[MAT_SX] };
- float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
+ float_ui32_type tx = { v[MAT_TX] + xoffset };
float_ui32_type sy = { v[MAT_SY] * y_scale };
- float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y };
+ float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias };
float_ui32_type sz = { v[MAT_SZ] * depthScale };
float_ui32_type tz = { v[MAT_TZ] * depthScale };
@@ -1680,8 +1673,8 @@ void r200UpdateViewportOffset( GLcontext *ctx )
float_ui32_type tx;
float_ui32_type ty;
- tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
- ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+ tx.f = v[MAT_TX] + xoffset;
+ ty.f = (- v[MAT_TY]) + yoffset;
if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 ||
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 )
@@ -2483,7 +2476,7 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
}
/* Initialize the driver's state functions.
*/
-void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 )
+void r200InitStateFuncs( struct dd_function_table *functions )
{
functions->UpdateState = r200InvalidateState;
functions->LightingSpaceChange = r200LightingSpaceChange;
@@ -2517,10 +2510,7 @@ void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 )
functions->LogicOpcode = r200LogicOpCode;
functions->PolygonMode = r200PolygonMode;
functions->PolygonOffset = r200PolygonOffset;
- if (dri2)
- functions->PolygonStipple = r200PolygonStipple;
- else
- functions->PolygonStipple = radeonPolygonStipplePreKMS;
+ functions->PolygonStipple = r200PolygonStipple;
functions->PointParameterfv = r200PointParameter;
functions->PointSize = r200PointSize;
functions->RenderMode = r200RenderMode;
diff --git a/r200/r200_state.h b/r200/r200_state.h
index 9c62f0a..7b9b0c1 100644
--- a/r200/r200_state.h
+++ b/r200/r200_state.h
@@ -38,7 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_context.h"
extern void r200InitState( r200ContextPtr rmesa );
-extern void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 );
+extern void r200InitStateFuncs( struct dd_function_table *functions );
extern void r200InitTnlFuncs( GLcontext *ctx );
extern void r200UpdateMaterial( GLcontext *ctx );
diff --git a/r200/r200_state_init.c b/r200/r200_state_init.c
index 7697306..6c5a0b7 100644
--- a/r200/r200_state_init.c
+++ b/r200/r200_state_init.c
@@ -529,16 +529,18 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
if (rrb->cpp == 4)
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
- else switch (rrb->base._ActualFormat) {
- case GL_RGB5:
+ else switch (rrb->base.Format) {
+ case MESA_FORMAT_RGB565:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
break;
- case GL_RGBA4:
+ case MESA_FORMAT_ARGB4444:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
break;
- case GL_RGB5_A1:
+ case MESA_FORMAT_ARGB1555:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
break;
+ default:
+ _mesa_problem(ctx, "Unexpected format in ctx_emit_cs");
}
cbpitch = (rrb->pitch / rrb->cpp);
@@ -638,7 +640,7 @@ static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
OUT_BATCH_TABLE(atom->cmd, 10);
if (t && t->mt && !t->image_override) {
- OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t),
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
} else if (!t) {
/* workaround for old CS mechanism */
@@ -885,10 +887,8 @@ void r200InitState( r200ContextPtr rmesa )
}
}
}
- /* polygon stipple is done with irq for non-kms */
- if (rmesa->radeon.radeonScreen->kernel_mm) {
- ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 );
- }
+
+ ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 );
for (i = 0; i < 6; i++)
if (rmesa->radeon.radeonScreen->kernel_mm)
@@ -1120,12 +1120,11 @@ void r200InitState( r200ContextPtr rmesa )
rmesa->hw.sci.cmd[SCI_CMD_1] = CP_PACKET0(R200_RE_TOP_LEFT, 0);
rmesa->hw.sci.cmd[SCI_CMD_2] = CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0);
- if (rmesa->radeon.radeonScreen->kernel_mm) {
-
- rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
- rmesa->hw.stp.cmd[STP_DATA_0] = 0;
- rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
+ rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
+ rmesa->hw.stp.cmd[STP_DATA_0] = 0;
+ rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
rmesa->hw.mtl[0].emit = mtl_emit;
rmesa->hw.mtl[1].emit = mtl_emit;
diff --git a/r200/r200_tcl.c b/r200/r200_tcl.c
index c702910..e7d48a7 100644
--- a/r200/r200_tcl.c
+++ b/r200/r200_tcl.c
@@ -509,25 +509,26 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
prog to a not enabled output however, so just don't mess with it.
We only need to change compsel. */
GLuint out_compsel = 0;
- GLuint vp_out = rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten;
+ const GLbitfield64 vp_out =
+ rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten;
vimap_rev = &rmesa->curr_vp_hw->inputmap_rev[0];
- assert(vp_out & (1 << VERT_RESULT_HPOS));
+ assert(vp_out & BITFIELD64_BIT(VERT_RESULT_HPOS));
out_compsel = R200_OUTPUT_XYZW;
- if (vp_out & (1 << VERT_RESULT_COL0)) {
+ if (vp_out & BITFIELD64_BIT(VERT_RESULT_COL0)) {
out_compsel |= R200_OUTPUT_COLOR_0;
}
- if (vp_out & (1 << VERT_RESULT_COL1)) {
+ if (vp_out & BITFIELD64_BIT(VERT_RESULT_COL1)) {
out_compsel |= R200_OUTPUT_COLOR_1;
}
- if (vp_out & (1 << VERT_RESULT_FOGC)) {
+ if (vp_out & BITFIELD64_BIT(VERT_RESULT_FOGC)) {
out_compsel |= R200_OUTPUT_DISCRETE_FOG;
}
- if (vp_out & (1 << VERT_RESULT_PSIZ)) {
+ if (vp_out & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
out_compsel |= R200_OUTPUT_PT_SIZE;
}
for (i = VERT_RESULT_TEX0; i < VERT_RESULT_TEX6; i++) {
- if (vp_out & (1 << i)) {
+ if (vp_out & BITFIELD64_BIT(i)) {
out_compsel |= R200_OUTPUT_TEX_0 << (i - VERT_RESULT_TEX0);
}
}
diff --git a/r200/r200_tex.c b/r200/r200_tex.c
index 36d9e37..a417721 100644
--- a/r200/r200_tex.c
+++ b/r200/r200_tex.c
@@ -38,7 +38,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/enums.h"
#include "main/image.h"
#include "main/simple_list.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "main/teximage.h"
#include "main/texobj.h"
@@ -386,16 +385,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
case GL_TEXTURE_MAX_LEVEL:
case GL_TEXTURE_MIN_LOD:
case GL_TEXTURE_MAX_LOD:
- /* This isn't the most efficient solution but there doesn't appear to
- * be a nice alternative. Since there's no LOD clamping,
- * we just have to rely on loading the right subset of mipmap levels
- * to simulate a clamped LOD.
- */
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- t->validated = GL_FALSE;
- }
+ t->validated = GL_FALSE;
break;
default:
@@ -414,7 +404,7 @@ static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
(void *)texObj,
_mesa_lookup_enum_by_nr(texObj->Target));
}
-
+
if (rmesa) {
int i;
radeon_firevertices(&rmesa->radeon);
@@ -426,11 +416,9 @@ static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
}
}
}
-
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- }
+
+ radeon_miptree_unreference(&t->mt);
+
_mesa_delete_texture_object(ctx, texObj);
}
diff --git a/r200/r200_texstate.c b/r200/r200_texstate.c
index c948347..7782404 100644
--- a/r200/r200_texstate.c
+++ b/r200/r200_texstate.c
@@ -36,7 +36,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/imports.h"
#include "main/context.h"
#include "main/macros.h"
-#include "main/texformat.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
@@ -825,20 +824,14 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
radeon_bo_unref(rImage->bo);
rImage->bo = NULL;
}
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = NULL;
- }
- if (rImage->mt) {
- radeon_miptree_unreference(rImage->mt);
- rImage->mt = NULL;
- }
+
+ radeon_miptree_unreference(&t->mt);
+ radeon_miptree_unreference(&rImage->mt);
+
_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
texImage->RowStride = rb->pitch / rb->cpp;
- texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
- internalFormat,
- type, format, 0);
+
rImage->bo = rb->bo;
radeon_bo_ref(rImage->bo);
t->bo = rb->bo;
@@ -1426,10 +1419,9 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
*/
static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
{
- int firstlevel = t->mt ? t->mt->firstLevel : 0;
- const struct gl_texture_image *firstImage = t->base.Image[0][firstlevel];
+ const struct gl_texture_image *firstImage = t->base.Image[0][t->minLod];
GLint log2Width, log2Height, log2Depth, texelBytes;
-
+
if ( t->bo ) {
return;
}
@@ -1437,11 +1429,11 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
log2Width = firstImage->WidthLog2;
log2Height = firstImage->HeightLog2;
log2Depth = firstImage->DepthLog2;
- texelBytes = firstImage->TexFormat->TexelBytes;
+ texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
if (!t->image_override) {
- if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
+ if (VALID_FORMAT(firstImage->TexFormat)) {
const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
tx_table_be;
@@ -1449,17 +1441,17 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
R200_TXFORMAT_ALPHA_IN_MAP);
t->pp_txfilter &= ~R200_YUV_TO_RGB;
- t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
- t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
+ t->pp_txformat |= table[ firstImage->TexFormat ].format;
+ t->pp_txfilter |= table[ firstImage->TexFormat ].filter;
} else {
_mesa_problem(NULL, "unexpected texture format in %s",
__FUNCTION__);
return;
}
}
-
+
t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
- t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << R200_MAX_MIP_LEVEL_SHIFT;
+ t->pp_txfilter |= (t->maxLod - t->minLod) << R200_MAX_MIP_LEVEL_SHIFT;
t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
R200_TXFORMAT_HEIGHT_MASK |
@@ -1504,7 +1496,7 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
| ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT));
if ( !t->image_override ) {
- if (firstImage->IsCompressed)
+ if (_mesa_is_format_compressed(firstImage->TexFormat))
t->pp_txpitch = (firstImage->Width + 63) & ~(63);
else
t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
diff --git a/r300/Makefile.am b/r300/Makefile.am
index a678ddb..236710a 100644
--- a/r300/Makefile.am
+++ b/r300/Makefile.am
@@ -2,8 +2,7 @@ SUBDIRS = compiler
AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
-R300_CFLAGS = -DCOMPILE_R300 -DR200_MERGED=0 -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
-R300_CFLAGS += -I../radeon -I../radeon/server
+R300_CFLAGS = -DRADEON_R300 -I../radeon -I../radeon/server
r300_dri_la_LTLIBRARIES = r300_dri.la
r300_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R300_CFLAGS)
@@ -37,6 +36,7 @@ r300_dri_la_SOURCES = \
r300_vertprog.c \
r300_fragprog_common.c \
r300_shader.c \
+ radeon_mesa_to_rc.c \
r300_emit.c \
r300_swtcl.c
@@ -44,5 +44,7 @@ if HAVE_LIBDRM_RADEON
r300_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
r300_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
r300_dri_la_SOURCES += \
- ../radeon/radeon_cs_space_drm.c
+ ../radeon/radeon_cs_space_drm.c \
+ ../radeon/radeon_bo.c \
+ ../radeon/radeon_cs.c
endif
diff --git a/r300/compiler/Makefile.am b/r300/compiler/Makefile.am
index e24a719..c3dc39f 100644
--- a/r300/compiler/Makefile.am
+++ b/r300/compiler/Makefile.am
@@ -5,10 +5,17 @@ libr300compiler_la_CFLAGS = $(AM_CFLAGS) $(DRI_CFLAGS)
libr300compiler_la_SOURCES = \
radeon_code.c \
radeon_compiler.c \
- radeon_nqssadce.c \
radeon_program.c \
+ radeon_program_print.c \
+ radeon_opcodes.c \
radeon_program_alu.c \
radeon_program_pair.c \
+ radeon_pair_translate.c \
+ radeon_pair_schedule.c \
+ radeon_pair_regalloc.c \
+ radeon_dataflow.c \
+ radeon_dataflow_deadcode.c \
+ radeon_dataflow_swizzles.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \
diff --git a/r300/compiler/SConscript b/r300/compiler/SConscript
new file mode 100644
index 0000000..46075a8
--- /dev/null
+++ b/r300/compiler/SConscript
@@ -0,0 +1,37 @@
+Import('*')
+
+env = env.Clone()
+env.Append(CPPPATH = '#/include')
+env.Append(CPPPATH = '#/src/mesa')
+
+# temporary fix
+env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '')
+
+r300compiler = env.ConvenienceLibrary(
+ target = 'r300compiler',
+ source = [
+ 'radeon_code.c',
+ 'radeon_compiler.c',
+ 'radeon_program.c',
+ 'radeon_program_print.c',
+ 'radeon_opcodes.c',
+ 'radeon_program_alu.c',
+ 'radeon_program_pair.c',
+ 'radeon_pair_translate.c',
+ 'radeon_pair_schedule.c',
+ 'radeon_pair_regalloc.c',
+ 'radeon_dataflow.c',
+ 'radeon_dataflow_deadcode.c',
+ 'radeon_dataflow_swizzles.c',
+ 'r3xx_fragprog.c',
+ 'r300_fragprog.c',
+ 'r300_fragprog_swizzle.c',
+ 'r300_fragprog_emit.c',
+ 'r500_fragprog.c',
+ 'r500_fragprog_emit.c',
+ 'r3xx_vertprog.c',
+ 'r3xx_vertprog_dump.c',
+ 'memory_pool.c',
+ ])
+
+Return('r300compiler')
diff --git a/r300/compiler/r300_fragprog.c b/r300/compiler/r300_fragprog.c
index 6c9fba4..aa69b0f 100644
--- a/r300/compiler/r300_fragprog.c
+++ b/r300/compiler/r300_fragprog.c
@@ -27,17 +27,17 @@
#include "r300_fragprog.h"
-#include "shader/prog_parameter.h"
+#include <stdio.h>
#include "../r300_reg.h"
-static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
+static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
{
- struct prog_src_register reg = { 0, };
+ struct rc_src_register reg = { 0, };
- reg.File = PROGRAM_STATE_VAR;
+ reg.File = RC_FILE_CONSTANT;
reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
- reg.Swizzle = SWIZZLE_WWWW;
+ reg.Swizzle = RC_SWIZZLE_WWWW;
return reg;
}
@@ -47,7 +47,7 @@ static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int t
* - extract operand swizzles
* - introduce a temporary register when write masks are needed
*/
-GLboolean r300_transform_TEX(
+int r300_transform_TEX(
struct radeon_compiler * c,
struct rc_instruction* inst,
void* data)
@@ -55,77 +55,77 @@ GLboolean r300_transform_TEX(
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
- if (inst->I.Opcode != OPCODE_TEX &&
- inst->I.Opcode != OPCODE_TXB &&
- inst->I.Opcode != OPCODE_TXP &&
- inst->I.Opcode != OPCODE_KIL)
- return GL_FALSE;
+ if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+ inst->U.I.Opcode != RC_OPCODE_TXB &&
+ inst->U.I.Opcode != RC_OPCODE_TXP &&
+ inst->U.I.Opcode != RC_OPCODE_KIL)
+ return 0;
/* ARB_shadow & EXT_shadow_funcs */
- if (inst->I.Opcode != OPCODE_KIL &&
- c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) {
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
- if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- inst->I.Opcode = OPCODE_MOV;
+ if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
- if (comparefunc == GL_ALWAYS) {
- inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
+ if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
} else {
- inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
+ inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit);
}
- return GL_TRUE;
+ return 1;
} else {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+ unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode;
struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
int pass, fail;
- inst_rcp->I.Opcode = OPCODE_RCP;
- inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
- inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
- inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
- inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- inst_cmp->I.DstReg = inst->I.DstReg;
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = rc_find_free_temporary(c);
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
-
- inst_mad->I.Opcode = OPCODE_MAD;
- inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
- inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
- inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
- inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
- inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
- inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+ inst_cmp->U.I.DstReg = inst->U.I.DstReg;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index;
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+ inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
- inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
- inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
+ inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW;
else
- inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
+ inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
- inst_cmp->I.Opcode = OPCODE_CMP;
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
/* DstReg has been filled out above */
- inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
+ inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
pass = 1;
fail = 2;
} else {
@@ -133,9 +133,9 @@ GLboolean r300_transform_TEX(
fail = 1;
}
- inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
- inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
- inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
+ inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
+ inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
+ inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit);
}
}
@@ -143,52 +143,52 @@ GLboolean r300_transform_TEX(
* instead of [0..Width]x[0..Height].
* Add a scaling instruction.
*/
- if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) {
+ if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.TexSrcTarget == RC_TEXTURE_RECT) {
struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev);
- inst_mul->I.Opcode = OPCODE_MUL;
- inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mul->I.DstReg.Index = rc_find_free_temporary(c);
- inst_mul->I.SrcReg[0] = inst->I.SrcReg[0];
- inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR;
- inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit);
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mul->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
- reset_srcreg(&inst->I.SrcReg[0]);
- inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index;
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mul->U.I.DstReg.Index;
}
/* Cannot write texture to output registers or with masks */
- if (inst->I.Opcode != OPCODE_KIL &&
- (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) {
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.DstReg.WriteMask != RC_MASK_XYZW)) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg = inst->I.DstReg;
- inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg = inst->U.I.DstReg;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
}
/* Cannot read texture coordinate from constants file */
- if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
+ if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- reset_srcreg(&inst->I.SrcReg[0]);
- inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
}
- return GL_TRUE;
+ return 1;
}
/* just some random things... */
diff --git a/r300/compiler/r300_fragprog.h b/r300/compiler/r300_fragprog.h
index 0ac46db..418df36 100644
--- a/r300/compiler/r300_fragprog.h
+++ b/r300/compiler/r300_fragprog.h
@@ -33,9 +33,6 @@
#ifndef __R300_FRAGPROG_H_
#define __R300_FRAGPROG_H_
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
-
#include "radeon_compiler.h"
#include "radeon_program.h"
@@ -44,6 +41,6 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler
extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
-extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data);
+extern int r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data);
#endif
diff --git a/r300/compiler/r300_fragprog_emit.c b/r300/compiler/r300_fragprog_emit.c
index c7227bb..bbc0003 100644
--- a/r300/compiler/r300_fragprog_emit.c
+++ b/r300/compiler/r300_fragprog_emit.c
@@ -56,7 +56,6 @@ struct r300_emit_state {
};
#define PROG_CODE \
- struct r300_emit_state * emit = (struct r300_emit_state*)data; \
struct r300_fragment_program_compiler *c = emit->compiler; \
struct r300_fragment_program_code *code = &c->code->code.r300
@@ -69,64 +68,76 @@ struct r300_emit_state {
/**
* Mark a temporary register as used.
*/
-static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
+static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
{
if (index > code->pixsize)
code->pixsize = index;
}
+static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{
+ if (src.File == RC_FILE_CONSTANT) {
+ return src.Index | (1 << 5);
+ } else if (src.File == RC_FILE_TEMPORARY) {
+ use_temporary(code, src.Index);
+ return src.Index;
+ }
+
+ return 0;
+}
+
-static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
+static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
switch(opcode) {
- case OPCODE_CMP: return R300_ALU_OUTC_CMP;
- case OPCODE_DP3: return R300_ALU_OUTC_DP3;
- case OPCODE_DP4: return R300_ALU_OUTC_DP4;
- case OPCODE_FRC: return R300_ALU_OUTC_FRC;
+ case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
+ case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
+ case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
+ case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
default:
error("translate_rgb_opcode(%i): Unknown opcode", opcode);
/* fall through */
- case OPCODE_NOP:
+ case RC_OPCODE_NOP:
/* fall through */
- case OPCODE_MAD: return R300_ALU_OUTC_MAD;
- case OPCODE_MAX: return R300_ALU_OUTC_MAX;
- case OPCODE_MIN: return R300_ALU_OUTC_MIN;
- case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
+ case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
+ case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
+ case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
+ case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
}
}
-static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
+static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
switch(opcode) {
- case OPCODE_CMP: return R300_ALU_OUTA_CMP;
- case OPCODE_DP3: return R300_ALU_OUTA_DP4;
- case OPCODE_DP4: return R300_ALU_OUTA_DP4;
- case OPCODE_EX2: return R300_ALU_OUTA_EX2;
- case OPCODE_FRC: return R300_ALU_OUTA_FRC;
- case OPCODE_LG2: return R300_ALU_OUTA_LG2;
+ case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
+ case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
+ case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
+ case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
+ case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
+ case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
default:
error("translate_rgb_opcode(%i): Unknown opcode", opcode);
/* fall through */
- case OPCODE_NOP:
+ case RC_OPCODE_NOP:
/* fall through */
- case OPCODE_MAD: return R300_ALU_OUTA_MAD;
- case OPCODE_MAX: return R300_ALU_OUTA_MAX;
- case OPCODE_MIN: return R300_ALU_OUTA_MIN;
- case OPCODE_RCP: return R300_ALU_OUTA_RCP;
- case OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+ case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
+ case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
+ case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
+ case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
+ case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
}
}
/**
* Emit one paired ALU instruction.
*/
-static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
+static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
{
PROG_CODE;
if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
error("Too many ALU instructions");
- return GL_FALSE;
+ return 0;
}
int ip = code->alu.length++;
@@ -136,17 +147,13 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
- GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
- if (!inst->RGB.Src[j].Constant)
- use_temporary(code, inst->RGB.Src[j].Index);
+ unsigned int src = use_source(code, inst->RGB.Src[j]);
code->alu.inst[ip].rgb_addr |= src << (6*j);
- src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
- if (!inst->Alpha.Src[j].Constant)
- use_temporary(code, inst->Alpha.Src[j].Index);
+ src = use_source(code, inst->Alpha.Src[j]);
code->alu.inst[ip].alpha_addr |= src << (6*j);
- GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+ unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
arg |= inst->RGB.Arg[j].Abs << 6;
arg |= inst->RGB.Arg[j].Negate << 5;
code->alu.inst[ip].rgb_inst |= arg << (7*j);
@@ -186,27 +193,27 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
if (inst->Alpha.DepthWriteMask) {
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
emit->node_flags |= R300_W_OUT;
- c->code->writes_depth = GL_TRUE;
+ c->code->writes_depth = 1;
}
- return GL_TRUE;
+ return 1;
}
/**
* Finish the current node without advancing to the next one.
*/
-static GLboolean finish_node(struct r300_emit_state * emit)
+static int finish_node(struct r300_emit_state * emit)
{
struct r300_fragment_program_compiler * c = emit->compiler;
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
- struct radeon_pair_instruction inst;
- _mesa_bzero(&inst, sizeof(inst));
+ struct rc_pair_instruction inst;
+ memset(&inst, 0, sizeof(inst));
if (!emit_alu(emit, &inst))
- return GL_FALSE;
+ return 0;
}
unsigned alu_offset = emit->node_first_alu;
@@ -217,7 +224,7 @@ static GLboolean finish_node(struct r300_emit_state * emit)
if (code->tex.length == emit->node_first_tex) {
if (emit->current_node > 0) {
error("Node %i has no TEX instructions", emit->current_node);
- return GL_FALSE;
+ return 0;
}
tex_end = 0;
@@ -240,7 +247,7 @@ static GLboolean finish_node(struct r300_emit_state * emit)
(tex_end << R300_TEX_SIZE_SHIFT) |
emit->node_flags;
- return GL_TRUE;
+ return 1;
}
@@ -248,79 +255,72 @@ static GLboolean finish_node(struct r300_emit_state * emit)
* Begin a block of texture instructions.
* Create the necessary indirection.
*/
-static GLboolean begin_tex(void* data)
+static int begin_tex(struct r300_emit_state * emit)
{
PROG_CODE;
if (code->alu.length == emit->node_first_alu &&
code->tex.length == emit->node_first_tex) {
- return GL_TRUE;
+ return 1;
}
if (emit->current_node == 3) {
error("Too many texture indirections");
- return GL_FALSE;
+ return 0;
}
if (!finish_node(emit))
- return GL_FALSE;
+ return 0;
emit->current_node++;
emit->node_first_tex = code->tex.length;
emit->node_first_alu = code->alu.length;
emit->node_flags = 0;
- return GL_TRUE;
+ return 1;
}
-static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst)
+static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
{
PROG_CODE;
if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
error("Too many TEX instructions");
- return GL_FALSE;
+ return 0;
}
- GLuint unit = inst->TexSrcUnit;
- GLuint dest = inst->DestIndex;
- GLuint opcode;
+ unsigned int unit = inst->U.I.TexSrcUnit;
+ unsigned int dest = inst->U.I.DstReg.Index;
+ unsigned int opcode;
- switch(inst->Opcode) {
- case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
- case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
- case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
- case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+ case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+ case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+ case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
default:
- error("Unknown texture opcode %i", inst->Opcode);
- return GL_FALSE;
+ error("Unknown texture opcode %i", inst->U.I.Opcode);
+ return 0;
}
- if (inst->Opcode == RADEON_OPCODE_KIL) {
+ if (inst->U.I.Opcode == RC_OPCODE_KIL) {
unit = 0;
dest = 0;
} else {
use_temporary(code, dest);
}
- use_temporary(code, inst->SrcIndex);
+ use_temporary(code, inst->U.I.SrcReg[0].Index);
code->tex.inst[code->tex.length++] =
- (inst->SrcIndex << R300_SRC_ADDR_SHIFT) |
+ (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
(dest << R300_DST_ADDR_SHIFT) |
(unit << R300_TEX_ID_SHIFT) |
(opcode << R300_TEX_INST_SHIFT);
- return GL_TRUE;
+ return 1;
}
-static const struct radeon_pair_handler pair_handler = {
- .EmitPaired = &emit_alu,
- .EmitTex = &emit_tex,
- .BeginTexBlock = &begin_tex,
- .MaxHwTemps = R300_PFS_NUM_TEMP_REGS
-};
-
/**
* Final compilation step: Turn the intermediate radeon_program into
* machine-readable instructions.
@@ -329,13 +329,31 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
{
struct r300_emit_state emit;
struct r300_fragment_program_code *code = &compiler->code->code.r300;
+ struct rc_instruction * inst;
memset(&emit, 0, sizeof(emit));
emit.compiler = compiler;
- _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
+ memset(code, 0, sizeof(struct r300_fragment_program_code));
+
+ for(inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+ inst = inst->Next) {
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+ begin_tex(&emit);
+ continue;
+ }
+
+ emit_tex(&emit, inst);
+ } else {
+ emit_alu(&emit, &inst->U.P);
+ }
+ }
+
+ if (code->pixsize >= R300_PFS_NUM_TEMP_REGS)
+ rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
- radeonPairProgram(compiler, &pair_handler, &emit);
if (compiler->Base.Error)
return;
diff --git a/r300/compiler/r300_fragprog_swizzle.c b/r300/compiler/r300_fragprog_swizzle.c
index 1b14cc3..cfa48a5 100644
--- a/r300/compiler/r300_fragprog_swizzle.c
+++ b/r300/compiler/r300_fragprog_swizzle.c
@@ -33,16 +33,17 @@
#include "r300_fragprog_swizzle.h"
+#include <stdio.h>
+
#include "../r300_reg.h"
-#include "radeon_nqssadce.h"
#include "radeon_compiler.h"
-#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
+#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
struct swizzle_data {
- GLuint hash; /**< swizzle value this matches */
- GLuint base; /**< base value for hw swizzle */
- GLuint stride; /**< difference in base between arg0/1/2 */
+ unsigned int hash; /**< swizzle value this matches */
+ unsigned int base; /**< base value for hw swizzle */
+ unsigned int stride; /**< difference in base between arg0/1/2 */
};
static const struct swizzle_data native_swizzles[] = {
@@ -65,15 +66,15 @@ static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swi
* Find a native RGB swizzle that matches the given swizzle.
* Returns 0 if none found.
*/
-static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle)
+static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
{
int i, comp;
for(i = 0; i < num_native_swizzles; ++i) {
const struct swizzle_data* sd = &native_swizzles[i];
for(comp = 0; comp < 3; ++comp) {
- GLuint swz = GET_SWZ(swizzle, comp);
- if (swz == SWIZZLE_NIL)
+ unsigned int swz = GET_SWZ(swizzle, comp);
+ if (swz == RC_SWIZZLE_UNUSED)
continue;
if (swz != GET_SWZ(sd->hash, comp))
break;
@@ -90,71 +91,72 @@ static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle)
* Check whether the given instruction supports the swizzle and negate
* combinations in the given source register.
*/
-GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
+static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
if (reg.Abs)
- reg.Negate = NEGATE_NONE;
+ reg.Negate = RC_MASK_NONE;
- if (opcode == OPCODE_KIL ||
- opcode == OPCODE_TEX ||
- opcode == OPCODE_TXB ||
- opcode == OPCODE_TXP) {
+ if (opcode == RC_OPCODE_KIL ||
+ opcode == RC_OPCODE_TEX ||
+ opcode == RC_OPCODE_TXB ||
+ opcode == RC_OPCODE_TXP) {
int j;
if (reg.Abs || reg.Negate)
- return GL_FALSE;
+ return 0;
for(j = 0; j < 4; ++j) {
- GLuint swz = GET_SWZ(reg.Swizzle, j);
- if (swz == SWIZZLE_NIL)
+ unsigned int swz = GET_SWZ(reg.Swizzle, j);
+ if (swz == RC_SWIZZLE_UNUSED)
continue;
if (swz != j)
- return GL_FALSE;
+ return 0;
}
- return GL_TRUE;
+ return 1;
}
- GLuint relevant = 0;
+ unsigned int relevant = 0;
int j;
for(j = 0; j < 3; ++j)
- if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL)
+ if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
relevant |= 1 << j;
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
- return GL_FALSE;
+ return 0;
if (!lookup_native_swizzle(reg.Swizzle))
- return GL_FALSE;
+ return 0;
- return GL_TRUE;
+ return 1;
}
-/**
- * Generate MOV dst, src using only native swizzles.
- */
-void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
+static void r300_swizzle_split(
+ struct rc_src_register src, unsigned int mask,
+ struct rc_swizzle_split * split)
{
if (src.Abs)
- src.Negate = NEGATE_NONE;
+ src.Negate = RC_MASK_NONE;
+
+ split->NumPhases = 0;
- while(dst.WriteMask) {
+ while(mask) {
const struct swizzle_data *best_swizzle = 0;
- GLuint best_matchcount = 0;
- GLuint best_matchmask = 0;
+ unsigned int best_matchcount = 0;
+ unsigned int best_matchmask = 0;
int i, comp;
for(i = 0; i < num_native_swizzles; ++i) {
const struct swizzle_data *sd = &native_swizzles[i];
- GLuint matchcount = 0;
- GLuint matchmask = 0;
+ unsigned int matchcount = 0;
+ unsigned int matchmask = 0;
for(comp = 0; comp < 3; ++comp) {
- if (!GET_BIT(dst.WriteMask, comp))
+ if (!GET_BIT(mask, comp))
continue;
- GLuint swz = GET_SWZ(src.Swizzle, comp);
- if (swz == SWIZZLE_NIL)
+ unsigned int swz = GET_SWZ(src.Swizzle, comp);
+ if (swz == RC_SWIZZLE_UNUSED)
continue;
if (swz == GET_SWZ(sd->hash, comp)) {
/* check if the negate bit of current component
@@ -170,34 +172,35 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst,
best_swizzle = sd;
best_matchcount = matchcount;
best_matchmask = matchmask;
- if (matchmask == (dst.WriteMask & WRITEMASK_XYZ))
+ if (matchmask == (mask & RC_MASK_XYZ))
break;
}
}
- struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
- inst->I.Opcode = OPCODE_MOV;
- inst->I.DstReg = dst;
- inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
- inst->I.SrcReg[0] = src;
- inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
- /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
+ if (mask & RC_MASK_W)
+ best_matchmask |= RC_MASK_W;
- dst.WriteMask &= ~inst->I.DstReg.WriteMask;
+ split->Phase[split->NumPhases++] = best_matchmask;
+ mask &= ~best_matchmask;
}
}
+struct rc_swizzle_caps r300_swizzle_caps = {
+ .IsNative = r300_swizzle_is_native,
+ .Split = r300_swizzle_split
+};
+
/**
* Translate an RGB (XYZ) swizzle into the hardware code for the given
* instruction source.
*/
-GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle)
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
{
const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
if (!sd) {
- _mesa_printf("Not a native swizzle: %08x\n", swizzle);
+ fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
return 0;
}
@@ -209,15 +212,15 @@ GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle)
* Translate an Alpha (W) swizzle into the hardware code for the given
* instruction source.
*/
-GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle)
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
{
if (swizzle < 3)
return swizzle + 3*src;
switch(swizzle) {
- case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
- case SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
- case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
+ case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
+ case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
+ case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
default: return R300_ALU_ARGA_ONE;
}
}
diff --git a/r300/compiler/r300_fragprog_swizzle.h b/r300/compiler/r300_fragprog_swizzle.h
index 231bf4e..118476a 100644
--- a/r300/compiler/r300_fragprog_swizzle.h
+++ b/r300/compiler/r300_fragprog_swizzle.h
@@ -28,15 +28,11 @@
#ifndef __R300_FRAGPROG_SWIZZLE_H_
#define __R300_FRAGPROG_SWIZZLE_H_
-#include "main/glheader.h"
-#include "shader/prog_instruction.h"
+#include "radeon_swizzle.h"
-struct nqssadce_state;
+extern struct rc_swizzle_caps r300_swizzle_caps;
-GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
-void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
-
-GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle);
-GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle);
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/r300/compiler/r3xx_fragprog.c b/r300/compiler/r3xx_fragprog.c
index 76c3a7e..5581f25 100644
--- a/r300/compiler/r3xx_fragprog.c
+++ b/r300/compiler/r3xx_fragprog.c
@@ -22,22 +22,21 @@
#include "radeon_compiler.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
+#include <stdio.h>
-#include "radeon_nqssadce.h"
+#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
#include "r300_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "r500_fragprog.h"
-static void nqssadce_init(struct nqssadce_state* s)
+static void dataflow_outputs_mark_use(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
{
- struct r300_fragment_program_compiler * c = s->UserData;
- s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW;
- s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W;
+ struct r300_fragment_program_compiler * c = userdata;
+ callback(data, c->OutputColor, RC_MASK_XYZW);
+ callback(data, c->OutputDepth, RC_MASK_W);
}
static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
@@ -45,35 +44,35 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
struct rc_instruction *rci;
for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
- struct prog_instruction * inst = &rci->I;
+ struct rc_sub_instruction * inst = &rci->U.I;
- if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth)
+ if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
continue;
- if (inst->DstReg.WriteMask & WRITEMASK_Z) {
- inst->DstReg.WriteMask = WRITEMASK_W;
+ if (inst->DstReg.WriteMask & RC_MASK_Z) {
+ inst->DstReg.WriteMask = RC_MASK_W;
} else {
inst->DstReg.WriteMask = 0;
continue;
}
switch (inst->Opcode) {
- case OPCODE_FRC:
- case OPCODE_MOV:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ case RC_OPCODE_FRC:
+ case RC_OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
break;
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ case RC_OPCODE_ADD:
+ case RC_OPCODE_MAX:
+ case RC_OPCODE_MIN:
+ case RC_OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
break;
- case OPCODE_CMP:
- case OPCODE_MAD:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ case RC_OPCODE_CMP:
+ case RC_OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]);
break;
default:
// Scalar instructions needn't be reswizzled
@@ -89,11 +88,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
if (c->is_r500) {
struct radeon_program_transformation transformations[] = {
{ &r500_transform_TEX, c },
+ { &r500_transform_IF, 0 },
{ &radeonTransformALU, 0 },
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 }
};
- radeonLocalTransform(&c->Base, 4, transformations);
+ radeonLocalTransform(&c->Base, 5, transformations);
+
+ c->Base.SwizzleCaps = &r500_swizzle_caps;
} else {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_TEX, c },
@@ -101,32 +103,66 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{ &radeonTransformTrigSimple, 0 }
};
radeonLocalTransform(&c->Base, 3, transformations);
+
+ c->Base.SwizzleCaps = &r300_swizzle_caps;
}
if (c->Base.Debug) {
- _mesa_printf("Fragment Program: After native rewrite:\n");
+ fprintf(stderr, "Fragment Program: After native rewrite:\n");
rc_print_program(&c->Base.Program);
fflush(stderr);
}
- if (c->is_r500) {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r500FPIsNativeSwizzle,
- .BuildSwizzle = &r500FPBuildSwizzle
- };
- radeonNqssaDce(&c->Base, &nqssadce, c);
- } else {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r300FPIsNativeSwizzle,
- .BuildSwizzle = &r300FPBuildSwizzle
- };
- radeonNqssaDce(&c->Base, &nqssadce, c);
+ rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c);
+ if (c->Base.Error)
+ return;
+
+ if (c->Base.Debug) {
+ fprintf(stderr, "Fragment Program: After deadcode:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stderr);
+ }
+
+ rc_dataflow_swizzles(&c->Base);
+ if (c->Base.Error)
+ return;
+
+ if (c->Base.Debug) {
+ fprintf(stderr, "Compiler: after dataflow passes:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stderr);
+ }
+
+ rc_pair_translate(c);
+ if (c->Base.Error)
+ return;
+
+ if (c->Base.Debug) {
+ fprintf(stderr, "Compiler: after pair translate:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stderr);
}
+ rc_pair_schedule(c);
+ if (c->Base.Error)
+ return;
+
+ if (c->Base.Debug) {
+ fprintf(stderr, "Compiler: after pair scheduling:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stderr);
+ }
+
+ if (c->is_r500)
+ rc_pair_regalloc(c, 128);
+ else
+ rc_pair_regalloc(c, R300_PFS_NUM_TEMP_REGS);
+
+ if (c->Base.Error)
+ return;
+
if (c->Base.Debug) {
- _mesa_printf("Compiler: after NqSSA-DCE:\n");
+ fprintf(stderr, "Compiler: after pair register allocation:\n");
rc_print_program(&c->Base.Program);
fflush(stderr);
}
diff --git a/r300/compiler/r3xx_vertprog.c b/r300/compiler/r3xx_vertprog.c
index dad27fc..1b2cb8d 100644
--- a/r300/compiler/r3xx_vertprog.c
+++ b/r300/compiler/r3xx_vertprog.c
@@ -22,13 +22,13 @@
#include "radeon_compiler.h"
+#include <stdio.h>
+
#include "../r300_reg.h"
-#include "radeon_nqssadce.h"
-#include "radeon_program.h"
+#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
-
-#include "shader/prog_print.h"
+#include "radeon_swizzle.h"
/*
@@ -42,104 +42,83 @@
t_swizzle(y), \
t_swizzle(y), \
t_src_class(vpi->SrcReg[x].File), \
- NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+ RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
-static unsigned long t_dst_mask(GLuint mask)
+static unsigned long t_dst_mask(unsigned int mask)
{
- /* WRITEMASK_* is equivalent to VSF_FLAG_* */
- return mask & WRITEMASK_XYZW;
+ /* RC_MASK_* is equivalent to VSF_FLAG_* */
+ return mask & RC_MASK_XYZW;
}
-static unsigned long t_dst_class(gl_register_file file)
+static unsigned long t_dst_class(rc_register_file file)
{
-
switch (file) {
- case PROGRAM_TEMPORARY:
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_TEMPORARY:
return PVS_DST_REG_TEMPORARY;
- case PROGRAM_OUTPUT:
+ case RC_FILE_OUTPUT:
return PVS_DST_REG_OUT;
- case PROGRAM_ADDRESS:
+ case RC_FILE_ADDRESS:
return PVS_DST_REG_A0;
- /*
- case PROGRAM_INPUT:
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_STATE_VAR:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
}
}
static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
- struct prog_dst_register *dst)
+ struct rc_dst_register *dst)
{
- if (dst->File == PROGRAM_OUTPUT)
+ if (dst->File == RC_FILE_OUTPUT)
return vp->outputs[dst->Index];
return dst->Index;
}
-static unsigned long t_src_class(gl_register_file file)
+static unsigned long t_src_class(rc_register_file file)
{
switch (file) {
- case PROGRAM_BUILTIN:
- case PROGRAM_TEMPORARY:
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_NONE:
+ case RC_FILE_TEMPORARY:
return PVS_SRC_REG_TEMPORARY;
- case PROGRAM_INPUT:
+ case RC_FILE_INPUT:
return PVS_SRC_REG_INPUT;
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_CONSTANT:
- case PROGRAM_STATE_VAR:
+ case RC_FILE_CONSTANT:
return PVS_SRC_REG_CONSTANT;
- /*
- case PROGRAM_OUTPUT:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
}
}
-static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b)
+static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
{
unsigned long aclass = t_src_class(a.File);
unsigned long bclass = t_src_class(b.File);
if (aclass != bclass)
- return GL_FALSE;
+ return 0;
if (aclass == PVS_SRC_REG_TEMPORARY)
- return GL_FALSE;
+ return 0;
if (a.RelAddr || b.RelAddr)
- return GL_TRUE;
+ return 1;
if (a.Index != b.Index)
- return GL_TRUE;
+ return 1;
- return GL_FALSE;
+ return 0;
}
-static INLINE unsigned long t_swizzle(GLubyte swizzle)
+static inline unsigned long t_swizzle(unsigned int swizzle)
{
- /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+ /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
return swizzle;
}
static unsigned long t_src_index(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- if (src->File == PROGRAM_INPUT) {
+ if (src->File == RC_FILE_INPUT) {
assert(vp->inputs[src->Index] != -1);
return vp->inputs[src->Index];
} else {
@@ -155,9 +134,9 @@ static unsigned long t_src_index(struct r300_vertex_program_code *vp,
/* these two functions should probably be merged... */
static unsigned long t_src(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
*/
return PVS_SRC_OPERAND(t_src_index(vp, src),
@@ -170,9 +149,9 @@ static unsigned long t_src(struct r300_vertex_program_code *vp,
}
static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
*/
return PVS_SRC_OPERAND(t_src_index(vp, src),
@@ -181,79 +160,79 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_src_class(src->File),
- src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(src->RelAddr << 4);
}
-static GLboolean valid_dst(struct r300_vertex_program_code *vp,
- struct prog_dst_register *dst)
+static int valid_dst(struct r300_vertex_program_code *vp,
+ struct rc_dst_register *dst)
{
- if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
- return GL_FALSE;
- } else if (dst->File == PROGRAM_ADDRESS) {
+ if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return 0;
+ } else if (dst->File == RC_FILE_ADDRESS) {
assert(dst->Index == 0);
}
- return GL_TRUE;
+ return 1;
}
static void ei_vector1(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
}
static void ei_vector2(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src(vp, &vpi->SrcReg[0]);
inst[2] = t_src(vp, &vpi->SrcReg[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
+ inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
}
static void ei_math1(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
}
static void ei_lit(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
@@ -263,27 +242,27 @@ static void ei_lit(struct r300_vertex_program_code *vp,
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
}
static void ei_mad(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
/* Remarks about hardware limitations of MAD
* (please preserve this comment, as this information is _NOT_
@@ -311,22 +290,22 @@ static void ei_mad(struct r300_vertex_program_code *vp,
* according to AMD docs, this should improve performance by one clock
* as a nice side bonus.
*/
- if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY &&
- vpi->SrcReg[1].File == PROGRAM_TEMPORARY &&
- vpi->SrcReg[2].File == PROGRAM_TEMPORARY &&
+ if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
- GL_FALSE,
- GL_TRUE,
+ 0,
+ 1,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
} else {
inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
@@ -337,17 +316,17 @@ static void ei_mad(struct r300_vertex_program_code *vp,
}
static void ei_pow(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
@@ -362,8 +341,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
compiler->SetHwInputOutput(compiler);
for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
- struct prog_instruction *vpi = &rci->I;
- GLuint *inst = compiler->code->body.d + compiler->code->length;
+ struct rc_sub_instruction *vpi = &rci->U.I;
+ unsigned int *inst = compiler->code->body.d + compiler->code->length;
/* Skip instructions writing to non-existing destination */
if (!valid_dst(compiler->code, &vpi->DstReg))
@@ -375,26 +354,26 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
}
switch (vpi->Opcode) {
- case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
- case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
- case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
- case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
- case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
- case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
- case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
- case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
- case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
- case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
- case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
- case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
- case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
- case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
- case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
- case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
- case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
- case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
- case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
- case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+ case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+ case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+ case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+ case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+ case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+ case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+ case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+ case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+ case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+ case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+ case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+ case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
default:
rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
return;
@@ -408,38 +387,37 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
}
struct temporary_allocation {
- GLuint Allocated:1;
- GLuint HwTemp:15;
+ unsigned int Allocated:1;
+ unsigned int HwTemp:15;
struct rc_instruction * LastRead;
};
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *inst;
- GLuint num_orig_temps = 0;
- GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ unsigned int num_orig_temps = 0;
+ char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
struct temporary_allocation * ta;
- GLuint i, j;
+ unsigned int i, j;
compiler->code->num_temporaries = 0;
memset(hwtemps, 0, sizeof(hwtemps));
/* Pass 1: Count original temporaries and allocate structures */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
- GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
- if (inst->I.SrcReg[i].Index >= num_orig_temps)
- num_orig_temps = inst->I.SrcReg[i].Index + 1;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
}
}
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
- if (inst->I.DstReg.Index >= num_orig_temps)
- num_orig_temps = inst->I.DstReg.Index + 1;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.DstReg.Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.DstReg.Index + 1;
}
}
}
@@ -450,32 +428,31 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
/* Pass 2: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
- ta[inst->I.SrcReg[i].Index].LastRead = inst;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
+ ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
}
}
/* Pass 3: Register allocation */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
- GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
- GLuint orig = inst->I.SrcReg[i].Index;
- inst->I.SrcReg[i].Index = ta[orig].HwTemp;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.SrcReg[i].Index;
+ inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
if (ta[orig].Allocated && inst == ta[orig].LastRead)
- hwtemps[ta[orig].HwTemp] = GL_FALSE;
+ hwtemps[ta[orig].HwTemp] = 0;
}
}
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
- GLuint orig = inst->I.DstReg.Index;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.DstReg.Index;
if (!ta[orig].Allocated) {
for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
@@ -485,16 +462,16 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
if (j >= VSF_MAX_FRAGMENT_TEMPS) {
fprintf(stderr, "Out of hw temporaries\n");
} else {
- ta[orig].Allocated = GL_TRUE;
+ ta[orig].Allocated = 1;
ta[orig].HwTemp = j;
- hwtemps[j] = GL_TRUE;
+ hwtemps[j] = 1;
if (j >= compiler->code->num_temporaries)
compiler->code->num_temporaries = j + 1;
}
}
- inst->I.DstReg.Index = ta[orig].HwTemp;
+ inst->U.I.DstReg.Index = ta[orig].HwTemp;
}
}
}
@@ -505,45 +482,45 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
* Vertex engine cannot read two inputs or two constants at the same time.
* Introduce intermediate MOVs to temporary registers to account for this.
*/
-static GLboolean transform_source_conflicts(
+static int transform_source_conflicts(
struct radeon_compiler *c,
struct rc_instruction* inst,
void* unused)
{
- GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (num_operands == 3) {
- if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
- || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
+ if (opcode->NumSrcRegs == 3) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
+ || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
int tmpreg = rc_find_free_temporary(c);
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = tmpreg;
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
-
- reset_srcreg(&inst->I.SrcReg[2]);
- inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[2].Index = tmpreg;
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+
+ reset_srcreg(&inst->U.I.SrcReg[2]);
+ inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[2].Index = tmpreg;
}
}
- if (num_operands >= 2) {
- if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
+ if (opcode->NumSrcRegs >= 2) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
int tmpreg = rc_find_free_temporary(c);
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = tmpreg;
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
-
- reset_srcreg(&inst->I.SrcReg[1]);
- inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[1].Index = tmpreg;
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+
+ reset_srcreg(&inst->U.I.SrcReg[1]);
+ inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[1].Index = tmpreg;
}
}
- return GL_TRUE;
+ return 1;
}
static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
@@ -554,44 +531,52 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
if ((compiler->RequiredOutputs & (1 << i)) &&
!(compiler->Base.Program.OutputsWritten & (1 << i))) {
struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
- inst->I.Opcode = OPCODE_MOV;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
- inst->I.DstReg.File = PROGRAM_OUTPUT;
- inst->I.DstReg.Index = i;
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = i;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
- inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
- inst->I.SrcReg[0].Index = 0;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
+ inst->U.I.SrcReg[0].Index = 0;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
compiler->Base.Program.OutputsWritten |= 1 << i;
}
}
}
-static void nqssadceInit(struct nqssadce_state* s)
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
{
- struct r300_vertex_program_compiler * compiler = s->UserData;
+ struct r300_vertex_program_compiler * c = userdata;
int i;
- for(i = 0; i < VERT_RESULT_MAX; ++i) {
- if (compiler->RequiredOutputs & (1 << i))
- s->Outputs[i].Sourced = WRITEMASK_XYZW;
+ for(i = 0; i < 32; ++i) {
+ if (c->RequiredOutputs & (1 << i))
+ callback(data, i, RC_MASK_XYZW);
}
}
-static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
(void) opcode;
(void) reg;
- return GL_TRUE;
+ return 1;
}
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+ .IsNative = &swizzle_is_native,
+ .Split = 0 /* should never be called */
+};
+
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
+ compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
addArtificialOutputs(compiler);
{
@@ -624,22 +609,22 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
fflush(stderr);
}
- {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadceInit,
- .IsNativeSwizzle = &swizzleIsNative,
- .BuildSwizzle = NULL
- };
- radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
+ rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after deadcode:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stderr);
+ }
- /* We need this step for reusing temporary registers */
- allocate_temporary_registers(compiler);
+ rc_dataflow_swizzles(&compiler->Base);
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after NQSSADCE:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stderr);
- }
+ allocate_temporary_registers(compiler);
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after dataflow:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stderr);
}
translate_vertex_program(compiler);
diff --git a/r300/compiler/r3xx_vertprog_dump.c b/r300/compiler/r3xx_vertprog_dump.c
index 980ef3e..66f9b05 100644
--- a/r300/compiler/r3xx_vertprog_dump.c
+++ b/r300/compiler/r3xx_vertprog_dump.c
@@ -146,7 +146,7 @@ static void r300_vs_op_dump(uint32_t op)
static void r300_vs_src_dump(uint32_t src)
{
fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
- (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3],
+ (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3],
src & (1 << 25) ? "-" : " ",
r300_vs_swiz_debug[(src >> 13) & 0x7],
src & (1 << 26) ? "-" : " ",
diff --git a/r300/compiler/r500_fragprog.c b/r300/compiler/r500_fragprog.c
index 7e2faed..d87acec 100644
--- a/r300/compiler/r500_fragprog.c
+++ b/r300/compiler/r500_fragprog.c
@@ -27,15 +27,17 @@
#include "r500_fragprog.h"
+#include <stdio.h>
+
#include "../r300_reg.h"
-static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
+static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
{
- struct prog_src_register reg = { 0, };
+ struct rc_src_register reg = { 0, };
- reg.File = PROGRAM_STATE_VAR;
+ reg.File = RC_FILE_CONSTANT;
reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
- reg.Swizzle = SWIZZLE_WWWW;
+ reg.Swizzle = RC_SWIZZLE_WWWW;
return reg;
}
@@ -44,7 +46,7 @@ static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int t
* - implement texture compare (shadow extensions)
* - extract non-native source / destination operands
*/
-GLboolean r500_transform_TEX(
+int r500_transform_TEX(
struct radeon_compiler * c,
struct rc_instruction * inst,
void* data)
@@ -52,77 +54,77 @@ GLboolean r500_transform_TEX(
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
- if (inst->I.Opcode != OPCODE_TEX &&
- inst->I.Opcode != OPCODE_TXB &&
- inst->I.Opcode != OPCODE_TXP &&
- inst->I.Opcode != OPCODE_KIL)
- return GL_FALSE;
+ if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+ inst->U.I.Opcode != RC_OPCODE_TXB &&
+ inst->U.I.Opcode != RC_OPCODE_TXP &&
+ inst->U.I.Opcode != RC_OPCODE_KIL)
+ return 0;
/* ARB_shadow & EXT_shadow_funcs */
- if (inst->I.Opcode != OPCODE_KIL &&
- c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) {
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
- if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- inst->I.Opcode = OPCODE_MOV;
+ if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
- if (comparefunc == GL_ALWAYS) {
- inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
+ if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
} else {
- inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
+ inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit);
}
- return GL_TRUE;
+ return 1;
} else {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+ unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode;
struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
int pass, fail;
- inst_rcp->I.Opcode = OPCODE_RCP;
- inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
- inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
- inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
- inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- inst_cmp->I.DstReg = inst->I.DstReg;
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = rc_find_free_temporary(c);
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
-
- inst_mad->I.Opcode = OPCODE_MAD;
- inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
- inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
- inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
- inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
- inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
- inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+ inst_cmp->U.I.DstReg = inst->U.I.DstReg;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index;
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+ inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
- inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
- inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
+ inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW;
else
- inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
+ inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
- inst_cmp->I.Opcode = OPCODE_CMP;
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
/* DstReg has been filled out above */
- inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
+ inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
pass = 1;
fail = 2;
} else {
@@ -130,131 +132,161 @@ GLboolean r500_transform_TEX(
fail = 1;
}
- inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
- inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
- inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
+ inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
+ inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
+ inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit);
}
}
/* Cannot write texture to output registers */
- if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) {
+ if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg = inst->I.DstReg;
- inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg = inst->U.I.DstReg;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
}
/* Cannot read texture coordinate from constants file */
- if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
+ if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- reset_srcreg(&inst->I.SrcReg[0]);
- inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
}
- return GL_TRUE;
+ return 1;
+}
+
+/**
+ * Rewrite IF instructions to use the ALU result special register.
+ */
+int r500_transform_IF(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_IF)
+ return 0;
+
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.WriteMask = 0;
+ inst_mov->U.I.WriteALUResult = RC_ALURESULT_W;
+ inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle,
+ RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X);
+
+ inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+ inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].Negate = 0;
+
+ return 1;
}
-GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
- GLuint relevant;
+ unsigned int relevant;
int i;
- if (opcode == OPCODE_TEX ||
- opcode == OPCODE_TXB ||
- opcode == OPCODE_TXP ||
- opcode == OPCODE_KIL) {
+ if (opcode == RC_OPCODE_TEX ||
+ opcode == RC_OPCODE_TXB ||
+ opcode == RC_OPCODE_TXP ||
+ opcode == RC_OPCODE_KIL) {
if (reg.Abs)
- return GL_FALSE;
+ return 0;
- if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE))
- return GL_FALSE;
+ if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
+ return 0;
if (reg.Negate)
- reg.Negate ^= NEGATE_XYZW;
+ reg.Negate ^= RC_MASK_XYZW;
for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(reg.Swizzle, i);
- if (swz == SWIZZLE_NIL) {
+ unsigned int swz = GET_SWZ(reg.Swizzle, i);
+ if (swz == RC_SWIZZLE_UNUSED) {
reg.Negate &= ~(1 << i);
continue;
}
if (swz >= 4)
- return GL_FALSE;
+ return 0;
}
if (reg.Negate)
- return GL_FALSE;
+ return 0;
- return GL_TRUE;
- } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
+ return 1;
+ } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) {
/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
* if it doesn't fit perfectly into a .xyzw case... */
- if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate)
- return GL_TRUE;
+ if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
+ return 1;
- return GL_FALSE;
+ return 0;
} else {
/* ALU instructions support almost everything */
if (reg.Abs)
- return GL_TRUE;
+ return 1;
relevant = 0;
for(i = 0; i < 3; ++i) {
- GLuint swz = GET_SWZ(reg.Swizzle, i);
- if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+ unsigned int swz = GET_SWZ(reg.Swizzle, i);
+ if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
relevant |= 1 << i;
}
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
- return GL_FALSE;
+ return 0;
- return GL_TRUE;
+ return 1;
}
}
/**
- * Implement a MOV with a potentially non-native swizzle.
+ * Split source register access.
*
* The only thing we *cannot* do in an ALU instruction is per-component
- * negation. Therefore, we split the MOV into two instructions when necessary.
+ * negation.
*/
-void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+ struct rc_swizzle_split * split)
{
- GLuint negatebase[2] = { 0, 0 };
+ unsigned int negatebase[2] = { 0, 0 };
int i;
for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(src.Swizzle, i);
- if (swz == SWIZZLE_NIL)
+ unsigned int swz = GET_SWZ(src.Swizzle, i);
+ if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
continue;
negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
}
+ split->NumPhases = 0;
+
for(i = 0; i <= 1; ++i) {
if (!negatebase[i])
continue;
- struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
- inst->I.Opcode = OPCODE_MOV;
- inst->I.DstReg = dst;
- inst->I.DstReg.WriteMask = negatebase[i];
- inst->I.SrcReg[0] = src;
- inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
+ split->Phase[split->NumPhases++] = negatebase[i];
}
}
+struct rc_swizzle_caps r500_swizzle_caps = {
+ .IsNative = r500_swizzle_is_native,
+ .Split = r500_swizzle_split
+};
static char *toswiz(int swiz_val) {
switch(swiz_val) {
diff --git a/r300/compiler/r500_fragprog.h b/r300/compiler/r500_fragprog.h
index 9091f65..0918cdf 100644
--- a/r300/compiler/r500_fragprog.h
+++ b/r300/compiler/r500_fragprog.h
@@ -33,21 +33,21 @@
#ifndef __R500_FRAGPROG_H_
#define __R500_FRAGPROG_H_
-#include "shader/prog_parameter.h"
-#include "shader/prog_instruction.h"
-
#include "radeon_compiler.h"
-#include "radeon_nqssadce.h"
+#include "radeon_swizzle.h"
extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
-extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
+extern struct rc_swizzle_caps r500_swizzle_caps;
-extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src);
+extern int r500_transform_TEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data);
-extern GLboolean r500_transform_TEX(
+extern int r500_transform_IF(
struct radeon_compiler * c,
struct rc_instruction * inst,
void* data);
diff --git a/r300/compiler/r500_fragprog_emit.c b/r300/compiler/r500_fragprog_emit.c
index d694725..2942267 100644
--- a/r300/compiler/r500_fragprog_emit.c
+++ b/r300/compiler/r500_fragprog_emit.c
@@ -37,10 +37,6 @@
*
* \author Corbin Simpson <MostAwesomeDude@gmail.com>
*
- * \todo Depth write, WPOS/FOGC inputs
- *
- * \todo FogOption
- *
*/
#include "r500_fragprog.h"
@@ -51,7 +47,6 @@
#define PROG_CODE \
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
struct r500_fragment_program_code *code = &c->code->code.r500
#define error(fmt, args...) do { \
@@ -60,63 +55,80 @@
} while(0)
-static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode)
+struct branch_info {
+ int If;
+ int Else;
+ int Endif;
+};
+
+struct emit_state {
+ struct radeon_compiler * C;
+ struct r500_fragment_program_code * Code;
+
+ struct branch_info * Branches;
+ unsigned int CurrentBranchDepth;
+ unsigned int BranchesReserved;
+
+ unsigned int MaxBranchDepth;
+};
+
+static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
switch(opcode) {
- case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
- case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
- case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
- case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
- case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
- case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
+ case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+ case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+ case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
+ case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
+ case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
+ case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
default:
error("translate_rgb_op(%d): unknown opcode\n", opcode);
/* fall through */
- case OPCODE_NOP:
+ case RC_OPCODE_NOP:
/* fall through */
- case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
- case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
- case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
- case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
+ case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
+ case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
+ case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
+ case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
}
}
-static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode)
+static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
switch(opcode) {
- case OPCODE_CMP: return R500_ALPHA_OP_CMP;
- case OPCODE_COS: return R500_ALPHA_OP_COS;
- case OPCODE_DDX: return R500_ALPHA_OP_MDH;
- case OPCODE_DDY: return R500_ALPHA_OP_MDV;
- case OPCODE_DP3: return R500_ALPHA_OP_DP;
- case OPCODE_DP4: return R500_ALPHA_OP_DP;
- case OPCODE_EX2: return R500_ALPHA_OP_EX2;
- case OPCODE_FRC: return R500_ALPHA_OP_FRC;
- case OPCODE_LG2: return R500_ALPHA_OP_LN2;
+ case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
+ case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
+ case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
+ case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
+ case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
+ case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
+ case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
+ case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
+ case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
default:
error("translate_alpha_op(%d): unknown opcode\n", opcode);
/* fall through */
- case OPCODE_NOP:
+ case RC_OPCODE_NOP:
/* fall through */
- case OPCODE_MAD: return R500_ALPHA_OP_MAD;
- case OPCODE_MAX: return R500_ALPHA_OP_MAX;
- case OPCODE_MIN: return R500_ALPHA_OP_MIN;
- case OPCODE_RCP: return R500_ALPHA_OP_RCP;
- case OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
- case OPCODE_SIN: return R500_ALPHA_OP_SIN;
+ case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
+ case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
+ case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
+ case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
+ case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
+ case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
}
}
-static GLuint fix_hw_swizzle(GLuint swz)
+static unsigned int fix_hw_swizzle(unsigned int swz)
{
if (swz == 5) swz = 6;
- if (swz == SWIZZLE_NIL) swz = 4;
+ if (swz == RC_SWIZZLE_UNUSED) swz = 4;
return swz;
}
-static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
+static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
{
- GLuint t = inst->RGB.Arg[arg].Source;
+ unsigned int t = inst->RGB.Arg[arg].Source;
int comp;
t |= inst->RGB.Arg[arg].Negate << 11;
t |= inst->RGB.Arg[arg].Abs << 12;
@@ -127,39 +139,57 @@ static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
return t;
}
-static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i)
+static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
{
- GLuint t = inst->Alpha.Arg[i].Source;
+ unsigned int t = inst->Alpha.Arg[i].Source;
t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
t |= inst->Alpha.Arg[i].Negate << 5;
t |= inst->Alpha.Arg[i].Abs << 6;
return t;
}
-static void use_temporary(struct r500_fragment_program_code* code, GLuint index)
+static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
+{
+ switch(func) {
+ case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
+ case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
+ case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
+ case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
+ default:
+ rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
+ return 0;
+ }
+}
+
+static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
{
if (index > code->max_temp_idx)
code->max_temp_idx = index;
}
-static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
+static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
{
- if (!src.Constant)
+ if (src.File == RC_FILE_CONSTANT) {
+ return src.Index | 0x100;
+ } else if (src.File == RC_FILE_TEMPORARY) {
use_temporary(code, src.Index);
- return src.Index | src.Constant << 8;
+ return src.Index;
+ }
+
+ return 0;
}
/**
* Emit a paired ALU instruction.
*/
-static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
+static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
{
PROG_CODE;
if (code->inst_end >= 511) {
error("emit_alu: Too many instructions");
- return GL_FALSE;
+ return;
}
int ip = ++code->inst_end;
@@ -167,17 +197,22 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
- if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
+ if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
- else
+ if (inst->WriteALUResult) {
+ error("%s: cannot write output and ALU result at the same time");
+ return;
+ }
+ } else {
code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+ }
code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
if (inst->Alpha.DepthWriteMask) {
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
- c->code->writes_depth = GL_TRUE;
+ c->code->writes_depth = 1;
}
code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
@@ -206,12 +241,21 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
- return GL_TRUE;
+ if (inst->WriteALUResult) {
+ code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
+
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
+ else
+ code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
+
+ code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
+ }
}
-static GLuint translate_strq_swizzle(GLuint swizzle)
+static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
- GLuint swiz = 0;
+ unsigned int swiz = 0;
int i;
for (i = 0; i < 4; i++)
swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
@@ -221,67 +265,194 @@ static GLuint translate_strq_swizzle(GLuint swizzle)
/**
* Emit a single TEX instruction
*/
-static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst)
+static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
{
PROG_CODE;
if (code->inst_end >= 511) {
error("emit_tex: Too many instructions");
- return GL_FALSE;
+ return 0;
}
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_TEX
- | (inst->WriteMask << 11)
+ | (inst->DstReg.WriteMask << 11)
| R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
| R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
- if (inst->TexSrcTarget == TEXTURE_RECT_INDEX)
- code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+ if (inst->TexSrcTarget == RC_TEXTURE_RECT)
+ code->inst[ip].inst1 |= R500_TEX_UNSCALED;
switch (inst->Opcode) {
- case RADEON_OPCODE_KIL:
+ case RC_OPCODE_KIL:
code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
break;
- case RADEON_OPCODE_TEX:
+ case RC_OPCODE_TEX:
code->inst[ip].inst1 |= R500_TEX_INST_LD;
break;
- case RADEON_OPCODE_TXB:
+ case RC_OPCODE_TXB:
code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
break;
- case RADEON_OPCODE_TXP:
+ case RC_OPCODE_TXP:
code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
break;
default:
error("emit_tex can't handle opcode %x\n", inst->Opcode);
}
- code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex)
- | (translate_strq_swizzle(inst->SrcSwizzle) << 8)
- | R500_TEX_DST_ADDR(inst->DestIndex)
+ use_temporary(code, inst->SrcReg[0].Index);
+ if (inst->Opcode != RC_OPCODE_KIL)
+ use_temporary(code, inst->DstReg.Index);
+
+ code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+ | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
+ | R500_TEX_DST_ADDR(inst->DstReg.Index)
| R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
| R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
- return GL_TRUE;
+ return 1;
}
-static const struct radeon_pair_handler pair_handler = {
- .EmitPaired = emit_paired,
- .EmitTex = emit_tex,
- .MaxHwTemps = 128
-};
+static void grow_branches(struct emit_state * s)
+{
+ unsigned int newreserved = s->BranchesReserved * 2;
+ struct branch_info * newbranches;
+
+ if (!newreserved)
+ newreserved = 4;
+
+ newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info));
+ memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info));
+
+ s->Branches = newbranches;
+ s->BranchesReserved = newreserved;
+}
+
+static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
+{
+ if (s->Code->inst_end >= 511) {
+ rc_error(s->C, "emit_tex: Too many instructions");
+ return;
+ }
+
+ unsigned int newip = ++s->Code->inst_end;
+
+ s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
+
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ if (s->CurrentBranchDepth >= 32) {
+ rc_error(s->C, "Branch depth exceeds hardware limit");
+ return;
+ }
+
+ if (s->CurrentBranchDepth >= s->BranchesReserved)
+ grow_branches(s);
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
+ branch->If = newip;
+ branch->Else = -1;
+ branch->Endif = -1;
+
+ if (s->CurrentBranchDepth > s->MaxBranchDepth)
+ s->MaxBranchDepth = s->CurrentBranchDepth;
+
+ /* actual instruction is filled in at ENDIF time */
+ } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) {
+ if (!s->CurrentBranchDepth) {
+ rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+ return;
+ }
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+ branch->Else = newip;
+
+ /* actual instruction is filled in at ENDIF time */
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ if (!s->CurrentBranchDepth) {
+ rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+ return;
+ }
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+ branch->Endif = newip;
+
+ s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
+ | R500_FC_B_OP0_INCR /* increment branch counter if stay */
+ ;
+
+ if (branch->Else >= 0) {
+ /* increment branch counter also if jump */
+ s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
+ s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
+
+ s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_B_ELSE /* all active pixels want to jump */
+ | R500_FC_B_OP0_NONE /* no counter op if stay */
+ | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
+ | R500_FC_B_POP_CNT(1)
+ ;
+ s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+ } else {
+ /* don't touch branch counter on jump */
+ s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
+ s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+ }
+
+ s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+ | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+ | R500_FC_B_OP1_NONE /* no branch counter if stay */
+ | R500_FC_B_POP_CNT(1)
+ ;
+ s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+
+ s->CurrentBranchDepth--;
+ } else {
+ rc_error(s->C, "%s: unknown opcode %i\n", __FUNCTION__, inst->U.I.Opcode);
+ }
+}
void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
{
+ struct emit_state s;
struct r500_fragment_program_code *code = &compiler->code->code.r500;
+ struct rc_instruction * inst;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &compiler->Base;
+ s.Code = code;
- _mesa_bzero(code, sizeof(*code));
+ memset(code, 0, sizeof(*code));
code->max_temp_idx = 1;
code->inst_end = -1;
- radeonPairProgram(compiler, &pair_handler, compiler);
+ for (inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+ inst = inst->Next) {
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->IsFlowControl) {
+ emit_flowcontrol(&s, inst);
+ } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+ continue;
+ } else {
+ emit_tex(compiler, &inst->U.I);
+ }
+ } else {
+ emit_paired(compiler, &inst->U.P);
+ }
+ }
+
+ if (code->max_temp_idx >= 128)
+ rc_error(&compiler->Base, "Too many hardware temporaries used");
+
if (compiler->Base.Error)
return;
@@ -296,4 +467,11 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
+
+ if (s.MaxBranchDepth >= 4) {
+ if (code->max_temp_idx < 1)
+ code->max_temp_idx = 1;
+
+ code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
+ }
}
diff --git a/r300/compiler/radeon_code.c b/r300/compiler/radeon_code.c
index a9dedf7..853b2be 100644
--- a/r300/compiler/radeon_code.c
+++ b/r300/compiler/radeon_code.c
@@ -25,11 +25,13 @@
*
*/
-#include "main/mtypes.h"
-#include "shader/prog_instruction.h"
-
#include "radeon_code.h"
+#include <stdlib.h>
+#include <string.h>
+
+#include "radeon_program.h"
+
void rc_constants_init(struct rc_constant_list * c)
{
memset(c, 0, sizeof(*c));
@@ -138,13 +140,13 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
unsigned index;
int free_index = -1;
struct rc_constant constant;
- unsigned comp;
for(index = 0; index < c->Count; ++index) {
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ unsigned comp;
for(comp = 0; comp < c->Constants[index].Size; ++comp) {
if (c->Constants[index].u.Immediate[comp] == data) {
- *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp);
return index;
}
}
@@ -155,9 +157,9 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
}
if (free_index >= 0) {
- comp = c->Constants[free_index].Size++;
+ unsigned comp = c->Constants[free_index].Size++;
c->Constants[free_index].u.Immediate[comp] = data;
- *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp);
return free_index;
}
@@ -165,7 +167,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
constant.Type = RC_CONSTANT_IMMEDIATE;
constant.Size = 1;
constant.u.Immediate[0] = data;
- *swizzle = SWIZZLE_XXXX;
+ *swizzle = RC_SWIZZLE_XXXX;
return rc_constants_add(c, &constant);
}
diff --git a/r300/compiler/radeon_code.h b/r300/compiler/radeon_code.h
index 3e88554..902b7cf 100644
--- a/r300/compiler/radeon_code.h
+++ b/r300/compiler/radeon_code.h
@@ -89,6 +89,23 @@ unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const floa
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
/**
+ * Compare functions.
+ *
+ * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
+ * the correct GL compare function.
+ */
+typedef enum {
+ RC_COMPARE_FUNC_NEVER = 0,
+ RC_COMPARE_FUNC_LESS,
+ RC_COMPARE_FUNC_EQUAL,
+ RC_COMPARE_FUNC_LEQUAL,
+ RC_COMPARE_FUNC_GREATER,
+ RC_COMPARE_FUNC_NOTEQUAL,
+ RC_COMPARE_FUNC_GEQUAL,
+ RC_COMPARE_FUNC_ALWAYS
+} rc_compare_func;
+
+/**
* Stores state that influences the compilation of a fragment program.
*/
struct r300_fragment_program_external_state {
@@ -105,10 +122,12 @@ struct r300_fragment_program_external_state {
/**
* If the sampler is used as a shadow sampler,
- * this field is (texture_compare_func - GL_NEVER).
- * [e.g. if compare function is GL_LEQUAL, this field is 3]
+ * this field specifies the compare function.
+ *
+ * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
*
* Otherwise, this field is 0.
+ * \sa rc_compare_func
*/
unsigned texture_compare_func : 3;
} unit[16];
@@ -163,6 +182,8 @@ struct r500_fragment_program_code {
int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
int max_temp_idx;
+
+ uint32_t us_fc_ctrl;
};
struct rX00_fragment_program_code {
diff --git a/r300/compiler/radeon_compiler.c b/r300/compiler/radeon_compiler.c
index da950d5..c0e7a7f 100644
--- a/r300/compiler/radeon_compiler.c
+++ b/r300/compiler/radeon_compiler.c
@@ -23,6 +23,8 @@
#include "radeon_compiler.h"
#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
#include "radeon_program.h"
@@ -34,7 +36,7 @@ void rc_init(struct radeon_compiler * c)
memory_pool_init(&c->Pool);
c->Program.Instructions.Prev = &c->Program.Instructions;
c->Program.Instructions.Next = &c->Program.Instructions;
- c->Program.Instructions.I.Opcode = OPCODE_END;
+ c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
}
void rc_destroy(struct radeon_compiler * c)
@@ -60,7 +62,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...)
{
va_list ap;
- c->Error = GL_TRUE;
+ c->Error = 1;
if (!c->ErrorMsg) {
/* Only remember the first error */
@@ -91,28 +93,63 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...)
}
}
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
+{
+ rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
+ return 1;
+}
+
+/**
+ * Recompute c->Program.InputsRead and c->Program.OutputsWritten
+ * based on which inputs and outputs are actually referenced
+ * in program instructions.
+ */
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+ struct rc_instruction *inst;
+
+ c->Program.InputsRead = 0;
+ c->Program.OutputsWritten = 0;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+ {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ int i;
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
+ c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
+ c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
+ }
+ }
+}
+
/**
* Rewrite the program such that everything that source the given input
* register will source new_input instead.
*/
-void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input)
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
{
struct rc_instruction * inst;
c->Program.InputsRead &= ~(1 << input);
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
- const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
unsigned i;
- for(i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) {
- inst->I.SrcReg[i].File = new_input.File;
- inst->I.SrcReg[i].Index = new_input.Index;
- inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle);
- if (!inst->I.SrcReg[i].Abs) {
- inst->I.SrcReg[i].Negate ^= new_input.Negate;
- inst->I.SrcReg[i].Abs = new_input.Abs;
+ for(i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
+ inst->U.I.SrcReg[i].File = new_input.File;
+ inst->U.I.SrcReg[i].Index = new_input.Index;
+ inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
+ if (!inst->U.I.SrcReg[i].Abs) {
+ inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
+ inst->U.I.SrcReg[i].Abs = new_input.Abs;
}
c->Program.InputsRead |= 1 << new_input.Index;
@@ -134,12 +171,12 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou
c->Program.OutputsWritten &= ~(1 << output);
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
- const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) {
- inst->I.DstReg.Index = new_output;
- inst->I.DstReg.WriteMask &= writemask;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+ inst->U.I.DstReg.Index = new_output;
+ inst->U.I.DstReg.WriteMask &= writemask;
c->Program.OutputsWritten |= 1 << new_output;
}
@@ -157,33 +194,33 @@ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_ou
struct rc_instruction * inst;
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
- const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) {
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = tempreg;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = tempreg;
}
}
}
inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
- inst->I.Opcode = OPCODE_MOV;
- inst->I.DstReg.File = PROGRAM_OUTPUT;
- inst->I.DstReg.Index = output;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = output;
- inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[0].Index = tempreg;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = tempreg;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
- inst->I.Opcode = OPCODE_MOV;
- inst->I.DstReg.File = PROGRAM_OUTPUT;
- inst->I.DstReg.Index = dup_output;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = dup_output;
- inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[0].Index = tempreg;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = tempreg;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
c->Program.OutputsWritten |= 1 << dup_output;
}
@@ -201,60 +238,60 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig
/* perspective divide */
struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
- inst_rcp->I.Opcode = OPCODE_RCP;
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
- inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_rcp->I.DstReg.Index = tempregi;
- inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = tempregi;
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
- inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT;
- inst_rcp->I.SrcReg[0].Index = new_input;
- inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+ inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst_rcp->U.I.SrcReg[0].Index = new_input;
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp);
- inst_mul->I.Opcode = OPCODE_MUL;
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
- inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mul->I.DstReg.Index = tempregi;
- inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = tempregi;
+ inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
- inst_mul->I.SrcReg[0].File = PROGRAM_INPUT;
- inst_mul->I.SrcReg[0].Index = new_input;
+ inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst_mul->U.I.SrcReg[0].Index = new_input;
- inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst_mul->I.SrcReg[1].Index = tempregi;
- inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.SrcReg[1].Index = tempregi;
+ inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
/* viewport transformation */
struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul);
- inst_mad->I.Opcode = OPCODE_MAD;
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
- inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mad->I.DstReg.Index = tempregi;
- inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = tempregi;
+ inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
- inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst_mad->I.SrcReg[0].Index = tempregi;
- inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+ inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[0].Index = tempregi;
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO);
- inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR;
- inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
- inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO);
- inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR;
- inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index;
- inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+ inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[2].Index = inst_mad->U.I.SrcReg[1].Index;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO);
struct rc_instruction * inst;
for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
- const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
unsigned i;
- for(i = 0; i < numsrcs; i++) {
- if (inst->I.SrcReg[i].File == PROGRAM_INPUT &&
- inst->I.SrcReg[i].Index == wpos) {
- inst->I.SrcReg[i].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[i].Index = tempregi;
+ for(i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+ inst->U.I.SrcReg[i].Index == wpos) {
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = tempregi;
}
}
}
diff --git a/r300/compiler/radeon_compiler.h b/r300/compiler/radeon_compiler.h
index e63ab88..87a732c 100644
--- a/r300/compiler/radeon_compiler.h
+++ b/r300/compiler/radeon_compiler.h
@@ -23,35 +23,11 @@
#ifndef RADEON_COMPILER_H
#define RADEON_COMPILER_H
-#include "main/mtypes.h"
-#include "shader/prog_instruction.h"
-
#include "memory_pool.h"
#include "radeon_code.h"
+#include "radeon_program.h"
-
-struct rc_instruction {
- struct rc_instruction * Prev;
- struct rc_instruction * Next;
- struct prog_instruction I;
-};
-
-struct rc_program {
- /**
- * Instructions.Next points to the first instruction,
- * Instructions.Prev points to the last instruction.
- */
- struct rc_instruction Instructions;
-
- /* Long term, we should probably remove InputsRead & OutputsWritten,
- * since updating dependent state can be fragile, and they aren't
- * actually used very often. */
- uint32_t InputsRead;
- uint32_t OutputsWritten;
- uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
-
- struct rc_constant_list Constants;
-};
+struct rc_swizzle_caps;
struct radeon_compiler {
struct memory_pool Pool;
@@ -59,6 +35,14 @@ struct radeon_compiler {
unsigned Debug:1;
unsigned Error:1;
char * ErrorMsg;
+
+ /**
+ * Variables used internally, not be touched by callers
+ * of the compiler
+ */
+ /*@{*/
+ struct rc_swizzle_caps * SwizzleCaps;
+ /*@}*/
};
void rc_init(struct radeon_compiler * c);
@@ -67,11 +51,26 @@ void rc_destroy(struct radeon_compiler * c);
void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
-void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
+
+/**
+ * This macro acts like an if-statement that can be used to implement
+ * non-aborting assertions in the compiler.
+ *
+ * It checks whether \p cond is true. If not, an internal compiler error is
+ * flagged and the if-clause is run.
+ *
+ * A typical use-case would be:
+ *
+ * if (rc_assert(c, condition-that-must-be-true))
+ * return;
+ */
+#define rc_assert(c, cond) \
+ (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
void rc_calculate_inputs_outputs(struct radeon_compiler * c);
-void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input);
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input);
@@ -97,7 +96,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
struct r300_vertex_program_compiler {
struct radeon_compiler Base;
struct r300_vertex_program_code *code;
- GLbitfield RequiredOutputs;
+ uint32_t RequiredOutputs;
void * UserData;
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
diff --git a/r300/compiler/radeon_dataflow.c b/r300/compiler/radeon_dataflow.c
new file mode 100644
index 0000000..a003e77
--- /dev/null
+++ b/r300/compiler/radeon_dataflow.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_program.h"
+
+
+static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+{
+ struct rc_sub_instruction * inst = &fullinst->U.I;
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+ unsigned int src;
+
+ for(src = 0; src < opcode->NumSrcRegs; ++src) {
+ unsigned int refmask = 0, chan;
+
+ if (inst->SrcReg[src].File == RC_FILE_NONE)
+ return;
+
+ for(chan = 0; chan < 4; ++chan)
+ refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);
+
+ refmask &= RC_MASK_XYZW;
+
+ for(chan = 0; chan < 4; ++chan) {
+ if (GET_BIT(refmask, chan)) {
+ cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan);
+ }
+ }
+
+ if (refmask && inst->SrcReg[src].RelAddr)
+ cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
+ }
+}
+
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+ unsigned int refmasks[3] = { 0, 0, 0 };
+ unsigned int arg, src;
+
+ if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+ for(arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ unsigned int chan;
+
+ for(chan = 0; chan < 3; ++chan) {
+ unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
+ if (swz < 4)
+ refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz;
+ }
+ }
+ }
+
+ if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+ for(arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ if (inst->Alpha.Arg[arg].Swizzle < 4)
+ refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle;
+ }
+ }
+
+ for(src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used) {
+ unsigned int chan;
+ for(chan = 0; chan < 3; ++chan) {
+ if (GET_BIT(refmasks[src], chan))
+ cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan);
+ }
+ }
+
+ if (inst->Alpha.Src[src].Used) {
+ if (GET_BIT(refmasks[src], 3))
+ cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3);
+ }
+ }
+}
+
+/**
+ * Calls a callback function for all sourced register channels.
+ *
+ * This is conservative, i.e. channels may be called multiple times,
+ * and the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ reads_normal(inst, cb, userdata);
+ } else {
+ reads_pair(inst, cb, userdata);
+ }
+}
+
+
+
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+{
+ struct rc_sub_instruction * inst = &fullinst->U.I;
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+ if (opcode->HasDstReg) {
+ unsigned int chan;
+
+ for(chan = 0; chan < 4; ++chan) {
+ if (GET_BIT(inst->DstReg.WriteMask, chan))
+ cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan);
+ }
+ }
+
+ if (inst->WriteALUResult)
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+}
+
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+ unsigned int chan;
+
+ for(chan = 0; chan < 3; ++chan) {
+ if (GET_BIT(inst->RGB.WriteMask, chan))
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan);
+ }
+
+ if (inst->Alpha.WriteMask)
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3);
+
+ if (inst->WriteALUResult)
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+}
+
+/**
+ * Calls a callback function for all written register channels.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ writes_normal(inst, cb, userdata);
+ } else {
+ writes_pair(inst, cb, userdata);
+ }
+}
diff --git a/r300/compiler/radeon_dataflow.h b/r300/compiler/radeon_dataflow.h
new file mode 100644
index 0000000..5aa4cb6
--- /dev/null
+++ b/r300/compiler/radeon_dataflow.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_DATAFLOW_H
+#define RADEON_DATAFLOW_H
+
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_swizzle_caps;
+
+
+/**
+ * Help analyze the register accesses of instructions.
+ */
+/*@{*/
+typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan);
+void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
+void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
+/*@}*/
+
+
+/**
+ * Compiler passes based on dataflow analysis.
+ */
+/*@{*/
+typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
+ void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
+void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata);
+void rc_dataflow_swizzles(struct radeon_compiler * c);
+/*@}*/
+
+#endif /* RADEON_DATAFLOW_H */
diff --git a/r300/compiler/radeon_dataflow_deadcode.c b/r300/compiler/radeon_dataflow_deadcode.c
new file mode 100644
index 0000000..d78efa1
--- /dev/null
+++ b/r300/compiler/radeon_dataflow_deadcode.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+struct updatemask_state {
+ unsigned char Output[RC_REGISTER_MAX_INDEX];
+ unsigned char Temporary[RC_REGISTER_MAX_INDEX];
+ unsigned char Address;
+ unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
+};
+
+struct instruction_state {
+ unsigned char WriteMask:4;
+ unsigned char WriteALUResult:1;
+ unsigned char SrcReg[3];
+};
+
+struct branchinfo {
+ unsigned int HaveElse:1;
+
+ struct updatemask_state StoreEndif;
+ struct updatemask_state StoreElse;
+};
+
+struct deadcode_state {
+ struct radeon_compiler * C;
+ struct instruction_state * Instructions;
+
+ struct updatemask_state R;
+
+ struct branchinfo * BranchStack;
+ unsigned int BranchStackSize;
+ unsigned int BranchStackReserved;
+};
+
+
+static void or_updatemasks(
+ struct updatemask_state * dst,
+ struct updatemask_state * a,
+ struct updatemask_state * b)
+{
+ unsigned int i;
+
+ for(i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
+ dst->Output[i] = a->Output[i] | b->Output[i];
+ dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
+ }
+
+ for(i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
+ dst->Special[i] = a->Special[i] | b->Special[i];
+
+ dst->Address = a->Address | b->Address;
+}
+
+static void push_branch(struct deadcode_state * s)
+{
+ if (s->BranchStackSize >= s->BranchStackReserved) {
+ unsigned int new_reserve = 2 * s->BranchStackReserved;
+ struct branchinfo * new_stack;
+
+ if (!new_reserve)
+ new_reserve = 4;
+
+ new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct branchinfo));
+ memcpy(new_stack, s->BranchStack, s->BranchStackSize * sizeof(struct branchinfo));
+
+ s->BranchStack = new_stack;
+ s->BranchStackReserved = new_reserve;
+ }
+
+ struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++];
+ branch->HaveElse = 0;
+ memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
+}
+
+static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
+{
+ if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
+ if (index >= RC_REGISTER_MAX_INDEX) {
+ rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
+ return 0;
+ }
+
+ if (file == RC_FILE_OUTPUT)
+ return &s->R.Output[index];
+ else
+ return &s->R.Temporary[index];
+ } else if (file == RC_FILE_ADDRESS) {
+ return &s->R.Address;
+ } else if (file == RC_FILE_SPECIAL) {
+ if (index >= RC_NUM_SPECIAL_REGISTERS) {
+ rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
+ return 0;
+ }
+
+ return &s->R.Special[index];
+ }
+
+ return 0;
+}
+
+static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
+{
+ unsigned char * pused = get_used_ptr(s, file, index);
+ if (pused)
+ *pused |= mask;
+}
+
+static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ struct instruction_state * insts = &s->Instructions[inst->IP];
+ unsigned int usedmask = 0, src;
+
+ if (opcode->HasDstReg) {
+ unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
+ if (pused) {
+ usedmask = *pused & inst->U.I.DstReg.WriteMask;
+ *pused &= ~usedmask;
+ }
+ }
+
+ insts->WriteMask |= usedmask;
+
+ if (inst->U.I.WriteALUResult) {
+ unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
+ if (pused && *pused) {
+ if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+ usedmask |= RC_MASK_X;
+ else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+ usedmask |= RC_MASK_W;
+
+ *pused = 0;
+ insts->WriteALUResult = 1;
+ }
+ }
+
+ unsigned int srcmasks[3];
+ rc_compute_sources_for_writemask(opcode, usedmask, srcmasks);
+
+ for(src = 0; src < opcode->NumSrcRegs; ++src) {
+ unsigned int refmask = 0, chan;
+ unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
+ insts->SrcReg[src] |= newsrcmask;
+
+ for(chan = 0; chan < 4; ++chan) {
+ if (GET_BIT(newsrcmask, chan))
+ refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+ }
+
+ /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+ refmask &= RC_MASK_XYZW;
+
+ if (!refmask)
+ continue;
+
+ mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
+
+ if (inst->U.I.SrcReg[src].RelAddr)
+ mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
+ }
+}
+
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+ struct deadcode_state * s = data;
+
+ mark_used(s, RC_FILE_OUTPUT, index, mask);
+}
+
+void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata)
+{
+ struct deadcode_state s;
+ unsigned int nr_instructions;
+ struct rc_instruction * inst;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+
+ nr_instructions = rc_recompute_ips(c);
+ s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
+ memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
+
+ dce(userdata, &s, &mark_output_use);
+
+ for(inst = c->Program.Instructions.Prev;
+ inst != &c->Program.Instructions;
+ inst = inst->Prev) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->IsFlowControl) {
+ if (opcode->Opcode == RC_OPCODE_ENDIF) {
+ push_branch(&s);
+ } else {
+ if (s.BranchStackSize) {
+ struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
+
+ if (opcode->Opcode == RC_OPCODE_IF) {
+ or_updatemasks(&s.R,
+ &s.R,
+ branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
+
+ s.BranchStackSize--;
+ } else if (opcode->Opcode == RC_OPCODE_ELSE) {
+ if (branch->HaveElse) {
+ rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
+ } else {
+ memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
+ memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
+ branch->HaveElse = 1;
+ }
+ } else {
+ rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
+ }
+ } else {
+ rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__);
+ }
+ }
+ }
+
+ update_instruction(&s, inst);
+ }
+
+ unsigned int ip = 0;
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next, ++ip) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);\
+ int dead = 1;
+ unsigned int src, chan;
+
+ if (!opcode->HasDstReg) {
+ dead = 0;
+ } else {
+ inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
+ if (s.Instructions[ip].WriteMask)
+ dead = 0;
+
+ if (s.Instructions[ip].WriteALUResult)
+ dead = 0;
+ else
+ inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
+ }
+
+ if (dead) {
+ struct rc_instruction * todelete = inst;
+ inst = inst->Prev;
+ rc_remove_instruction(todelete);
+ continue;
+ }
+
+ unsigned int srcmasks[3];
+ unsigned int usemask = s.Instructions[ip].WriteMask;
+
+ if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+ usemask |= RC_MASK_X;
+ else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+ usemask |= RC_MASK_W;
+
+ rc_compute_sources_for_writemask(opcode, usemask, srcmasks);
+
+ for(src = 0; src < 3; ++src) {
+ for(chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(srcmasks[src], chan))
+ SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ }
+ }
+ }
+
+ rc_calculate_inputs_outputs(c);
+}
diff --git a/r300/compiler/radeon_dataflow_swizzles.c b/r300/compiler/radeon_dataflow_swizzles.c
new file mode 100644
index 0000000..d4ccd35
--- /dev/null
+++ b/r300/compiler/radeon_dataflow_swizzles.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+static void rewrite_source(struct radeon_compiler * c,
+ struct rc_instruction * inst, unsigned src)
+{
+ struct rc_swizzle_split split;
+ unsigned int tempreg = rc_find_free_temporary(c);
+ unsigned int usemask, chan, phase;
+
+ usemask = 0;
+ for(chan = 0; chan < 4; ++chan) {
+ if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
+ usemask |= 1 << chan;
+ }
+
+ c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);
+
+ for(phase = 0; phase < split.NumPhases; ++phase) {
+ struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
+ unsigned int phase_refmask;
+ unsigned int masked_negate;
+
+ mov->U.I.Opcode = RC_OPCODE_MOV;
+ mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ mov->U.I.DstReg.Index = tempreg;
+ mov->U.I.DstReg.WriteMask = split.Phase[phase];
+ mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+
+ phase_refmask = 0;
+ for(chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(split.Phase[phase], chan))
+ SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ else
+ phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan);
+ }
+
+ phase_refmask &= RC_MASK_XYZW;
+
+ masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
+ if (masked_negate == 0)
+ mov->U.I.SrcReg[0].Negate = 0;
+ else if (masked_negate == split.Phase[phase])
+ mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+
+ }
+
+ inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[src].Index = tempreg;
+ inst->U.I.SrcReg[src].Swizzle = 0;
+ inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
+ inst->U.I.SrcReg[src].Abs = 0;
+ for(chan = 0; chan < 4; ++chan) {
+ SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
+ GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
+ }
+}
+
+void rc_dataflow_swizzles(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int src;
+
+ for(src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+ rewrite_source(c, inst, src);
+ }
+ }
+}
diff --git a/r300/compiler/radeon_nqssadce.c b/r300/compiler/radeon_nqssadce.c
deleted file mode 100644
index aaaa50a..0000000
--- a/r300/compiler/radeon_nqssadce.c
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * @file
- *
- * "Not-quite SSA" and Dead-Code Elimination.
- *
- * @note This code uses SWIZZLE_NIL in a source register to indicate that
- * the corresponding component is ignored by the corresponding instruction.
- */
-
-#include "radeon_nqssadce.h"
-
-#include "radeon_compiler.h"
-
-
-/**
- * Return the @ref register_state for the given register (or 0 for untracked
- * registers, i.e. constants).
- */
-static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
-{
- switch(file) {
- case PROGRAM_TEMPORARY: return &s->Temps[index];
- case PROGRAM_OUTPUT: return &s->Outputs[index];
- case PROGRAM_ADDRESS: return &s->Address;
- default: return 0;
- }
-}
-
-
-/**
- * Left multiplication of a register with a swizzle
- *
- * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
- */
-struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
-{
- struct prog_src_register tmp = srcreg;
- int i;
- tmp.Swizzle = 0;
- tmp.Negate = NEGATE_NONE;
- for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(swizzle, i);
- if (swz < 4) {
- tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
- tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
- } else {
- tmp.Swizzle |= swz << (i*3);
- }
- }
- return tmp;
-}
-
-
-static void track_used_srcreg(struct nqssadce_state* s,
- GLint src, GLuint sourced)
-{
- struct prog_instruction * inst = &s->IP->I;
- int i;
- GLuint deswz_source = 0;
-
- for(i = 0; i < 4; ++i) {
- if (GET_BIT(sourced, i)) {
- GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
- deswz_source |= 1 << swz;
- } else {
- inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
- inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
- }
- }
-
- if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
- struct prog_dst_register dstreg = inst->DstReg;
- dstreg.File = PROGRAM_TEMPORARY;
- dstreg.Index = rc_find_free_temporary(s->Compiler);
- dstreg.WriteMask = sourced;
-
- s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
-
- inst->SrcReg[src].File = PROGRAM_TEMPORARY;
- inst->SrcReg[src].Index = dstreg.Index;
- inst->SrcReg[src].Swizzle = 0;
- inst->SrcReg[src].Negate = NEGATE_NONE;
- inst->SrcReg[src].Abs = 0;
- for(i = 0; i < 4; ++i) {
- if (GET_BIT(sourced, i))
- inst->SrcReg[src].Swizzle |= i << (3*i);
- else
- inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
- }
- deswz_source = sourced;
- }
-
- struct register_state *regstate;
-
- if (inst->SrcReg[src].RelAddr) {
- regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
- if (regstate)
- regstate->Sourced |= WRITEMASK_X;
- } else {
- regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
- if (regstate)
- regstate->Sourced |= deswz_source & 0xf;
- }
-}
-
-static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex)
-{
- int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode);
- int i;
- for(i = 0; i < nsrc; ++i)
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex)
- inst->I.SrcReg[i].Index = newindex;
-}
-
-static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
-{
- GLuint newindex = rc_find_free_temporary(s->Compiler);
- struct rc_instruction * inst;
- for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) {
- if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex)
- inst->I.DstReg.Index = newindex;
- unalias_srcregs(inst, oldindex, newindex);
- }
- unalias_srcregs(s->IP, oldindex, newindex);
-}
-
-
-/**
- * Handle one instruction.
- */
-static void process_instruction(struct nqssadce_state* s)
-{
- struct prog_instruction *inst = &s->IP->I;
- GLuint WriteMask;
-
- if (inst->Opcode == OPCODE_END)
- return;
-
- if (inst->Opcode != OPCODE_KIL) {
- struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
- if (!regstate) {
- rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n",
- inst->DstReg.File, inst->DstReg.Index);
- return;
- }
-
- inst->DstReg.WriteMask &= regstate->Sourced;
- regstate->Sourced &= ~inst->DstReg.WriteMask;
-
- if (inst->DstReg.WriteMask == 0) {
- struct rc_instruction * inst_remove = s->IP;
- s->IP = s->IP->Prev;
- rc_remove_instruction(inst_remove);
- return;
- }
-
- if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
- unalias_temporary(s, inst->DstReg.Index);
- }
-
- WriteMask = inst->DstReg.WriteMask;
-
- switch (inst->Opcode) {
- case OPCODE_ARL:
- case OPCODE_DDX:
- case OPCODE_DDY:
- case OPCODE_FRC:
- case OPCODE_MOV:
- track_used_srcreg(s, 0, WriteMask);
- break;
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- case OPCODE_SGE:
- case OPCODE_SLT:
- track_used_srcreg(s, 0, WriteMask);
- track_used_srcreg(s, 1, WriteMask);
- break;
- case OPCODE_CMP:
- case OPCODE_MAD:
- track_used_srcreg(s, 0, WriteMask);
- track_used_srcreg(s, 1, WriteMask);
- track_used_srcreg(s, 2, WriteMask);
- break;
- case OPCODE_COS:
- case OPCODE_EX2:
- case OPCODE_LG2:
- case OPCODE_RCP:
- case OPCODE_RSQ:
- case OPCODE_SIN:
- track_used_srcreg(s, 0, 0x1);
- break;
- case OPCODE_DP3:
- track_used_srcreg(s, 0, 0x7);
- track_used_srcreg(s, 1, 0x7);
- break;
- case OPCODE_DP4:
- track_used_srcreg(s, 0, 0xf);
- track_used_srcreg(s, 1, 0xf);
- break;
- case OPCODE_KIL:
- case OPCODE_TEX:
- case OPCODE_TXB:
- case OPCODE_TXP:
- track_used_srcreg(s, 0, 0xf);
- break;
- case OPCODE_DST:
- track_used_srcreg(s, 0, 0x6);
- track_used_srcreg(s, 1, 0xa);
- break;
- case OPCODE_EXP:
- case OPCODE_LOG:
- case OPCODE_POW:
- track_used_srcreg(s, 0, 0x3);
- break;
- case OPCODE_LIT:
- track_used_srcreg(s, 0, 0xb);
- break;
- default:
- rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
- return;
- }
-
- s->IP = s->IP->Prev;
-}
-
-void rc_calculate_inputs_outputs(struct radeon_compiler * c)
-{
- struct rc_instruction *inst;
-
- c->Program.InputsRead = 0;
- c->Program.OutputsWritten = 0;
-
- for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
- {
- int i;
- int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode);
-
- for (i = 0; i < num_src_regs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_INPUT)
- c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
- }
-
- if (_mesa_num_inst_dst_regs(inst->I.Opcode)) {
- if (inst->I.DstReg.File == PROGRAM_OUTPUT)
- c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
- }
- }
-}
-
-void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data)
-{
- struct nqssadce_state s;
-
- _mesa_bzero(&s, sizeof(s));
- s.Compiler = c;
- s.Descr = descr;
- s.UserData = data;
- s.Descr->Init(&s);
- s.IP = c->Program.Instructions.Prev;
-
- while(s.IP != &c->Program.Instructions && !c->Error)
- process_instruction(&s);
-
- rc_calculate_inputs_outputs(c);
-}
diff --git a/r300/compiler/radeon_nqssadce.h b/r300/compiler/radeon_nqssadce.h
deleted file mode 100644
index b3fc77a..0000000
--- a/r300/compiler/radeon_nqssadce.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __RADEON_PROGRAM_NQSSADCE_H_
-#define __RADEON_PROGRAM_NQSSADCE_H_
-
-#include "radeon_program.h"
-
-struct register_state {
- /**
- * Bitmask indicating which components of the register are sourced
- * by later instructions.
- */
- GLuint Sourced : 4;
-};
-
-/**
- * Maintain state such as which registers are used, which registers are
- * read from, etc.
- */
-struct nqssadce_state {
- struct radeon_compiler *Compiler;
- struct radeon_nqssadce_descr *Descr;
-
- /**
- * All instructions after this instruction pointer have been dealt with.
- */
- struct rc_instruction * IP;
-
- /**
- * Which registers are read by subsequent instructions?
- */
- struct register_state Temps[MAX_PROGRAM_TEMPS];
- struct register_state Outputs[VERT_RESULT_MAX];
- struct register_state Address;
-
- void * UserData;
-};
-
-
-/**
- * This structure contains a description of the hardware in-so-far as
- * it is required for the NqSSA-DCE pass.
- */
-struct radeon_nqssadce_descr {
- /**
- * Fill in which outputs
- */
- void (*Init)(struct nqssadce_state *);
-
- /**
- * Check whether the given swizzle, absolute and negate combination
- * can be implemented natively by the hardware for this opcode.
- */
- GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg);
-
- /**
- * Emit (at the current IP) the instruction MOV dst, src;
- * The transformation will work recursively on the emitted instruction(s).
- */
- void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
-};
-
-void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data);
-struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
-
-#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/r300/compiler/radeon_opcodes.c b/r300/compiler/radeon_opcodes.c
new file mode 100644
index 0000000..9285748
--- /dev/null
+++ b/r300/compiler/radeon_opcodes.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_opcodes.h"
+
+#include "radeon_program_constants.h"
+
+struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
+ {
+ .Opcode = RC_OPCODE_NOP,
+ .Name = "NOP"
+ },
+ {
+ .Opcode = RC_OPCODE_ILLEGAL_OPCODE,
+ .Name = "ILLEGAL OPCODE"
+ },
+ {
+ .Opcode = RC_OPCODE_ABS,
+ .Name = "ABS",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_ADD,
+ .Name = "ADD",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_ARL,
+ .Name = "ARL",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_CMP,
+ .Name = "CMP",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_COS,
+ .Name = "COS",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DDX,
+ .Name = "DDX",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DDY,
+ .Name = "DDY",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DP3,
+ .Name = "DP3",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DP4,
+ .Name = "DP4",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DPH,
+ .Name = "DPH",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DST,
+ .Name = "DST",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_EX2,
+ .Name = "EX2",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_EXP,
+ .Name = "EXP",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_FLR,
+ .Name = "FLR",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_FRC,
+ .Name = "FRC",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_KIL,
+ .Name = "KIL",
+ .NumSrcRegs = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LG2,
+ .Name = "LG2",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LIT,
+ .Name = "LIT",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LOG,
+ .Name = "LOG",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LRP,
+ .Name = "LRP",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MAD,
+ .Name = "MAD",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MAX,
+ .Name = "MAX",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MIN,
+ .Name = "MIN",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MOV,
+ .Name = "MOV",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MUL,
+ .Name = "MUL",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_POW,
+ .Name = "POW",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_RCP,
+ .Name = "RCP",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_RSQ,
+ .Name = "RSQ",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SCS,
+ .Name = "SCS",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SEQ,
+ .Name = "SEQ",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SFL,
+ .Name = "SFL",
+ .NumSrcRegs = 0,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SGE,
+ .Name = "SGE",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SGT,
+ .Name = "SGT",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SIN,
+ .Name = "SIN",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SLE,
+ .Name = "SLE",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SLT,
+ .Name = "SLT",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SNE,
+ .Name = "SNE",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SUB,
+ .Name = "SUB",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SWZ,
+ .Name = "SWZ",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_XPD,
+ .Name = "XPD",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TEX,
+ .Name = "TEX",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXB,
+ .Name = "TXB",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXD,
+ .Name = "TXD",
+ .HasTexture = 1,
+ .NumSrcRegs = 3,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXL,
+ .Name = "TXL",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXP,
+ .Name = "TXP",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_IF,
+ .Name = "IF",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 1
+ },
+ {
+ .Opcode = RC_OPCODE_ELSE,
+ .Name = "ELSE",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_ENDIF,
+ .Name = "ENDIF",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_REPL_ALPHA,
+ .Name = "REPL_ALPHA",
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_BEGIN_TEX,
+ .Name = "BEGIN_TEX"
+ }
+};
+
+void rc_compute_sources_for_writemask(
+ const struct rc_opcode_info * opcode,
+ unsigned int writemask,
+ unsigned int *srcmasks)
+{
+ unsigned int src;
+
+ srcmasks[0] = 0;
+ srcmasks[1] = 0;
+ srcmasks[2] = 0;
+
+ if (opcode->Opcode == RC_OPCODE_KIL)
+ srcmasks[0] |= RC_MASK_XYZW;
+ else if (opcode->Opcode == RC_OPCODE_IF)
+ srcmasks[0] |= RC_MASK_X;
+
+ if (!writemask)
+ return;
+
+ if (opcode->IsComponentwise) {
+ for(src = 0; src < opcode->NumSrcRegs; ++src)
+ srcmasks[src] |= writemask;
+ } else if (opcode->IsStandardScalar) {
+ for(src = 0; src < opcode->NumSrcRegs; ++src)
+ srcmasks[src] |= RC_MASK_X;
+ } else {
+ switch(opcode->Opcode) {
+ case RC_OPCODE_ARL:
+ srcmasks[0] |= RC_MASK_X;
+ break;
+ case RC_OPCODE_DP3:
+ srcmasks[0] |= RC_MASK_XYZ;
+ srcmasks[1] |= RC_MASK_XYZ;
+ break;
+ case RC_OPCODE_DP4:
+ srcmasks[0] |= RC_MASK_XYZW;
+ srcmasks[1] |= RC_MASK_XYZW;
+ break;
+ case RC_OPCODE_TEX:
+ case RC_OPCODE_TXB:
+ case RC_OPCODE_TXP:
+ srcmasks[0] |= RC_MASK_XYZW;
+ break;
+ case RC_OPCODE_DST:
+ srcmasks[0] |= 0x6;
+ srcmasks[1] |= 0xa;
+ break;
+ case RC_OPCODE_EXP:
+ case RC_OPCODE_LOG:
+ srcmasks[0] |= RC_MASK_XY;
+ break;
+ case RC_OPCODE_LIT:
+ srcmasks[0] |= 0xb;
+ break;
+ default:
+ break;
+ }
+ }
+}
diff --git a/r300/compiler/radeon_opcodes.h b/r300/compiler/radeon_opcodes.h
new file mode 100644
index 0000000..a3c5b86
--- /dev/null
+++ b/r300/compiler/radeon_opcodes.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_OPCODES_H
+#define RADEON_OPCODES_H
+
+#include <assert.h>
+
+/**
+ * Opcodes understood by the Radeon compiler.
+ */
+typedef enum {
+ RC_OPCODE_NOP = 0,
+ RC_OPCODE_ILLEGAL_OPCODE,
+
+ /** vec4 instruction: dst.c = abs(src0.c); */
+ RC_OPCODE_ABS,
+
+ /** vec4 instruction: dst.c = src0.c + src1.c; */
+ RC_OPCODE_ADD,
+
+ /** special instruction: load address register
+ * dst.x = floor(src.x), where dst must be an address register */
+ RC_OPCODE_ARL,
+
+ /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
+ RC_OPCODE_CMP,
+
+ /** scalar instruction: dst = cos(src0.x) */
+ RC_OPCODE_COS,
+
+ /** special instruction: take vec4 partial derivative in X direction
+ * dst.c = d src0.c / dx */
+ RC_OPCODE_DDX,
+
+ /** special instruction: take vec4 partial derivative in Y direction
+ * dst.c = d src0.c / dy */
+ RC_OPCODE_DDY,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
+ RC_OPCODE_DP3,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
+ RC_OPCODE_DP4,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
+ RC_OPCODE_DPH,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_DST,
+
+ /** scalar instruction: dst = 2**src0.x */
+ RC_OPCODE_EX2,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_EXP,
+
+ /** vec4 instruction: dst.c = floor(src0.c) */
+ RC_OPCODE_FLR,
+
+ /** vec4 instruction: dst.c = src0.c - floor(src0.c) */
+ RC_OPCODE_FRC,
+
+ /** special instruction: stop execution if any component of src0 is negative */
+ RC_OPCODE_KIL,
+
+ /** scalar instruction: dst = log_2(src0.x) */
+ RC_OPCODE_LG2,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_LIT,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_LOG,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */
+ RC_OPCODE_LRP,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
+ RC_OPCODE_MAD,
+
+ /** vec4 instruction: dst.c = max(src0.c, src1.c) */
+ RC_OPCODE_MAX,
+
+ /** vec4 instruction: dst.c = min(src0.c, src1.c) */
+ RC_OPCODE_MIN,
+
+ /** vec4 instruction: dst.c = src0.c */
+ RC_OPCODE_MOV,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c */
+ RC_OPCODE_MUL,
+
+ /** scalar instruction: dst = src0.x ** src1.x */
+ RC_OPCODE_POW,
+
+ /** scalar instruction: dst = 1 / src0.x */
+ RC_OPCODE_RCP,
+
+ /** scalar instruction: dst = 1 / sqrt(src0.x) */
+ RC_OPCODE_RSQ,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_SCS,
+
+ /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SEQ,
+
+ /** vec4 instruction: dst.c = 0.0 */
+ RC_OPCODE_SFL,
+
+ /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SGE,
+
+ /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SGT,
+
+ /** scalar instruction: dst = sin(src0.x) */
+ RC_OPCODE_SIN,
+
+ /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SLE,
+
+ /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SLT,
+
+ /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SNE,
+
+ /** vec4 instruction: dst.c = src0.c - src1.c */
+ RC_OPCODE_SUB,
+
+ /** vec4 instruction: dst.c = src0.c */
+ RC_OPCODE_SWZ,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_XPD,
+
+ RC_OPCODE_TEX,
+ RC_OPCODE_TXB,
+ RC_OPCODE_TXD,
+ RC_OPCODE_TXL,
+ RC_OPCODE_TXP,
+
+ /** branch instruction:
+ * If src0.x != 0.0, continue with the next instruction;
+ * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
+ */
+ RC_OPCODE_IF,
+
+ /** branch instruction: jump to matching RC_OPCODE_ENDIF */
+ RC_OPCODE_ELSE,
+
+ /** branch instruction: has no effect */
+ RC_OPCODE_ENDIF,
+
+ /** special instruction, used in R300-R500 fragment program pair instructions
+ * indicates that the result of the alpha operation shall be replicated
+ * across all other channels */
+ RC_OPCODE_REPL_ALPHA,
+
+ /** special instruction, used in R300-R500 fragment programs
+ * to indicate the start of a block of texture instructions that
+ * can run simultaneously. */
+ RC_OPCODE_BEGIN_TEX,
+
+ MAX_RC_OPCODE
+} rc_opcode;
+
+
+struct rc_opcode_info {
+ rc_opcode Opcode;
+ const char * Name;
+
+ /** true if the instruction reads from a texture.
+ *
+ * \note This is false for the KIL instruction, even though KIL is
+ * a texture instruction from a hardware point of view. */
+ unsigned int HasTexture:1;
+
+ unsigned int NumSrcRegs:2;
+ unsigned int HasDstReg:1;
+
+ /** true if this instruction affects control flow */
+ unsigned int IsFlowControl:1;
+
+ /** true if this is a vector instruction that operates on components in parallel
+ * without any cross-component interaction */
+ unsigned int IsComponentwise:1;
+
+ /** true if this instruction sources only its operands X components
+ * to compute one result which is smeared across all output channels */
+ unsigned int IsStandardScalar:1;
+};
+
+extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
+
+static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
+{
+ assert((unsigned int)opcode < MAX_RC_OPCODE);
+ assert(rc_opcodes[opcode].Opcode == opcode);
+
+ return &rc_opcodes[opcode];
+}
+
+void rc_compute_sources_for_writemask(
+ const struct rc_opcode_info * opcode,
+ unsigned int writemask,
+ unsigned int *srcmasks);
+
+#endif /* RADEON_OPCODES_H */
diff --git a/r300/compiler/radeon_pair_regalloc.c b/r300/compiler/radeon_pair_regalloc.c
new file mode 100644
index 0000000..23c2a5e
--- /dev/null
+++ b/r300/compiler/radeon_pair_regalloc.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct live_intervals {
+ int Start;
+ int End;
+ struct live_intervals * Next;
+};
+
+struct register_info {
+ struct live_intervals Live;
+
+ unsigned int Used:1;
+ unsigned int Allocated:1;
+ rc_register_file File:3;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct hardware_register {
+ struct live_intervals * Used;
+};
+
+struct regalloc_state {
+ struct radeon_compiler * C;
+
+ struct register_info Input[RC_REGISTER_MAX_INDEX];
+ struct register_info Temporary[RC_REGISTER_MAX_INDEX];
+
+ struct hardware_register * HwTemporary;
+ unsigned int NumHwTemporaries;
+};
+
+static void print_live_intervals(struct live_intervals * src)
+{
+ if (!src) {
+ DBG("(null)");
+ return;
+ }
+
+ while(src) {
+ DBG("(%i,%i)", src->Start, src->End);
+ src = src->Next;
+ }
+}
+
+static void add_live_intervals(struct regalloc_state * s,
+ struct live_intervals ** dst, struct live_intervals * src)
+{
+ struct live_intervals ** dst_backup = dst;
+
+ if (VERBOSE) {
+ DBG("add_live_intervals: ");
+ print_live_intervals(*dst);
+ DBG(" to ");
+ print_live_intervals(src);
+ DBG("\n");
+ }
+
+ while(src) {
+ if (*dst && (*dst)->End < src->Start) {
+ dst = &(*dst)->Next;
+ } else if (!*dst || (*dst)->Start > src->End) {
+ struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
+ li->Start = src->Start;
+ li->End = src->End;
+ li->Next = *dst;
+ *dst = li;
+ src = src->Next;
+ } else {
+ if (src->End > (*dst)->End)
+ (*dst)->End = src->End;
+ if (src->Start < (*dst)->Start)
+ (*dst)->Start = src->Start;
+ src = src->Next;
+ }
+ }
+
+ if (VERBOSE) {
+ DBG(" result: ");
+ print_live_intervals(*dst_backup);
+ DBG("\n");
+ }
+}
+
+static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
+{
+ if (VERBOSE) {
+ DBG("overlap_live_intervals: ");
+ print_live_intervals(dst);
+ DBG(" to ");
+ print_live_intervals(src);
+ DBG("\n");
+ }
+
+ while(src && dst) {
+ if (dst->End <= src->Start) {
+ dst = dst->Next;
+ } else if (dst->End <= src->End) {
+ DBG(" overlap\n");
+ return 1;
+ } else if (dst->Start < src->End) {
+ DBG(" overlap\n");
+ return 1;
+ } else {
+ src = src->Next;
+ }
+ }
+
+ DBG(" no overlap\n");
+
+ return 0;
+}
+
+static int try_add_live_intervals(struct regalloc_state * s,
+ struct live_intervals ** dst, struct live_intervals * src)
+{
+ if (overlap_live_intervals(*dst, src))
+ return 0;
+
+ add_live_intervals(s, dst, src);
+ return 1;
+}
+
+static void scan_callback(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct regalloc_state * s = data;
+ struct register_info * reg;
+
+ if (file == RC_FILE_TEMPORARY)
+ reg = &s->Temporary[index];
+ else if (file == RC_FILE_INPUT)
+ reg = &s->Input[index];
+ else
+ return;
+
+ if (!reg->Used) {
+ reg->Used = 1;
+ if (file == RC_FILE_INPUT)
+ reg->Live.Start = -1;
+ else
+ reg->Live.Start = inst->IP;
+ reg->Live.End = inst->IP;
+ } else {
+ if (inst->IP > reg->Live.End)
+ reg->Live.End = inst->IP;
+ }
+}
+
+static void compute_live_intervals(struct regalloc_state * s)
+{
+ struct rc_instruction * inst;
+
+ rc_recompute_ips(s->C);
+
+ for(inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_reads(inst, scan_callback, s);
+ rc_for_all_writes(inst, scan_callback, s);
+ }
+}
+
+static void rewrite_register(struct regalloc_state * s,
+ rc_register_file * file, unsigned int * index)
+{
+ const struct register_info * reg;
+
+ if (*file == RC_FILE_TEMPORARY)
+ reg = &s->Temporary[*index];
+ else if (*file == RC_FILE_INPUT)
+ reg = &s->Input[*index];
+ else
+ return;
+
+ if (reg->Allocated) {
+ *file = reg->File;
+ *index = reg->Index;
+ }
+}
+
+static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+ unsigned int src;
+
+ if (opcode->HasDstReg) {
+ rc_register_file file = inst->DstReg.File;
+ unsigned int index = inst->DstReg.Index;
+
+ rewrite_register(s, &file, &index);
+
+ inst->DstReg.File = file;
+ inst->DstReg.Index = index;
+ }
+
+ for(src = 0; src < opcode->NumSrcRegs; ++src) {
+ rc_register_file file = inst->SrcReg[src].File;
+ unsigned int index = inst->SrcReg[src].Index;
+
+ rewrite_register(s, &file, &index);
+
+ inst->SrcReg[src].File = file;
+ inst->SrcReg[src].Index = index;
+ }
+}
+
+static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_instruction * inst)
+{
+ unsigned int src;
+
+ if (inst->RGB.WriteMask) {
+ rc_register_file file = RC_FILE_TEMPORARY;
+ unsigned int index = inst->RGB.DestIndex;
+
+ rewrite_register(s, &file, &index);
+
+ inst->RGB.DestIndex = index;
+ }
+
+ if (inst->Alpha.WriteMask) {
+ rc_register_file file = RC_FILE_TEMPORARY;
+ unsigned int index = inst->Alpha.DestIndex;
+
+ rewrite_register(s, &file, &index);
+
+ inst->Alpha.DestIndex = index;
+ }
+
+ for(src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used) {
+ rc_register_file file = inst->RGB.Src[src].File;
+ unsigned int index = inst->RGB.Src[src].Index;
+
+ rewrite_register(s, &file, &index);
+
+ inst->RGB.Src[src].File = file;
+ inst->RGB.Src[src].Index = index;
+ }
+
+ if (inst->Alpha.Src[src].Used) {
+ rc_register_file file = inst->Alpha.Src[src].File;
+ unsigned int index = inst->Alpha.Src[src].Index;
+
+ rewrite_register(s, &file, &index);
+
+ inst->Alpha.Src[src].File = file;
+ inst->Alpha.Src[src].Index = index;
+ }
+ }
+}
+
+static void do_regalloc(struct regalloc_state * s)
+{
+ struct rc_instruction * inst;
+ unsigned int index;
+
+ /* Simple and stupid greedy register allocation */
+ for(index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+ struct register_info * reg = &s->Temporary[index];
+ unsigned int hwreg;
+
+ if (!reg->Used)
+ continue;
+
+ for(hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
+ if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) {
+ reg->Allocated = 1;
+ reg->File = RC_FILE_TEMPORARY;
+ reg->Index = hwreg;
+ goto success;
+ }
+ }
+
+ rc_error(s->C, "Ran out of hardware temporaries\n");
+ return;
+
+ success:;
+ }
+
+ /* Rewrite all instructions based on the translation table we built */
+ for(inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ if (inst->Type == RC_INSTRUCTION_NORMAL)
+ rewrite_normal_instruction(s, &inst->U.I);
+ else
+ rewrite_pair_instruction(s, &inst->U.P);
+ }
+}
+
+static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
+{
+ struct regalloc_state * s = data;
+
+ if (!s->Input[input].Used)
+ return;
+
+ add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
+
+ s->Input[input].Allocated = 1;
+ s->Input[input].File = RC_FILE_TEMPORARY;
+ s->Input[input].Index = hwreg;
+
+}
+
+void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps)
+{
+ struct regalloc_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &c->Base;
+ s.NumHwTemporaries = maxtemps;
+ s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register));
+ memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register));
+
+ compute_live_intervals(&s);
+
+ c->AllocateHwInputs(c, &alloc_input, &s);
+
+ do_regalloc(&s);
+}
diff --git a/r300/compiler/radeon_pair_schedule.c b/r300/compiler/radeon_pair_schedule.c
new file mode 100644
index 0000000..2890c00
--- /dev/null
+++ b/r300/compiler/radeon_pair_schedule.c
@@ -0,0 +1,508 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct schedule_instruction {
+ struct rc_instruction * Instruction;
+
+ /** Next instruction in the linked list of ready instructions. */
+ struct schedule_instruction *NextReady;
+
+ /** Values that this instruction reads and writes */
+ struct reg_value * WriteValues[4];
+ struct reg_value * ReadValues[12];
+ unsigned int NumWriteValues:3;
+ unsigned int NumReadValues:4;
+
+ /**
+ * Number of (read and write) dependencies that must be resolved before
+ * this instruction can be scheduled.
+ */
+ unsigned int NumDependencies:5;
+};
+
+
+/**
+ * Used to keep track of which instructions read a value.
+ */
+struct reg_value_reader {
+ struct schedule_instruction *Reader;
+ struct reg_value_reader *Next;
+};
+
+/**
+ * Used to keep track which values are stored in each component of a
+ * RC_FILE_TEMPORARY.
+ */
+struct reg_value {
+ struct schedule_instruction * Writer;
+
+ /**
+ * Unordered linked list of instructions that read from this value.
+ * When this value becomes available, we increase all readers'
+ * dependency count.
+ */
+ struct reg_value_reader *Readers;
+
+ /**
+ * Number of readers of this value. This is decremented each time
+ * a reader of the value is committed.
+ * When the reader cound reaches zero, the dependency count
+ * of the instruction writing \ref Next is decremented.
+ */
+ unsigned int NumReaders;
+
+ struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
+};
+
+struct register_state {
+ struct reg_value * Values[4];
+};
+
+struct schedule_state {
+ struct radeon_compiler * C;
+ struct schedule_instruction * Current;
+
+ struct register_state Temporary[RC_REGISTER_MAX_INDEX];
+
+ /**
+ * Linked lists of instructions that can be scheduled right now,
+ * based on which ALU/TEX resources they require.
+ */
+ /*@{*/
+ struct schedule_instruction *ReadyFullALU;
+ struct schedule_instruction *ReadyRGB;
+ struct schedule_instruction *ReadyAlpha;
+ struct schedule_instruction *ReadyTEX;
+ /*@}*/
+};
+
+static struct reg_value ** get_reg_valuep(struct schedule_state * s,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ if (file != RC_FILE_TEMPORARY)
+ return 0;
+
+ if (index >= RC_REGISTER_MAX_INDEX) {
+ rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
+ return 0;
+ }
+
+ return &s->Temporary[index].Values[chan];
+}
+
+static struct reg_value * get_reg_value(struct schedule_state * s,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+ if (!pv)
+ return 0;
+ return *pv;
+}
+
+static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
+{
+ inst->NextReady = *list;
+ *list = inst;
+}
+
+static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ DBG("%i is now ready\n", sinst->Instruction->IP);
+
+ if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
+ add_inst_to_list(&s->ReadyTEX, sinst);
+ else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
+ add_inst_to_list(&s->ReadyRGB, sinst);
+ else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
+ add_inst_to_list(&s->ReadyAlpha, sinst);
+ else
+ add_inst_to_list(&s->ReadyFullALU, sinst);
+}
+
+static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ assert(sinst->NumDependencies > 0);
+ sinst->NumDependencies--;
+ if (!sinst->NumDependencies)
+ instruction_ready(s, sinst);
+}
+
+static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ unsigned int i;
+
+ DBG("%i: commit\n", sinst->Instruction->IP);
+
+ for(i = 0; i < sinst->NumReadValues; ++i) {
+ struct reg_value * v = sinst->ReadValues[i];
+ assert(v->NumReaders > 0);
+ v->NumReaders--;
+ if (!v->NumReaders) {
+ if (v->Next)
+ decrease_dependencies(s, v->Next->Writer);
+ }
+ }
+
+ for(i = 0; i < sinst->NumWriteValues; ++i) {
+ struct reg_value * v = sinst->WriteValues[i];
+ struct reg_value_reader * r;
+
+ if (v->NumReaders) {
+ for(r = v->Readers; r; r = r->Next) {
+ decrease_dependencies(s, r->Reader);
+ }
+ } else {
+ /* This happens in instruction sequences of the type
+ * OP r.x, ...;
+ * OP r.x, r.x, ...;
+ * See also the subtlety in how instructions that both
+ * read and write the same register are scanned.
+ */
+ if (v->Next)
+ decrease_dependencies(s, v->Next->Writer);
+ }
+ }
+}
+
+/**
+ * Emit all ready texture instructions in a single block.
+ *
+ * Emit as a single block to (hopefully) sample many textures in parallel,
+ * and to avoid hardware indirections on R300.
+ */
+static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
+{
+ struct schedule_instruction *readytex;
+
+ assert(s->ReadyTEX);
+
+ /* Don't let the ready list change under us! */
+ readytex = s->ReadyTEX;
+ s->ReadyTEX = 0;
+
+ /* Node marker for R300 */
+ struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev);
+ inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
+
+ /* Link texture instructions back in */
+ while(readytex) {
+ struct schedule_instruction * tex = readytex;
+ readytex = readytex->NextReady;
+
+ rc_insert_instruction(before->Prev, tex->Instruction);
+ commit_instruction(s, tex);
+ }
+}
+
+
+static int destructive_merge_instructions(
+ struct rc_pair_instruction * rgb,
+ struct rc_pair_instruction * alpha)
+{
+ unsigned int arg;
+
+ assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
+ assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
+
+ /* Copy alpha args into rgb */
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
+
+ for(arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ unsigned int srcrgb = 0;
+ unsigned int srcalpha = 0;
+ unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
+ rc_register_file file = 0;
+ unsigned int index = 0;
+
+ if (alpha->Alpha.Arg[arg].Swizzle < 3) {
+ srcrgb = 1;
+ file = alpha->RGB.Src[oldsrc].File;
+ index = alpha->RGB.Src[oldsrc].Index;
+ } else if (alpha->Alpha.Arg[arg].Swizzle < 4) {
+ srcalpha = 1;
+ file = alpha->Alpha.Src[oldsrc].File;
+ index = alpha->Alpha.Src[oldsrc].Index;
+ }
+
+ int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
+ if (source < 0)
+ return 0;
+
+ rgb->Alpha.Arg[arg].Source = source;
+ rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
+ rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
+ rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
+ }
+
+ /* Copy alpha opcode into rgb */
+ rgb->Alpha.Opcode = alpha->Alpha.Opcode;
+ rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
+ rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
+ rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
+ rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
+ rgb->Alpha.Saturate = alpha->Alpha.Saturate;
+
+ /* Merge ALU result writing */
+ if (alpha->WriteALUResult) {
+ if (rgb->WriteALUResult)
+ return 0;
+
+ rgb->WriteALUResult = alpha->WriteALUResult;
+ rgb->ALUResultCompare = alpha->ALUResultCompare;
+ }
+
+ return 1;
+}
+
+/**
+ * Try to merge the given instructions into the rgb instructions.
+ *
+ * Return true on success; on failure, return false, and keep
+ * the instructions untouched.
+ */
+static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
+{
+ struct rc_pair_instruction backup;
+
+ memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
+
+ if (destructive_merge_instructions(rgb, alpha))
+ return 1;
+
+ memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
+ return 0;
+}
+
+
+/**
+ * Find a good ALU instruction or pair of ALU instruction and emit it.
+ *
+ * Prefer emitting full ALU instructions, so that when we reach a point
+ * where no full ALU instruction can be emitted, we have more candidates
+ * for RGB/Alpha pairing.
+ */
+static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
+{
+ struct schedule_instruction * sinst;
+
+ if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
+ if (s->ReadyFullALU) {
+ sinst = s->ReadyFullALU;
+ s->ReadyFullALU = s->ReadyFullALU->NextReady;
+ } else if (s->ReadyRGB) {
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ } else {
+ sinst = s->ReadyAlpha;
+ s->ReadyAlpha = s->ReadyAlpha->NextReady;
+ }
+
+ rc_insert_instruction(before->Prev, sinst->Instruction);
+ commit_instruction(s, sinst);
+ } else {
+ struct schedule_instruction **prgb;
+ struct schedule_instruction **palpha;
+
+ /* Some pairings might fail because they require too
+ * many source slots; try all possible pairings if necessary */
+ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+ for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
+ struct schedule_instruction * psirgb = *prgb;
+ struct schedule_instruction * psialpha = *palpha;
+
+ if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
+ continue;
+
+ *prgb = (*prgb)->NextReady;
+ *palpha = (*palpha)->NextReady;
+ rc_insert_instruction(before->Prev, psirgb->Instruction);
+ commit_instruction(s, psirgb);
+ commit_instruction(s, psialpha);
+ goto success;
+ }
+ }
+
+ /* No success in pairing; just take the first RGB instruction */
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+
+ rc_insert_instruction(before->Prev, sinst->Instruction);
+ commit_instruction(s, sinst);
+ success: ;
+ }
+}
+
+static void scan_read(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct schedule_state * s = data;
+ struct reg_value * v = get_reg_value(s, file, index, chan);
+
+ if (!v)
+ return;
+
+ if (v->Writer == s->Current) {
+ /* The instruction reads and writes to a register component.
+ * In this case, we only want to increment dependencies by one.
+ */
+ return;
+ }
+
+ DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+ struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
+ reader->Reader = s->Current;
+ reader->Next = v->Readers;
+ v->Readers = reader;
+ v->NumReaders++;
+
+ s->Current->NumDependencies++;
+
+ if (s->Current->NumReadValues >= 12) {
+ rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
+ } else {
+ s->Current->ReadValues[s->Current->NumReadValues++] = v;
+ }
+}
+
+static void scan_write(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct schedule_state * s = data;
+ struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+
+ if (!pv)
+ return;
+
+ DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+ struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
+ memset(newv, 0, sizeof(*newv));
+
+ newv->Writer = s->Current;
+
+ if (*pv) {
+ (*pv)->Next = newv;
+ s->Current->NumDependencies++;
+ }
+
+ *pv = newv;
+
+ if (s->Current->NumWriteValues >= 4) {
+ rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
+ } else {
+ s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
+ }
+}
+
+static void schedule_block(struct r300_fragment_program_compiler * c,
+ struct rc_instruction * begin, struct rc_instruction * end)
+{
+ struct schedule_state s;
+ struct rc_instruction * inst;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &c->Base;
+
+ /* Scan instructions for data dependencies */
+ unsigned int ip = 0;
+ for(inst = begin; inst != end; inst = inst->Next) {
+ s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
+ memset(s.Current, 0, sizeof(struct schedule_instruction));
+
+ s.Current->Instruction = inst;
+ inst->IP = ip++;
+
+ DBG("%i: Scanning\n", inst->IP);
+
+ /* The order of things here is subtle and maybe slightly
+ * counter-intuitive, to account for the case where an
+ * instruction writes to the same register as it reads
+ * from. */
+ rc_for_all_writes(inst, &scan_write, &s);
+ rc_for_all_reads(inst, &scan_read, &s);
+
+ DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
+
+ if (!s.Current->NumDependencies)
+ instruction_ready(&s, s.Current);
+ }
+
+ /* Temporarily unlink all instructions */
+ begin->Prev->Next = end;
+ end->Prev = begin->Prev;
+
+ /* Schedule instructions back */
+ while(!s.C->Error &&
+ (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
+ if (s.ReadyTEX)
+ emit_all_tex(&s, end);
+
+ while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
+ emit_one_alu(&s, end);
+ }
+}
+
+static int is_controlflow(struct rc_instruction * inst)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ return opcode->IsFlowControl;
+ }
+ return 0;
+}
+
+void rc_pair_schedule(struct r300_fragment_program_compiler *c)
+{
+ struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+ while(inst != &c->Base.Program.Instructions) {
+ if (is_controlflow(inst)) {
+ inst = inst->Next;
+ continue;
+ }
+
+ struct rc_instruction * first = inst;
+
+ while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
+ inst = inst->Next;
+
+ DBG("Schedule one block\n");
+ schedule_block(c, first, inst);
+ }
+}
diff --git a/r300/compiler/radeon_pair_translate.c b/r300/compiler/radeon_pair_translate.c
new file mode 100644
index 0000000..933cf13
--- /dev/null
+++ b/r300/compiler/radeon_pair_translate.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler.h"
+
+
+/**
+ * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
+ * and reverse the order of arguments for CMP.
+ */
+static void final_rewrite(struct rc_sub_instruction *inst)
+{
+ struct rc_src_register tmp;
+
+ switch(inst->Opcode) {
+ case RC_OPCODE_ADD:
+ inst->SrcReg[2] = inst->SrcReg[1];
+ inst->SrcReg[1].File = RC_FILE_NONE;
+ inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+ inst->SrcReg[1].Negate = RC_MASK_NONE;
+ inst->Opcode = RC_OPCODE_MAD;
+ break;
+ case RC_OPCODE_CMP:
+ tmp = inst->SrcReg[2];
+ inst->SrcReg[2] = inst->SrcReg[0];
+ inst->SrcReg[0] = tmp;
+ break;
+ case RC_OPCODE_MOV:
+ /* AMD say we should use CMP.
+ * However, when we transform
+ * KIL -r0;
+ * into
+ * CMP tmp, -r0, -r0, 0;
+ * KIL tmp;
+ * we get incorrect behaviour on R500 when r0 == 0.0.
+ * It appears that the R500 KIL hardware treats -0.0 as less
+ * than zero.
+ */
+ inst->SrcReg[1].File = RC_FILE_NONE;
+ inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+ inst->SrcReg[2].File = RC_FILE_NONE;
+ inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+ inst->Opcode = RC_OPCODE_MAD;
+ break;
+ case RC_OPCODE_MUL:
+ inst->SrcReg[2].File = RC_FILE_NONE;
+ inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+ inst->Opcode = RC_OPCODE_MAD;
+ break;
+ default:
+ /* nothing to do */
+ break;
+ }
+}
+
+
+/**
+ * Classify an instruction according to which ALUs etc. it needs
+ */
+static void classify_instruction(struct rc_sub_instruction * inst,
+ int * needrgb, int * needalpha, int * istranscendent)
+{
+ *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
+ *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
+ *istranscendent = 0;
+
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ *needrgb = 1;
+ else if (inst->WriteALUResult == RC_ALURESULT_W)
+ *needalpha = 1;
+
+ switch(inst->Opcode) {
+ case RC_OPCODE_ADD:
+ case RC_OPCODE_CMP:
+ case RC_OPCODE_DDX:
+ case RC_OPCODE_DDY:
+ case RC_OPCODE_FRC:
+ case RC_OPCODE_MAD:
+ case RC_OPCODE_MAX:
+ case RC_OPCODE_MIN:
+ case RC_OPCODE_MOV:
+ case RC_OPCODE_MUL:
+ break;
+ case RC_OPCODE_COS:
+ case RC_OPCODE_EX2:
+ case RC_OPCODE_LG2:
+ case RC_OPCODE_RCP:
+ case RC_OPCODE_RSQ:
+ case RC_OPCODE_SIN:
+ *istranscendent = 1;
+ *needalpha = 1;
+ break;
+ case RC_OPCODE_DP4:
+ *needalpha = 1;
+ /* fall through */
+ case RC_OPCODE_DP3:
+ *needrgb = 1;
+ break;
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ */
+static void set_pair_instruction(struct r300_fragment_program_compiler *c,
+ struct rc_pair_instruction * pair,
+ struct rc_sub_instruction * inst)
+{
+ memset(pair, 0, sizeof(struct rc_pair_instruction));
+
+ int needrgb, needalpha, istranscendent;
+ classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
+
+ if (needrgb) {
+ if (istranscendent)
+ pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
+ else
+ pair->RGB.Opcode = inst->Opcode;
+ if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+ pair->RGB.Saturate = 1;
+ }
+ if (needalpha) {
+ pair->Alpha.Opcode = inst->Opcode;
+ if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+ pair->Alpha.Saturate = 1;
+ }
+
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+ int nargs = opcode->NumSrcRegs;
+ int i;
+
+ /* Special case for DDX/DDY (MDH/MDV). */
+ if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) {
+ nargs++;
+ }
+
+ for(i = 0; i < opcode->NumSrcRegs; ++i) {
+ int source;
+ if (needrgb && !istranscendent) {
+ unsigned int srcrgb = 0;
+ unsigned int srcalpha = 0;
+ int j;
+ for(j = 0; j < 3; ++j) {
+ unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+ if (swz < 3)
+ srcrgb = 1;
+ else if (swz < 4)
+ srcalpha = 1;
+ }
+ source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+ inst->SrcReg[i].File, inst->SrcReg[i].Index);
+ pair->RGB.Arg[i].Source = source;
+ pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
+ pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+ pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
+ }
+ if (needalpha) {
+ unsigned int srcrgb = 0;
+ unsigned int srcalpha = 0;
+ unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3);
+ if (swz < 3)
+ srcrgb = 1;
+ else if (swz < 4)
+ srcalpha = 1;
+ source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+ inst->SrcReg[i].File, inst->SrcReg[i].Index);
+ pair->Alpha.Arg[i].Source = source;
+ pair->Alpha.Arg[i].Swizzle = swz;
+ pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+ pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+ }
+ }
+
+ /* Destination handling */
+ if (inst->DstReg.File == RC_FILE_OUTPUT) {
+ if (inst->DstReg.Index == c->OutputColor) {
+ pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+ pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ } else if (inst->DstReg.Index == c->OutputDepth) {
+ pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ }
+ } else {
+ if (needrgb) {
+ pair->RGB.DestIndex = inst->DstReg.Index;
+ pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+ }
+ if (needalpha) {
+ pair->Alpha.DestIndex = inst->DstReg.Index;
+ pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ }
+ }
+
+ if (inst->WriteALUResult) {
+ pair->WriteALUResult = inst->WriteALUResult;
+ pair->ALUResultCompare = inst->ALUResultCompare;
+ }
+}
+
+
+/**
+ * Translate all ALU instructions into corresponding pair instructions,
+ * performing no other changes.
+ */
+void rc_pair_translate(struct r300_fragment_program_compiler *c)
+{
+ struct rc_instruction *inst;
+
+ for(inst = c->Base.Program.Instructions.Next;
+ inst != &c->Base.Program.Instructions;
+ inst = inst->Next) {
+ if (inst->Type != RC_INSTRUCTION_NORMAL)
+ continue;
+
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
+ continue;
+
+ struct rc_sub_instruction copy = inst->U.I;
+
+ final_rewrite(&copy);
+ inst->Type = RC_INSTRUCTION_PAIR;
+ set_pair_instruction(c, &inst->U.P, &copy);
+ }
+}
diff --git a/r300/compiler/radeon_program.c b/r300/compiler/radeon_program.c
index 605edb6..fb4752f 100644
--- a/r300/compiler/radeon_program.c
+++ b/r300/compiler/radeon_program.c
@@ -27,9 +27,9 @@
#include "radeon_program.h"
+#include <stdio.h>
+
#include "radeon_compiler.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
/**
@@ -69,38 +69,58 @@ void radeonLocalTransform(
}
}
+/**
+ * Left multiplication of a register with a swizzle
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+ struct rc_src_register tmp = srcreg;
+ int i;
+ tmp.Swizzle = 0;
+ tmp.Negate = 0;
+ for(i = 0; i < 4; ++i) {
+ rc_swizzle swz = GET_SWZ(swizzle, i);
+ if (swz < 4) {
+ tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+ tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
+ } else {
+ tmp.Swizzle |= swz << (i*3);
+ }
+ }
+ return tmp;
+}
-GLint rc_find_free_temporary(struct radeon_compiler * c)
+unsigned int rc_find_free_temporary(struct radeon_compiler * c)
{
struct rc_instruction * rcinst;
- GLboolean used[MAX_PROGRAM_TEMPS];
- GLuint i;
+ char used[RC_REGISTER_MAX_INDEX];
+ unsigned int i;
memset(used, 0, sizeof(used));
for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
- const struct prog_instruction *inst = &rcinst->I;
- const GLuint nsrc = _mesa_num_inst_src_regs(inst->Opcode);
- const GLuint ndst = _mesa_num_inst_dst_regs(inst->Opcode);
- GLuint k;
-
- for (k = 0; k < nsrc; k++) {
- if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
- used[inst->SrcReg[k].Index] = GL_TRUE;
+ const struct rc_sub_instruction *inst = &rcinst->U.I;
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
+ unsigned int k;
+
+ for (k = 0; k < opcode->NumSrcRegs; k++) {
+ if (inst->SrcReg[k].File == RC_FILE_TEMPORARY)
+ used[inst->SrcReg[k].Index] = 1;
}
- if (ndst) {
- if (inst->DstReg.File == PROGRAM_TEMPORARY)
- used[inst->DstReg.Index] = GL_TRUE;
+ if (opcode->HasDstReg) {
+ if (inst->DstReg.File == RC_FILE_TEMPORARY)
+ used[inst->DstReg.Index] = 1;
}
}
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
if (!used[i])
return i;
}
- return -1;
+ rc_error(c, "Ran out of temporary registers\n");
+ return 0;
}
@@ -108,24 +128,31 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
{
struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
- inst->Prev = 0;
- inst->Next = 0;
+ memset(inst, 0, sizeof(struct rc_instruction));
- _mesa_init_instructions(&inst->I, 1);
+ inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW;
return inst;
}
-
-struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
{
- struct rc_instruction * inst = rc_alloc_instruction(c);
-
inst->Prev = after;
inst->Next = after->Next;
inst->Prev->Next = inst;
inst->Next->Prev = inst;
+}
+
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+ struct rc_instruction * inst = rc_alloc_instruction(c);
+
+ rc_insert_instruction(after, inst);
return inst;
}
@@ -136,76 +163,21 @@ void rc_remove_instruction(struct rc_instruction * inst)
inst->Next->Prev = inst->Prev;
}
-
-void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
-{
- struct prog_instruction *source;
- unsigned int i;
-
- for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) {
- struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
- dest->I = *source;
- }
-
- c->Program.ShadowSamplers = program->ShadowSamplers;
- c->Program.InputsRead = program->InputsRead;
- c->Program.OutputsWritten = program->OutputsWritten;
-
- int isNVProgram = 0;
-
- if (program->Target == GL_VERTEX_PROGRAM_ARB) {
- struct gl_vertex_program * vp = (struct gl_vertex_program *) program;
- isNVProgram = vp->IsNVProgram;
- }
-
- if (isNVProgram) {
- /* NV_vertex_program has a fixed-sized constant environment.
- * This could be handled more efficiently for programs that
- * do not use relative addressing.
- */
- for(i = 0; i < 96; ++i) {
- struct rc_constant constant;
-
- constant.Type = RC_CONSTANT_EXTERNAL;
- constant.Size = 4;
- constant.u.External = i;
-
- rc_constants_add(&c->Program.Constants, &constant);
- }
- } else {
- for(i = 0; i < program->Parameters->NumParameters; ++i) {
- struct rc_constant constant;
-
- constant.Type = RC_CONSTANT_EXTERNAL;
- constant.Size = 4;
- constant.u.External = i;
-
- rc_constants_add(&c->Program.Constants, &constant);
- }
- }
-}
-
-
/**
- * Print program to stderr, default options.
+ * Return the number of instructions in the program.
*/
-void rc_print_program(const struct rc_program *prog)
+unsigned int rc_recompute_ips(struct radeon_compiler * c)
{
- GLuint indent = 0;
- GLuint linenum = 1;
- struct rc_instruction *inst;
-
- fprintf(stderr, "# Radeon Compiler Program\n");
+ unsigned int ip = 0;
+ struct rc_instruction * inst;
- for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
- fprintf(stderr, "%3d: ", linenum);
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ inst->IP = ip++;
+ }
- /* Massive hack: We rely on the fact that the printers do not actually
- * use the gl_program argument (last argument) in debug mode */
- indent = _mesa_fprint_instruction_opt(
- stderr, &inst->I,
- indent, PROG_PRINT_DEBUG, 0);
+ c->Program.Instructions.IP = 0xcafedead;
- linenum++;
- }
+ return ip;
}
diff --git a/r300/compiler/radeon_program.h b/r300/compiler/radeon_program.h
index 5619586..0359288 100644
--- a/r300/compiler/radeon_program.h
+++ b/r300/compiler/radeon_program.h
@@ -28,37 +28,144 @@
#ifndef __RADEON_PROGRAM_H_
#define __RADEON_PROGRAM_H_
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
+#include <stdint.h>
+#include <string.h>
+
+#include "radeon_opcodes.h"
+#include "radeon_code.h"
+#include "radeon_program_constants.h"
+#include "radeon_program_pair.h"
struct radeon_compiler;
-struct rc_instruction;
-struct rc_program;
-enum {
- PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
+struct rc_src_register {
+ rc_register_file File:3;
+
+ /** Negative values may be used for relative addressing. */
+ signed int Index:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int RelAddr:1;
+
+ unsigned int Swizzle:12;
+
+ /** Take the component-wise absolute value */
+ unsigned int Abs:1;
+
+ /** Post-Abs negation. */
+ unsigned int Negate:4;
+};
+
+struct rc_dst_register {
+ rc_register_file File:3;
+
+ /** Negative values may be used for relative addressing. */
+ signed int Index:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int RelAddr:1;
+
+ unsigned int WriteMask:4;
+};
+
+/**
+ * Instructions are maintained by the compiler in a doubly linked list
+ * of these structures.
+ *
+ * This instruction format is intended to be expanded for hardware-specific
+ * trickery. At different stages of compilation, a different set of
+ * instruction types may be valid.
+ */
+struct rc_sub_instruction {
+ struct rc_src_register SrcReg[3];
+ struct rc_dst_register DstReg;
+
+ /**
+ * Opcode of this instruction, according to \ref rc_opcode enums.
+ */
+ rc_opcode Opcode:8;
+
+ /**
+ * Saturate each value of the result to the range [0,1] or [-1,1],
+ * according to \ref rc_saturate_mode enums.
+ */
+ rc_saturate_mode SaturateMode:2;
+
+ /**
+ * Writing to the special register RC_SPECIAL_ALU_RESULT
+ */
+ /*@{*/
+ rc_write_aluresult WriteALUResult:2;
+ rc_compare_func ALUResultCompare:3;
+ /*@}*/
+
+ /**
+ * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
+ */
+ /*@{*/
+ /** Source texture unit. */
+ unsigned int TexSrcUnit:5;
+
+ /** Source texture target, one of the \ref rc_texture_target enums */
+ rc_texture_target TexSrcTarget:3;
+
+ /** True if tex instruction should do shadow comparison */
+ unsigned int TexShadow:1;
+ /*@}*/
+};
+
+typedef enum {
+ RC_INSTRUCTION_NORMAL = 0,
+ RC_INSTRUCTION_PAIR
+} rc_instruction_type;
+
+struct rc_instruction {
+ struct rc_instruction * Prev;
+ struct rc_instruction * Next;
+
+ rc_instruction_type Type;
+ union {
+ struct rc_sub_instruction I;
+ struct rc_pair_instruction P;
+ } U;
+
+ /**
+ * Warning: IPs are not stable. If you want to use them,
+ * you need to recompute them at the beginning of each pass
+ * using \ref rc_recompute_ips
+ */
+ unsigned int IP;
+};
+
+struct rc_program {
+ /**
+ * Instructions.Next points to the first instruction,
+ * Instructions.Prev points to the last instruction.
+ */
+ struct rc_instruction Instructions;
+
+ /* Long term, we should probably remove InputsRead & OutputsWritten,
+ * since updating dependent state can be fragile, and they aren't
+ * actually used very often. */
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+ uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+ struct rc_constant_list Constants;
};
enum {
- OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */
+ OPCODE_REPL_ALPHA = MAX_RC_OPCODE /**< used in paired instructions */
};
-#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
-#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
-static inline GLuint get_swz(GLuint swz, GLuint idx)
+static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
{
if (idx & 0x4)
return idx;
return GET_SWZ(swz, idx);
}
-static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w)
+static inline unsigned int combine_swizzles4(unsigned int src,
+ rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
{
- GLuint ret = 0;
+ unsigned int ret = 0;
ret |= get_swz(src, swz_x);
ret |= get_swz(src, swz_y) << 3;
@@ -68,22 +175,24 @@ static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, G
return ret;
}
-static inline GLuint combine_swizzles(GLuint src, GLuint swz)
+static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz)
{
- GLuint ret = 0;
+ unsigned int ret = 0;
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X));
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3;
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6;
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
return ret;
}
-static INLINE void reset_srcreg(struct prog_src_register* reg)
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+static inline void reset_srcreg(struct rc_src_register* reg)
{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
+ memset(reg, 0, sizeof(struct rc_src_register));
+ reg->Swizzle = RC_SWIZZLE_XYZW;
}
@@ -92,13 +201,13 @@ static INLINE void reset_srcreg(struct prog_src_register* reg)
*
* The function will be called once for each instruction.
* It has to either emit the appropriate transformed code for the instruction
- * and return GL_TRUE, or return GL_FALSE if it doesn't understand the
+ * and return true, or return false if it doesn't understand the
* instruction.
*
* The function gets passed the userData as last parameter.
*/
struct radeon_program_transformation {
- GLboolean (*function)(
+ int (*function)(
struct radeon_compiler*,
struct rc_instruction*,
void*);
@@ -110,12 +219,15 @@ void radeonLocalTransform(
int num_transformations,
struct radeon_program_transformation* transformations);
-GLint rc_find_free_temporary(struct radeon_compiler * c);
+unsigned int rc_find_free_temporary(struct radeon_compiler * c);
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
void rc_remove_instruction(struct rc_instruction * inst);
+unsigned int rc_recompute_ips(struct radeon_compiler * c);
+
void rc_print_program(const struct rc_program *prog);
#endif
diff --git a/r300/compiler/radeon_program_alu.c b/r300/compiler/radeon_program_alu.c
index f23ce30..ced66af 100644
--- a/r300/compiler/radeon_program_alu.c
+++ b/r300/compiler/radeon_program_alu.c
@@ -40,175 +40,164 @@
static struct rc_instruction *emit1(
struct radeon_compiler * c, struct rc_instruction * after,
- gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
- struct prog_src_register SrcReg)
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->I.Opcode = Opcode;
- fpi->I.SaturateMode = Saturate;
- fpi->I.DstReg = DstReg;
- fpi->I.SrcReg[0] = SrcReg;
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg;
return fpi;
}
static struct rc_instruction *emit2(
struct radeon_compiler * c, struct rc_instruction * after,
- gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
- struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->I.Opcode = Opcode;
- fpi->I.SaturateMode = Saturate;
- fpi->I.DstReg = DstReg;
- fpi->I.SrcReg[0] = SrcReg0;
- fpi->I.SrcReg[1] = SrcReg1;
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg0;
+ fpi->U.I.SrcReg[1] = SrcReg1;
return fpi;
}
static struct rc_instruction *emit3(
struct radeon_compiler * c, struct rc_instruction * after,
- gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
- struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
- struct prog_src_register SrcReg2)
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
+ struct rc_src_register SrcReg2)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->I.Opcode = Opcode;
- fpi->I.SaturateMode = Saturate;
- fpi->I.DstReg = DstReg;
- fpi->I.SrcReg[0] = SrcReg0;
- fpi->I.SrcReg[1] = SrcReg1;
- fpi->I.SrcReg[2] = SrcReg2;
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg0;
+ fpi->U.I.SrcReg[1] = SrcReg1;
+ fpi->U.I.SrcReg[2] = SrcReg2;
return fpi;
}
-static struct prog_dst_register dstreg(int file, int index)
+static struct rc_dst_register dstreg(int file, int index)
{
- struct prog_dst_register dst;
+ struct rc_dst_register dst;
dst.File = file;
dst.Index = index;
- dst.WriteMask = WRITEMASK_XYZW;
- dst.CondMask = COND_TR;
+ dst.WriteMask = RC_MASK_XYZW;
dst.RelAddr = 0;
- dst.CondSwizzle = SWIZZLE_NOOP;
- dst.CondSrc = 0;
- dst.pad = 0;
return dst;
}
-static struct prog_dst_register dstregtmpmask(int index, int mask)
+static struct rc_dst_register dstregtmpmask(int index, int mask)
{
- struct prog_dst_register dst = {0};
- dst.File = PROGRAM_TEMPORARY;
+ struct rc_dst_register dst = {0};
+ dst.File = RC_FILE_TEMPORARY;
dst.Index = index;
dst.WriteMask = mask;
dst.RelAddr = 0;
- dst.CondMask = COND_TR;
- dst.CondSwizzle = SWIZZLE_NOOP;
- dst.CondSrc = 0;
- dst.pad = 0;
return dst;
}
-static const struct prog_src_register builtin_zero = {
- .File = PROGRAM_BUILTIN,
+static const struct rc_src_register builtin_zero = {
+ .File = RC_FILE_NONE,
.Index = 0,
- .Swizzle = SWIZZLE_0000
+ .Swizzle = RC_SWIZZLE_0000
};
-static const struct prog_src_register builtin_one = {
- .File = PROGRAM_BUILTIN,
+static const struct rc_src_register builtin_one = {
+ .File = RC_FILE_NONE,
.Index = 0,
- .Swizzle = SWIZZLE_1111
+ .Swizzle = RC_SWIZZLE_1111
};
-static const struct prog_src_register srcreg_undefined = {
- .File = PROGRAM_UNDEFINED,
+static const struct rc_src_register srcreg_undefined = {
+ .File = RC_FILE_NONE,
.Index = 0,
- .Swizzle = SWIZZLE_NOOP
+ .Swizzle = RC_SWIZZLE_XYZW
};
-static struct prog_src_register srcreg(int file, int index)
+static struct rc_src_register srcreg(int file, int index)
{
- struct prog_src_register src = srcreg_undefined;
+ struct rc_src_register src = srcreg_undefined;
src.File = file;
src.Index = index;
return src;
}
-static struct prog_src_register srcregswz(int file, int index, int swz)
+static struct rc_src_register srcregswz(int file, int index, int swz)
{
- struct prog_src_register src = srcreg_undefined;
+ struct rc_src_register src = srcreg_undefined;
src.File = file;
src.Index = index;
src.Swizzle = swz;
return src;
}
-static struct prog_src_register absolute(struct prog_src_register reg)
+static struct rc_src_register absolute(struct rc_src_register reg)
{
- struct prog_src_register newreg = reg;
+ struct rc_src_register newreg = reg;
newreg.Abs = 1;
- newreg.Negate = NEGATE_NONE;
+ newreg.Negate = RC_MASK_NONE;
return newreg;
}
-static struct prog_src_register negate(struct prog_src_register reg)
+static struct rc_src_register negate(struct rc_src_register reg)
{
- struct prog_src_register newreg = reg;
- newreg.Negate = newreg.Negate ^ NEGATE_XYZW;
+ struct rc_src_register newreg = reg;
+ newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
return newreg;
}
-static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w)
+static struct rc_src_register swizzle(struct rc_src_register reg,
+ rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
{
- struct prog_src_register swizzled = reg;
- swizzled.Swizzle = MAKE_SWIZZLE4(
- x >= 4 ? x : GET_SWZ(reg.Swizzle, x),
- y >= 4 ? y : GET_SWZ(reg.Swizzle, y),
- z >= 4 ? z : GET_SWZ(reg.Swizzle, z),
- w >= 4 ? w : GET_SWZ(reg.Swizzle, w));
+ struct rc_src_register swizzled = reg;
+ swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
return swizzled;
}
-static struct prog_src_register scalar(struct prog_src_register reg)
+static struct rc_src_register scalar(struct rc_src_register reg)
{
- return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+ return swizzle(reg, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X);
}
static void transform_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- struct prog_src_register src = inst->I.SrcReg[0];
+ struct rc_src_register src = inst->U.I.SrcReg[0];
src.Abs = 1;
- src.Negate = NEGATE_NONE;
- emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src);
+ src.Negate = RC_MASK_NONE;
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
rc_remove_instruction(inst);
}
static void transform_DP3(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- struct prog_src_register src0 = inst->I.SrcReg[0];
- struct prog_src_register src1 = inst->I.SrcReg[1];
- src0.Negate &= ~NEGATE_W;
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ struct rc_src_register src1 = inst->U.I.SrcReg[1];
+ src0.Negate &= ~RC_MASK_W;
src0.Swizzle &= ~(7 << (3 * 3));
- src0.Swizzle |= SWIZZLE_ZERO << (3 * 3);
- src1.Negate &= ~NEGATE_W;
+ src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ src1.Negate &= ~RC_MASK_W;
src1.Swizzle &= ~(7 << (3 * 3));
- src1.Swizzle |= SWIZZLE_ZERO << (3 * 3);
- emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1);
+ src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
static void transform_DPH(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- struct prog_src_register src0 = inst->I.SrcReg[0];
- src0.Negate &= ~NEGATE_W;
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ src0.Negate &= ~RC_MASK_W;
src0.Swizzle &= ~(7 << (3 * 3));
- src0.Swizzle |= SWIZZLE_ONE << (3 * 3);
- emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]);
+ src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
@@ -219,9 +208,9 @@ static void transform_DPH(struct radeon_compiler* c,
static void transform_DST(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg,
- swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
- swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
rc_remove_instruction(inst);
}
@@ -229,9 +218,9 @@ static void transform_FLR(struct radeon_compiler* c,
struct rc_instruction* inst)
{
int tempreg = rc_find_free_temporary(c);
- emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]);
- emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg,
- inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
rc_remove_instruction(inst);
}
@@ -256,64 +245,64 @@ static void transform_FLR(struct radeon_compiler* c,
static void transform_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- GLuint constant;
- GLuint constant_swizzle;
- GLuint temp;
- struct prog_src_register srctemp;
+ unsigned int constant;
+ unsigned int constant_swizzle;
+ unsigned int temp;
+ struct rc_src_register srctemp;
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
- if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) {
+ if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
struct rc_instruction * inst_mov;
inst_mov = emit1(c, inst,
- OPCODE_MOV, 0, inst->I.DstReg,
- srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c)));
+ RC_OPCODE_MOV, 0, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
}
- temp = inst->I.DstReg.Index;
- srctemp = srcreg(PROGRAM_TEMPORARY, temp);
+ temp = inst->U.I.DstReg.Index;
+ srctemp = srcreg(RC_FILE_TEMPORARY, temp);
// tmp.x = max(0.0, Src.x);
// tmp.y = max(0.0, Src.y);
// tmp.w = clamp(Src.z, -128+eps, 128-eps);
- emit2(c, inst->Prev, OPCODE_MAX, 0,
- dstregtmpmask(temp, WRITEMASK_XYW),
- inst->I.SrcReg[0],
- swizzle(srcreg(PROGRAM_CONSTANT, constant),
- SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
- emit2(c, inst->Prev, OPCODE_MIN, 0,
- dstregtmpmask(temp, WRITEMASK_Z),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ dstregtmpmask(temp, RC_MASK_XYW),
+ inst->U.I.SrcReg[0],
+ swizzle(srcreg(RC_FILE_CONSTANT, constant),
+ RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
+ emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
+ dstregtmpmask(temp, RC_MASK_Z),
+ swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
// tmp.w = Pow(tmp.y, tmp.w)
- emit1(c, inst->Prev, OPCODE_LG2, 0,
- dstregtmpmask(temp, WRITEMASK_W),
- swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit2(c, inst->Prev, OPCODE_MUL, 0,
- dstregtmpmask(temp, WRITEMASK_W),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
- emit1(c, inst->Prev, OPCODE_EX2, 0,
- dstregtmpmask(temp, WRITEMASK_W),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+ emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle(srctemp, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle(srctemp, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z));
+ emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W));
// tmp.z = (tmp.x > 0) ? tmp.w : 0.0
- emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode,
- dstregtmpmask(temp, WRITEMASK_Z),
- negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+ dstregtmpmask(temp, RC_MASK_Z),
+ negate(swizzle(srctemp, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)),
+ swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
builtin_zero);
// tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
- emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode,
- dstregtmpmask(temp, WRITEMASK_XYW),
- swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+ dstregtmpmask(temp, RC_MASK_XYW),
+ swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
rc_remove_instruction(inst);
}
@@ -323,12 +312,12 @@ static void transform_LRP(struct radeon_compiler* c,
{
int tempreg = rc_find_free_temporary(c);
- emit2(c, inst->Prev, OPCODE_ADD, 0,
- dstreg(PROGRAM_TEMPORARY, tempreg),
- inst->I.SrcReg[1], negate(inst->I.SrcReg[2]));
- emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode,
- inst->I.DstReg,
- inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg),
+ inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]);
rc_remove_instruction(inst);
}
@@ -337,14 +326,14 @@ static void transform_POW(struct radeon_compiler* c,
struct rc_instruction* inst)
{
int tempreg = rc_find_free_temporary(c);
- struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
- struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
- tempdst.WriteMask = WRITEMASK_W;
- tempsrc.Swizzle = SWIZZLE_WWWW;
+ struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg);
+ struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg);
+ tempdst.WriteMask = RC_MASK_W;
+ tempsrc.Swizzle = RC_SWIZZLE_WWWW;
- emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0]));
- emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1]));
- emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc);
+ emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, scalar(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->U.I.SrcReg[1]));
+ emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
rc_remove_instruction(inst);
}
@@ -352,7 +341,26 @@ static void transform_POW(struct radeon_compiler* c,
static void transform_RSQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]);
+ inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
+}
+
+static void transform_SEQ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SFL(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
+ rc_remove_instruction(inst);
}
static void transform_SGE(struct radeon_compiler* c,
@@ -360,9 +368,33 @@ static void transform_SGE(struct radeon_compiler* c,
{
int tempreg = rc_find_free_temporary(c);
- emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
- emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
- srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SGT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SLE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -372,9 +404,21 @@ static void transform_SLT(struct radeon_compiler* c,
{
int tempreg = rc_find_free_temporary(c);
- emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
- emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
- srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SNE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -382,14 +426,14 @@ static void transform_SLT(struct radeon_compiler* c,
static void transform_SUB(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- inst->I.Opcode = OPCODE_ADD;
- inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]);
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
}
static void transform_SWZ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- inst->I.Opcode = OPCODE_MOV;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
}
static void transform_XPD(struct radeon_compiler* c,
@@ -397,13 +441,13 @@ static void transform_XPD(struct radeon_compiler* c,
{
int tempreg = rc_find_free_temporary(c);
- emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
- swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
- swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
- emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg,
- swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
- swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
- negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+ negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
rc_remove_instruction(inst);
}
@@ -414,7 +458,7 @@ static void transform_XPD(struct radeon_compiler* c,
* no userData necessary.
*
* Eliminates the following ALU instructions:
- * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
+ * ABS, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
* using:
* MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
*
@@ -423,27 +467,32 @@ static void transform_XPD(struct radeon_compiler* c,
*
* @note should be applicable to R300 and R500 fragment programs.
*/
-GLboolean radeonTransformALU(
+int radeonTransformALU(
struct radeon_compiler * c,
struct rc_instruction* inst,
void* unused)
{
- switch(inst->I.Opcode) {
- case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE;
- case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
- case OPCODE_DST: transform_DST(c, inst); return GL_TRUE;
- case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
- case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE;
- case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
- case OPCODE_POW: transform_POW(c, inst); return GL_TRUE;
- case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE;
- case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE;
- case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE;
- case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
- case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
- case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
+ case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+ case RC_OPCODE_DST: transform_DST(c, inst); return 1;
+ case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+ case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
+ case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+ case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+ case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
+ case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
+ case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+ case RC_OPCODE_SGE: transform_SGE(c, inst); return 1;
+ case RC_OPCODE_SGT: transform_SGT(c, inst); return 1;
+ case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
+ case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
+ case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
+ case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+ case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
- return GL_FALSE;
+ return 0;
}
}
@@ -452,37 +501,37 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* Note: r500 can take absolute values, but r300 cannot. */
- inst->I.Opcode = OPCODE_MAX;
- inst->I.SrcReg[1] = inst->I.SrcReg[0];
- inst->I.SrcReg[1].Negate ^= NEGATE_XYZW;
+ inst->U.I.Opcode = RC_OPCODE_MAX;
+ inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
*/
-GLboolean r300_transform_vertex_alu(
+int r300_transform_vertex_alu(
struct radeon_compiler * c,
struct rc_instruction* inst,
void* unused)
{
- switch(inst->I.Opcode) {
- case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE;
- case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE;
- case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
- case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
- case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
- case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
- case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
- case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
+ case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
+ case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+ case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+ case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+ case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+ case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
- return GL_FALSE;
+ return 0;
}
}
-static void sincos_constants(struct radeon_compiler* c, GLuint *constants)
+static void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
{
- static const GLfloat SinCosConsts[2][4] = {
+ static const float SinCosConsts[2][4] = {
{
1.273239545, // 4/PI
-0.405284735, // -4/(PI*PI)
@@ -511,26 +560,26 @@ static void sincos_constants(struct radeon_compiler* c, GLuint *constants)
* MAD dest, tmp.y, weight, tmp.x
*/
static void sin_approx(
- struct radeon_compiler* c, struct rc_instruction * before,
- struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
-{
- GLuint tempreg = rc_find_free_temporary(c);
-
- emit2(c, before->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- srcreg(PROGRAM_CONSTANT, constants[0]));
- emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
- absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
- negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
- emit3(c, before->Prev, OPCODE_MAD, 0, dst,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+ struct radeon_compiler* c, struct rc_instruction * inst,
+ struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
+{
+ unsigned int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ srcreg(RC_FILE_CONSTANT, constants[0]));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y),
+ absolute(swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ absolute(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)),
+ negate(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X));
}
/**
@@ -538,81 +587,81 @@ static void sin_approx(
* using only the basic instructions
* MOV, ADD, MUL, MAD, FRC
*/
-GLboolean radeonTransformTrigSimple(struct radeon_compiler* c,
+int radeonTransformTrigSimple(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
- if (inst->I.Opcode != OPCODE_COS &&
- inst->I.Opcode != OPCODE_SIN &&
- inst->I.Opcode != OPCODE_SCS)
- return GL_FALSE;
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
- GLuint constants[2];
- GLuint tempreg = rc_find_free_temporary(c);
+ unsigned int constants[2];
+ unsigned int tempreg = rc_find_free_temporary(c);
sincos_constants(c, constants);
- if (inst->I.Opcode == OPCODE_COS) {
+ if (inst->U.I.Opcode == RC_OPCODE_COS) {
// MAD tmp.x, src, 1/(2*PI), 0.75
// FRC tmp.x, tmp.x
// MAD tmp.z, tmp.x, 2*PI, -PI
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
-
- sin_approx(c, inst, inst->I.DstReg,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)));
+
+ sin_approx(c, inst, inst->U.I.DstReg,
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
constants);
- } else if (inst->I.Opcode == OPCODE_SIN) {
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
-
- sin_approx(c, inst, inst->I.DstReg,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)));
+
+ sin_approx(c, inst, inst->U.I.DstReg,
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
constants);
} else {
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
- emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- srcreg(PROGRAM_TEMPORARY, tempreg));
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- srcreg(PROGRAM_TEMPORARY, tempreg),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
-
- struct prog_dst_register dst = inst->I.DstReg;
-
- dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X;
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ srcreg(RC_FILE_TEMPORARY, tempreg));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ srcreg(RC_FILE_TEMPORARY, tempreg),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)));
+
+ struct rc_dst_register dst = inst->U.I.DstReg;
+
+ dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
sin_approx(c, inst, dst,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
constants);
- dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y;
+ dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
sin_approx(c, inst, dst,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y),
constants);
}
rc_remove_instruction(inst);
- return GL_TRUE;
+ return 1;
}
@@ -624,53 +673,53 @@ GLboolean radeonTransformTrigSimple(struct radeon_compiler* c,
*
* @warning This transformation implicitly changes the semantics of SIN and COS!
*/
-GLboolean radeonTransformTrigScale(struct radeon_compiler* c,
+int radeonTransformTrigScale(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
- if (inst->I.Opcode != OPCODE_COS &&
- inst->I.Opcode != OPCODE_SIN &&
- inst->I.Opcode != OPCODE_SCS)
- return GL_FALSE;
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
- static const GLfloat RCP_2PI = 0.15915494309189535;
- GLuint temp;
- GLuint constant;
- GLuint constant_swizzle;
+ static const float RCP_2PI = 0.15915494309189535;
+ unsigned int temp;
+ unsigned int constant;
+ unsigned int constant_swizzle;
temp = rc_find_free_temporary(c);
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
- emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
- swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
- emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
- srcreg(PROGRAM_TEMPORARY, temp));
-
- if (inst->I.Opcode == OPCODE_COS) {
- emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg,
- srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->I.Opcode == OPCODE_SIN) {
- emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode,
- inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->I.Opcode == OPCODE_SCS) {
- struct prog_dst_register moddst = inst->I.DstReg;
-
- if (inst->I.DstReg.WriteMask & WRITEMASK_X) {
- moddst.WriteMask = WRITEMASK_X;
- emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst,
- srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+ srcreg(RC_FILE_TEMPORARY, temp));
+
+ if (inst->U.I.Opcode == RC_OPCODE_COS) {
+ emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
+ } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+ emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+ inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
+ } else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
+ struct rc_dst_register moddst = inst->U.I.DstReg;
+
+ if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+ moddst.WriteMask = RC_MASK_X;
+ emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+ srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
}
- if (inst->I.DstReg.WriteMask & WRITEMASK_Y) {
- moddst.WriteMask = WRITEMASK_Y;
- emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst,
- srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+ moddst.WriteMask = RC_MASK_Y;
+ emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+ srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
}
}
rc_remove_instruction(inst);
- return GL_TRUE;
+ return 1;
}
/**
@@ -681,15 +730,15 @@ GLboolean radeonTransformTrigScale(struct radeon_compiler* c,
* @warning This explicitly changes the form of DDX and DDY!
*/
-GLboolean radeonTransformDeriv(struct radeon_compiler* c,
+int radeonTransformDeriv(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
- if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY)
- return GL_FALSE;
+ if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
+ return 0;
- inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
- inst->I.SrcReg[1].Negate = NEGATE_XYZW;
+ inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE);
+ inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
- return GL_TRUE;
+ return 1;
}
diff --git a/r300/compiler/radeon_program_alu.h b/r300/compiler/radeon_program_alu.h
index 147efec..7cb5f84 100644
--- a/r300/compiler/radeon_program_alu.h
+++ b/r300/compiler/radeon_program_alu.h
@@ -30,27 +30,27 @@
#include "radeon_program.h"
-GLboolean radeonTransformALU(
+int radeonTransformALU(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
-GLboolean r300_transform_vertex_alu(
+int r300_transform_vertex_alu(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
-GLboolean radeonTransformTrigSimple(
+int radeonTransformTrigSimple(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
-GLboolean radeonTransformTrigScale(
+int radeonTransformTrigScale(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
-GLboolean radeonTransformDeriv(
+int radeonTransformDeriv(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
diff --git a/r300/compiler/radeon_program_constants.h b/r300/compiler/radeon_program_constants.h
new file mode 100644
index 0000000..7c0d672
--- /dev/null
+++ b/r300/compiler/radeon_program_constants.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_CONSTANTS_H
+#define RADEON_PROGRAM_CONSTANTS_H
+
+typedef enum {
+ RC_SATURATE_NONE = 0,
+ RC_SATURATE_ZERO_ONE,
+ RC_SATURATE_MINUS_PLUS_ONE
+} rc_saturate_mode;
+
+typedef enum {
+ RC_TEXTURE_2D_ARRAY,
+ RC_TEXTURE_1D_ARRAY,
+ RC_TEXTURE_CUBE,
+ RC_TEXTURE_3D,
+ RC_TEXTURE_RECT,
+ RC_TEXTURE_2D,
+ RC_TEXTURE_1D
+} rc_texture_target;
+
+typedef enum {
+ /**
+ * Used to indicate unused register descriptions and
+ * source register that use a constant swizzle.
+ */
+ RC_FILE_NONE = 0,
+ RC_FILE_TEMPORARY,
+
+ /**
+ * Input register.
+ *
+ * \note The compiler attaches no implicit semantics to input registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_INPUT,
+
+ /**
+ * Output register.
+ *
+ * \note The compiler attaches no implicit semantics to input registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_OUTPUT,
+ RC_FILE_ADDRESS,
+
+ /**
+ * Indicates a constant from the \ref rc_constant_list .
+ */
+ RC_FILE_CONSTANT,
+
+ /**
+ * Indicates a special register, see RC_SPECIAL_xxx.
+ */
+ RC_FILE_SPECIAL
+} rc_register_file;
+
+enum {
+ /** R500 fragment program ALU result "register" */
+ RC_SPECIAL_ALU_RESULT = 0,
+
+ /** Must be last */
+ RC_NUM_SPECIAL_REGISTERS
+};
+
+#define RC_REGISTER_INDEX_BITS 10
+#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
+
+typedef enum {
+ RC_SWIZZLE_X = 0,
+ RC_SWIZZLE_Y,
+ RC_SWIZZLE_Z,
+ RC_SWIZZLE_W,
+ RC_SWIZZLE_ZERO,
+ RC_SWIZZLE_ONE,
+ RC_SWIZZLE_HALF,
+ RC_SWIZZLE_UNUSED
+} rc_swizzle;
+
+#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
+#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
+#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
+#define SET_SWZ(swz, idx, newv) \
+ do { \
+ (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
+ } while(0)
+
+#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
+#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
+#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
+#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
+#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+
+/**
+ * \name Bitmasks for components of vectors.
+ *
+ * Used for write masks, negation masks, etc.
+ */
+/*@{*/
+#define RC_MASK_NONE 0
+#define RC_MASK_X 1
+#define RC_MASK_Y 2
+#define RC_MASK_Z 4
+#define RC_MASK_W 8
+#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
+#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
+#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
+#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
+/*@}*/
+
+typedef enum {
+ RC_ALURESULT_NONE = 0,
+ RC_ALURESULT_X,
+ RC_ALURESULT_W
+} rc_write_aluresult;
+
+#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/r300/compiler/radeon_program_pair.c b/r300/compiler/radeon_program_pair.c
index 4c26db5..ee83959 100644
--- a/r300/compiler/radeon_program_pair.c
+++ b/r300/compiler/radeon_program_pair.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Nicolai Haehnle.
+ * Copyright (C) 2008-2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
@@ -25,584 +25,29 @@
*
*/
-/**
- * @file
- *
- * Perform temporary register allocation and attempt to pair off instructions
- * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
- * vs. ALU instruction scheduling.
- */
-
#include "radeon_program_pair.h"
-#include "memory_pool.h"
-#include "radeon_compiler.h"
-#include "shader/prog_print.h"
-
-#define error(fmt, args...) do { \
- rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \
- __FILE__, __FUNCTION__, ##args); \
-} while(0)
-
-struct pair_state_instruction {
- struct prog_instruction Instruction;
- GLuint IP; /**< Position of this instruction in original program */
-
- GLuint IsTex:1; /**< Is a texture instruction */
- GLuint NeedRGB:1; /**< Needs the RGB ALU */
- GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
- GLuint IsTranscendent:1; /**< Is a special transcendent instruction */
-
- /**
- * Number of (read and write) dependencies that must be resolved before
- * this instruction can be scheduled.
- */
- GLuint NumDependencies:5;
-
- /**
- * Next instruction in the linked list of ready instructions.
- */
- struct pair_state_instruction *NextReady;
-
- /**
- * Values that this instruction writes
- */
- struct reg_value *Values[4];
-};
-
-
-/**
- * Used to keep track of which instructions read a value.
- */
-struct reg_value_reader {
- struct pair_state_instruction *Reader;
- struct reg_value_reader *Next;
-};
-
-/**
- * Used to keep track which values are stored in each component of a
- * PROGRAM_TEMPORARY.
- */
-struct reg_value {
- struct pair_state_instruction *Writer;
- struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */
-
- /**
- * Unordered linked list of instructions that read from this value.
- */
- struct reg_value_reader *Readers;
-
- /**
- * Number of readers of this value. This is calculated during @ref scan_instructions
- * and continually decremented during code emission.
- * When this count reaches zero, the instruction that writes the @ref Next value
- * can be scheduled.
- */
- GLuint NumReaders;
-};
-
-/**
- * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
- * to the proper hardware temporary.
- */
-struct pair_register_translation {
- GLuint Allocated:1;
- GLuint HwIndex:8;
- GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */
-
- /**
- * Notes the value that is currently contained in each component
- * (only used for PROGRAM_TEMPORARY registers).
- */
- struct reg_value *Value[4];
-};
-
-struct pair_state {
- struct r300_fragment_program_compiler * Compiler;
- const struct radeon_pair_handler *Handler;
- GLboolean Verbose;
- void *UserData;
-
- /**
- * Translate Mesa registers to hardware registers
- */
- struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
- struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];
-
- struct {
- GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
- } HwTemps[128];
-
- /**
- * Linked list of instructions that can be scheduled right now,
- * based on which ALU/TEX resources they require.
- */
- struct pair_state_instruction *ReadyFullALU;
- struct pair_state_instruction *ReadyRGB;
- struct pair_state_instruction *ReadyAlpha;
- struct pair_state_instruction *ReadyTEX;
-};
-
-
-static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index)
-{
- switch(file) {
- case PROGRAM_TEMPORARY: return &s->Temps[index];
- case PROGRAM_INPUT: return &s->Inputs[index];
- default: return 0;
- }
-}
-
-static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex)
-{
- struct pair_register_translation *t = get_register(s, file, index);
- ASSERT(!s->HwTemps[hwindex].RefCount);
- ASSERT(!t->Allocated);
- s->HwTemps[hwindex].RefCount = t->RefCount;
- t->Allocated = 1;
- t->HwIndex = hwindex;
-}
-
-static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
-{
- GLuint hwindex;
-
- struct pair_register_translation *t = get_register(s, file, index);
- if (!t) {
- error("get_hw_reg: %i[%i]\n", file, index);
- return 0;
- }
-
- if (t->Allocated)
- return t->HwIndex;
-
- for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex)
- if (!s->HwTemps[hwindex].RefCount)
- break;
-
- if (hwindex >= s->Handler->MaxHwTemps) {
- error("Ran out of hardware temporaries");
- return 0;
- }
-
- alloc_hw_reg(s, file, index, hwindex);
- return hwindex;
-}
-
-
-static void deref_hw_reg(struct pair_state *s, GLuint hwindex)
-{
- if (!s->HwTemps[hwindex].RefCount) {
- error("Hwindex %i refcount error", hwindex);
- return;
- }
-
- s->HwTemps[hwindex].RefCount--;
-}
-
-static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst)
-{
- pairinst->NextReady = *list;
- *list = pairinst;
-}
-
-/**
- * The given instruction has become ready. Link it into the ready
- * instructions.
- */
-static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst)
-{
- if (s->Verbose)
- _mesa_printf("instruction_ready(%i)\n", pairinst->IP);
-
- if (pairinst->IsTex)
- add_pairinst_to_list(&s->ReadyTEX, pairinst);
- else if (!pairinst->NeedAlpha)
- add_pairinst_to_list(&s->ReadyRGB, pairinst);
- else if (!pairinst->NeedRGB)
- add_pairinst_to_list(&s->ReadyAlpha, pairinst);
- else
- add_pairinst_to_list(&s->ReadyFullALU, pairinst);
-}
-
-
-/**
- * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
- * and reverse the order of arguments for CMP.
- */
-static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
-{
- struct prog_src_register tmp;
-
- switch(inst->Opcode) {
- case OPCODE_ADD:
- inst->SrcReg[2] = inst->SrcReg[1];
- inst->SrcReg[1].File = PROGRAM_BUILTIN;
- inst->SrcReg[1].Swizzle = SWIZZLE_1111;
- inst->SrcReg[1].Negate = NEGATE_NONE;
- inst->Opcode = OPCODE_MAD;
- break;
- case OPCODE_CMP:
- tmp = inst->SrcReg[2];
- inst->SrcReg[2] = inst->SrcReg[0];
- inst->SrcReg[0] = tmp;
- break;
- case OPCODE_MOV:
- /* AMD say we should use CMP.
- * However, when we transform
- * KIL -r0;
- * into
- * CMP tmp, -r0, -r0, 0;
- * KIL tmp;
- * we get incorrect behaviour on R500 when r0 == 0.0.
- * It appears that the R500 KIL hardware treats -0.0 as less
- * than zero.
- */
- inst->SrcReg[1].File = PROGRAM_BUILTIN;
- inst->SrcReg[1].Swizzle = SWIZZLE_1111;
- inst->SrcReg[2].File = PROGRAM_BUILTIN;
- inst->SrcReg[2].Swizzle = SWIZZLE_0000;
- inst->Opcode = OPCODE_MAD;
- break;
- case OPCODE_MUL:
- inst->SrcReg[2].File = PROGRAM_BUILTIN;
- inst->SrcReg[2].Swizzle = SWIZZLE_0000;
- inst->Opcode = OPCODE_MAD;
- break;
- default:
- /* nothing to do */
- break;
- }
-}
-
-
-/**
- * Classify an instruction according to which ALUs etc. it needs
- */
-static void classify_instruction(struct pair_state *s,
- struct pair_state_instruction *psi)
-{
- psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
- psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
-
- switch(psi->Instruction.Opcode) {
- case OPCODE_ADD:
- case OPCODE_CMP:
- case OPCODE_DDX:
- case OPCODE_DDY:
- case OPCODE_FRC:
- case OPCODE_MAD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MOV:
- case OPCODE_MUL:
- break;
- case OPCODE_COS:
- case OPCODE_EX2:
- case OPCODE_LG2:
- case OPCODE_RCP:
- case OPCODE_RSQ:
- case OPCODE_SIN:
- psi->IsTranscendent = 1;
- psi->NeedAlpha = 1;
- break;
- case OPCODE_DP4:
- psi->NeedAlpha = 1;
- /* fall through */
- case OPCODE_DP3:
- psi->NeedRGB = 1;
- break;
- case OPCODE_KIL:
- case OPCODE_TEX:
- case OPCODE_TXB:
- case OPCODE_TXP:
- case OPCODE_END:
- psi->IsTex = 1;
- break;
- default:
- error("Unknown opcode %d\n", psi->Instruction.Opcode);
- break;
- }
-}
-
-
-/**
- * Count which (input, temporary) register is read and written how often,
- * and scan the instruction stream to find dependencies.
- */
-static void scan_instructions(struct pair_state *s)
-{
- struct rc_instruction *source;
- GLuint ip;
-
- for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0;
- source != &s->Compiler->Base.Program.Instructions;
- source = source->Next, ++ip) {
- struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst));
- memset(pairinst, 0, sizeof(struct pair_state_instruction));
-
- pairinst->Instruction = source->I;
- pairinst->IP = ip;
- final_rewrite(s, &pairinst->Instruction);
- classify_instruction(s, pairinst);
-
- int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode);
- int j;
- for(j = 0; j < nsrc; j++) {
- struct pair_register_translation *t =
- get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index);
- if (!t)
- continue;
-
- t->RefCount++;
-
- if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) {
- int i;
- for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i);
- if (swz >= 4)
- continue; /* constant or NIL swizzle */
- if (!t->Value[swz])
- continue; /* this is an undefined read */
-
- /* Do not add a dependency if this instruction
- * also rewrites the value. The code below adds
- * a dependency for the DstReg, which is a superset
- * of the SrcReg dependency. */
- if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY &&
- pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index &&
- GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz))
- continue;
-
- struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r));
- pairinst->NumDependencies++;
- t->Value[swz]->NumReaders++;
- r->Reader = pairinst;
- r->Next = t->Value[swz]->Readers;
- t->Value[swz]->Readers = r;
- }
- }
- }
-
- int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode);
- if (ndst) {
- struct pair_register_translation *t =
- get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index);
- if (t) {
- t->RefCount++;
-
- if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) {
- int j;
- for(j = 0; j < 4; ++j) {
- if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j))
- continue;
-
- struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v));
- memset(v, 0, sizeof(struct reg_value));
- v->Writer = pairinst;
- if (t->Value[j]) {
- pairinst->NumDependencies++;
- t->Value[j]->Next = v;
- }
- t->Value[j] = v;
- pairinst->Values[j] = v;
- }
- }
- }
- }
-
- if (s->Verbose)
- _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);
-
- if (!pairinst->NumDependencies)
- instruction_ready(s, pairinst);
- }
-
- /* Clear the PROGRAM_TEMPORARY state */
- int i, j;
- for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
- for(j = 0; j < 4; ++j)
- s->Temps[i].Value[j] = 0;
- }
-}
-
-
-static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst)
-{
- ASSERT(pairinst->NumDependencies > 0);
- if (!--pairinst->NumDependencies)
- instruction_ready(s, pairinst);
-}
/**
- * Update the dependency tracking state based on what the instruction
- * at the given IP does.
+ * Return the source slot where we installed the given register access,
+ * or -1 if no slot was free anymore.
*/
-static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst)
-{
- struct prog_instruction *inst = &pairinst->Instruction;
-
- if (s->Verbose)
- _mesa_printf("commit_instruction(%i)\n", pairinst->IP);
-
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
- deref_hw_reg(s, t->HwIndex);
-
- int i;
- for(i = 0; i < 4; ++i) {
- if (!GET_BIT(inst->DstReg.WriteMask, i))
- continue;
-
- t->Value[i] = pairinst->Values[i];
- if (t->Value[i]->NumReaders) {
- struct reg_value_reader *r;
- for(r = pairinst->Values[i]->Readers; r; r = r->Next)
- decrement_dependencies(s, r->Reader);
- } else if (t->Value[i]->Next) {
- /* This happens when the only reader writes
- * the register at the same time */
- decrement_dependencies(s, t->Value[i]->Next->Writer);
- }
- }
- }
-
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
- int i;
- for(i = 0; i < nsrc; i++) {
- struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index);
- if (!t)
- continue;
-
- deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index));
-
- if (inst->SrcReg[i].File != PROGRAM_TEMPORARY)
- continue;
-
- int j;
- for(j = 0; j < 4; ++j) {
- GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
- if (swz >= 4)
- continue;
- if (!t->Value[swz])
- continue;
-
- /* Do not free a dependency if this instruction
- * also rewrites the value. See scan_instructions. */
- if (inst->DstReg.File == PROGRAM_TEMPORARY &&
- inst->DstReg.Index == inst->SrcReg[i].Index &&
- GET_BIT(inst->DstReg.WriteMask, swz))
- continue;
-
- if (!--t->Value[swz]->NumReaders) {
- if (t->Value[swz]->Next)
- decrement_dependencies(s, t->Value[swz]->Next->Writer);
- }
- }
- }
-}
-
-
-/**
- * Emit all ready texture instructions in a single block.
- *
- * Emit as a single block to (hopefully) sample many textures in parallel,
- * and to avoid hardware indirections on R300.
- *
- * In R500, we don't really know when the result of a texture instruction
- * arrives. So allocate all destinations first, to make sure they do not
- * arrive early and overwrite a texture coordinate we're going to use later
- * in the block.
- */
-static void emit_all_tex(struct pair_state *s)
-{
- struct pair_state_instruction *readytex;
- struct pair_state_instruction *pairinst;
-
- ASSERT(s->ReadyTEX);
-
- // Don't let the ready list change under us!
- readytex = s->ReadyTEX;
- s->ReadyTEX = 0;
-
- // Allocate destination hardware registers in one block to avoid conflicts.
- for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- struct prog_instruction *inst = &pairinst->Instruction;
- if (inst->Opcode != OPCODE_KIL)
- get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
- }
-
- if (s->Compiler->Base.Debug)
- _mesa_printf(" BEGIN_TEX\n");
-
- if (s->Handler->BeginTexBlock)
- s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData);
-
- for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- struct prog_instruction *inst = &pairinst->Instruction;
- commit_instruction(s, pairinst);
-
- if (inst->Opcode != OPCODE_KIL)
- inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
- inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
-
- if (s->Compiler->Base.Debug) {
- _mesa_printf(" ");
- _mesa_print_instruction(inst);
- fflush(stderr);
- }
-
- struct radeon_pair_texture_instruction rpti;
-
- switch(inst->Opcode) {
- case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break;
- case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break;
- case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break;
- default:
- case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break;
- }
-
- rpti.DestIndex = inst->DstReg.Index;
- rpti.WriteMask = inst->DstReg.WriteMask;
- rpti.TexSrcUnit = inst->TexSrcUnit;
- rpti.TexSrcTarget = inst->TexSrcTarget;
- rpti.SrcIndex = inst->SrcReg[0].Index;
- rpti.SrcSwizzle = inst->SrcReg[0].Swizzle;
-
- s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti);
- }
-
- if (s->Compiler->Base.Debug)
- _mesa_printf(" END_TEX\n");
-}
-
-
-static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair,
- struct prog_src_register src, GLboolean rgb, GLboolean alpha)
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+ unsigned int rgb, unsigned int alpha,
+ rc_register_file file, unsigned int index)
{
int candidate = -1;
int candidate_quality = -1;
int i;
- if (!rgb && !alpha)
+ if ((!rgb && !alpha) || file == RC_FILE_NONE)
return 0;
- GLuint constant;
- GLuint index;
-
- if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) {
- constant = 0;
- index = get_hw_reg(s, src.File, src.Index);
- } else {
- constant = 1;
- index = src.Index;
- }
-
for(i = 0; i < 3; ++i) {
int q = 0;
if (rgb) {
if (pair->RGB.Src[i].Used) {
- if (pair->RGB.Src[i].Constant != constant ||
+ if (pair->RGB.Src[i].File != file ||
pair->RGB.Src[i].Index != index)
continue;
q++;
@@ -610,7 +55,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
}
if (alpha) {
if (pair->Alpha.Src[i].Used) {
- if (pair->Alpha.Src[i].Constant != constant ||
+ if (pair->Alpha.Src[i].File != file ||
pair->Alpha.Src[i].Index != index)
continue;
q++;
@@ -625,334 +70,15 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
if (candidate >= 0) {
if (rgb) {
pair->RGB.Src[candidate].Used = 1;
- pair->RGB.Src[candidate].Constant = constant;
+ pair->RGB.Src[candidate].File = file;
pair->RGB.Src[candidate].Index = index;
}
if (alpha) {
pair->Alpha.Src[candidate].Used = 1;
- pair->Alpha.Src[candidate].Constant = constant;
+ pair->Alpha.Src[candidate].File = file;
pair->Alpha.Src[candidate].Index = index;
}
}
return candidate;
}
-
-/**
- * Fill the given ALU instruction's opcodes and source operands into the given pair,
- * if possible.
- */
-static GLboolean fill_instruction_into_pair(
- struct pair_state *s,
- struct radeon_pair_instruction *pair,
- struct pair_state_instruction *pairinst)
-{
- struct prog_instruction *inst = &pairinst->Instruction;
-
- ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
- ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);
-
- if (pairinst->NeedRGB) {
- if (pairinst->IsTranscendent)
- pair->RGB.Opcode = OPCODE_REPL_ALPHA;
- else
- pair->RGB.Opcode = inst->Opcode;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- pair->RGB.Saturate = 1;
- }
- if (pairinst->NeedAlpha) {
- pair->Alpha.Opcode = inst->Opcode;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- pair->Alpha.Saturate = 1;
- }
-
- int nargs = _mesa_num_inst_src_regs(inst->Opcode);
- int i;
-
- /* Special case for DDX/DDY (MDH/MDV). */
- if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) {
- if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used)
- return GL_FALSE;
- else
- nargs++;
- }
-
- for(i = 0; i < nargs; ++i) {
- int source;
- if (pairinst->NeedRGB && !pairinst->IsTranscendent) {
- GLboolean srcrgb = GL_FALSE;
- GLboolean srcalpha = GL_FALSE;
- int j;
- for(j = 0; j < 3; ++j) {
- GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
- if (swz < 3)
- srcrgb = GL_TRUE;
- else if (swz < 4)
- srcalpha = GL_TRUE;
- }
- source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
- if (source < 0)
- return GL_FALSE;
- pair->RGB.Arg[i].Source = source;
- pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
- pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
- pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z));
- }
- if (pairinst->NeedAlpha) {
- GLboolean srcrgb = GL_FALSE;
- GLboolean srcalpha = GL_FALSE;
- GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3);
- if (swz < 3)
- srcrgb = GL_TRUE;
- else if (swz < 4)
- srcalpha = GL_TRUE;
- source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
- if (source < 0)
- return GL_FALSE;
- pair->Alpha.Arg[i].Source = source;
- pair->Alpha.Arg[i].Swizzle = swz;
- pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
- pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W);
- }
- }
-
- return GL_TRUE;
-}
-
-
-/**
- * Fill in the destination register information.
- *
- * This is split from filling in source registers because we want
- * to avoid allocating hardware temporaries for destinations until
- * we are absolutely certain that we're going to emit a certain
- * instruction pairing.
- */
-static void fill_dest_into_pair(
- struct pair_state *s,
- struct radeon_pair_instruction *pair,
- struct pair_state_instruction *pairinst)
-{
- struct prog_instruction *inst = &pairinst->Instruction;
-
- if (inst->DstReg.File == PROGRAM_OUTPUT) {
- if (inst->DstReg.Index == s->Compiler->OutputColor) {
- pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
- pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- } else if (inst->DstReg.Index == s->Compiler->OutputDepth) {
- pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- }
- } else {
- GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
- if (pairinst->NeedRGB) {
- pair->RGB.DestIndex = hwindex;
- pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
- }
- if (pairinst->NeedAlpha) {
- pair->Alpha.DestIndex = hwindex;
- pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- }
- }
-}
-
-
-/**
- * Find a good ALU instruction or pair of ALU instruction and emit it.
- *
- * Prefer emitting full ALU instructions, so that when we reach a point
- * where no full ALU instruction can be emitted, we have more candidates
- * for RGB/Alpha pairing.
- */
-static void emit_alu(struct pair_state *s)
-{
- struct radeon_pair_instruction pair;
- struct pair_state_instruction *psi;
-
- if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
- if (s->ReadyFullALU) {
- psi = s->ReadyFullALU;
- s->ReadyFullALU = s->ReadyFullALU->NextReady;
- } else if (s->ReadyRGB) {
- psi = s->ReadyRGB;
- s->ReadyRGB = s->ReadyRGB->NextReady;
- } else {
- psi = s->ReadyAlpha;
- s->ReadyAlpha = s->ReadyAlpha->NextReady;
- }
-
- _mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, psi);
- fill_dest_into_pair(s, &pair, psi);
- commit_instruction(s, psi);
- } else {
- struct pair_state_instruction **prgb;
- struct pair_state_instruction **palpha;
-
- /* Some pairings might fail because they require too
- * many source slots; try all possible pairings if necessary */
- for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
- for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
- struct pair_state_instruction * psirgb = *prgb;
- struct pair_state_instruction * psialpha = *palpha;
- _mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, psirgb);
- if (!fill_instruction_into_pair(s, &pair, psialpha))
- continue;
- *prgb = (*prgb)->NextReady;
- *palpha = (*palpha)->NextReady;
- fill_dest_into_pair(s, &pair, psirgb);
- fill_dest_into_pair(s, &pair, psialpha);
- commit_instruction(s, psirgb);
- commit_instruction(s, psialpha);
- goto success;
- }
- }
-
- /* No success in pairing; just take the first RGB instruction */
- psi = s->ReadyRGB;
- s->ReadyRGB = s->ReadyRGB->NextReady;
-
- _mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, psi);
- fill_dest_into_pair(s, &pair, psi);
- commit_instruction(s, psi);
- success: ;
- }
-
- if (s->Compiler->Base.Debug)
- radeonPrintPairInstruction(&pair);
-
- s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair);
-}
-
-/* Callback function for assigning input registers to hardware registers */
-static void alloc_helper(void * data, unsigned input, unsigned hwreg)
-{
- struct pair_state * s = data;
- alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg);
-}
-
-void radeonPairProgram(
- struct r300_fragment_program_compiler * compiler,
- const struct radeon_pair_handler* handler, void *userdata)
-{
- struct pair_state s;
-
- _mesa_bzero(&s, sizeof(s));
- s.Compiler = compiler;
- s.Handler = handler;
- s.UserData = userdata;
- s.Verbose = GL_FALSE && s.Compiler->Base.Debug;
-
- if (s.Compiler->Base.Debug)
- _mesa_printf("Emit paired program\n");
-
- scan_instructions(&s);
- s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s);
-
- while(!s.Compiler->Base.Error &&
- (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
- if (s.ReadyTEX)
- emit_all_tex(&s);
-
- while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)
- emit_alu(&s);
- }
-
- if (s.Compiler->Base.Debug)
- _mesa_printf(" END\n");
-}
-
-
-static void print_pair_src(int i, struct radeon_pair_instruction_source* src)
-{
- _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index);
-}
-
-static const char* opcode_string(GLuint opcode)
-{
- if (opcode == OPCODE_REPL_ALPHA)
- return "SOP";
- else
- return _mesa_opcode_string(opcode);
-}
-
-static int num_pairinst_args(GLuint opcode)
-{
- if (opcode == OPCODE_REPL_ALPHA)
- return 0;
- else
- return _mesa_num_inst_src_regs(opcode);
-}
-
-static char swizzle_char(GLuint swz)
-{
- switch(swz) {
- case SWIZZLE_X: return 'x';
- case SWIZZLE_Y: return 'y';
- case SWIZZLE_Z: return 'z';
- case SWIZZLE_W: return 'w';
- case SWIZZLE_ZERO: return '0';
- case SWIZZLE_ONE: return '1';
- case SWIZZLE_NIL: return '_';
- default: return '?';
- }
-}
-
-void radeonPrintPairInstruction(struct radeon_pair_instruction *inst)
-{
- int nargs;
- int i;
-
- _mesa_printf(" RGB: ");
- for(i = 0; i < 3; ++i) {
- if (inst->RGB.Src[i].Used)
- print_pair_src(i, inst->RGB.Src + i);
- }
- _mesa_printf("\n");
- _mesa_printf(" Alpha:");
- for(i = 0; i < 3; ++i) {
- if (inst->Alpha.Src[i].Used)
- print_pair_src(i, inst->Alpha.Src + i);
- }
- _mesa_printf("\n");
-
- _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : "");
- if (inst->RGB.WriteMask)
- _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex,
- (inst->RGB.WriteMask & 1) ? "x" : "",
- (inst->RGB.WriteMask & 2) ? "y" : "",
- (inst->RGB.WriteMask & 4) ? "z" : "");
- if (inst->RGB.OutputWriteMask)
- _mesa_printf(" COLOR.%s%s%s",
- (inst->RGB.OutputWriteMask & 1) ? "x" : "",
- (inst->RGB.OutputWriteMask & 2) ? "y" : "",
- (inst->RGB.OutputWriteMask & 4) ? "z" : "");
- nargs = num_pairinst_args(inst->RGB.Opcode);
- for(i = 0; i < nargs; ++i) {
- const char* abs = inst->RGB.Arg[i].Abs ? "|" : "";
- const char* neg = inst->RGB.Arg[i].Negate ? "-" : "";
- _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source,
- swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)),
- swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)),
- swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)),
- abs);
- }
- _mesa_printf("\n");
-
- _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : "");
- if (inst->Alpha.WriteMask)
- _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex);
- if (inst->Alpha.OutputWriteMask)
- _mesa_printf(" COLOR.w");
- if (inst->Alpha.DepthWriteMask)
- _mesa_printf(" DEPTH.w");
- nargs = num_pairinst_args(inst->Alpha.Opcode);
- for(i = 0; i < nargs; ++i) {
- const char* abs = inst->Alpha.Arg[i].Abs ? "|" : "";
- const char* neg = inst->Alpha.Arg[i].Negate ? "-" : "";
- _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source,
- swizzle_char(inst->Alpha.Arg[i].Swizzle), abs);
- }
- _mesa_printf("\n");
-}
diff --git a/r300/compiler/radeon_program_pair.h b/r300/compiler/radeon_program_pair.h
index ff76178..1600598 100644
--- a/r300/compiler/radeon_program_pair.h
+++ b/r300/compiler/radeon_program_pair.h
@@ -28,116 +28,97 @@
#ifndef __RADEON_PROGRAM_PAIR_H_
#define __RADEON_PROGRAM_PAIR_H_
-#include "radeon_program.h"
+#include "radeon_code.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_constants.h"
struct r300_fragment_program_compiler;
/**
- * Represents a paired instruction, as found in R300 and R500
+ * \file
+ * Represents a paired ALU instruction, as found in R300 and R500
* fragment programs.
+ *
+ * Note that this representation is taking some liberties as far
+ * as register files are concerned, to allow separate register
+ * allocation.
+ *
+ * Also note that there are some subtleties in that the semantics
+ * of certain opcodes are implicitly changed in this representation;
+ * see \ref rc_pair_translate
*/
+
+
struct radeon_pair_instruction_source {
- GLuint Index:8;
- GLuint Constant:1;
- GLuint Used:1;
+ unsigned int Used:1;
+ rc_register_file File:3;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
};
struct radeon_pair_instruction_rgb {
- GLuint Opcode:8;
- GLuint DestIndex:8;
- GLuint WriteMask:3;
- GLuint OutputWriteMask:3;
- GLuint Saturate:1;
+ rc_opcode Opcode:8;
+ unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+ unsigned int WriteMask:3;
+ unsigned int OutputWriteMask:3;
+ unsigned int Saturate:1;
struct radeon_pair_instruction_source Src[3];
struct {
- GLuint Source:2;
- GLuint Swizzle:9;
- GLuint Abs:1;
- GLuint Negate:1;
+ unsigned int Source:2;
+ unsigned int Swizzle:9;
+ unsigned int Abs:1;
+ unsigned int Negate:1;
} Arg[3];
};
struct radeon_pair_instruction_alpha {
- GLuint Opcode:8;
- GLuint DestIndex:8;
- GLuint WriteMask:1;
- GLuint OutputWriteMask:1;
- GLuint DepthWriteMask:1;
- GLuint Saturate:1;
+ rc_opcode Opcode:8;
+ unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+ unsigned int WriteMask:1;
+ unsigned int OutputWriteMask:1;
+ unsigned int DepthWriteMask:1;
+ unsigned int Saturate:1;
struct radeon_pair_instruction_source Src[3];
struct {
- GLuint Source:2;
- GLuint Swizzle:3;
- GLuint Abs:1;
- GLuint Negate:1;
+ unsigned int Source:2;
+ unsigned int Swizzle:3;
+ unsigned int Abs:1;
+ unsigned int Negate:1;
} Arg[3];
};
-struct radeon_pair_instruction {
+struct rc_pair_instruction {
struct radeon_pair_instruction_rgb RGB;
struct radeon_pair_instruction_alpha Alpha;
-};
-
-enum {
- RADEON_OPCODE_TEX = 0,
- RADEON_OPCODE_TXB,
- RADEON_OPCODE_TXP,
- RADEON_OPCODE_KIL
+ rc_write_aluresult WriteALUResult:2;
+ rc_compare_func ALUResultCompare:3;
};
-struct radeon_pair_texture_instruction {
- GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */
-
- GLuint DestIndex:8;
- GLuint WriteMask:4;
- GLuint TexSrcUnit:5;
- GLuint TexSrcTarget:3;
-
- GLuint SrcIndex:8;
- GLuint SrcSwizzle:12;
-};
+/**
+ * General helper functions for dealing with the paired instruction format.
+ */
+/*@{*/
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+ unsigned int rgb, unsigned int alpha,
+ rc_register_file file, unsigned int index);
+/*@}*/
/**
- *
+ * Compiler passes that operate with the paired format.
*/
-struct radeon_pair_handler {
- /**
- * Write a paired instruction to the hardware.
- *
- * @return GL_FALSE on error.
- */
- GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*);
-
- /**
- * Write a texture instruction to the hardware.
- * Register indices have already been rewritten to the allocated
- * hardware register numbers.
- *
- * @return GL_FALSE on error.
- */
- GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*);
-
- /**
- * Called before a block of contiguous, independent texture
- * instructions is emitted.
- */
- GLboolean (*BeginTexBlock)(void*);
-
- unsigned MaxHwTemps;
-};
-
-void radeonPairProgram(
- struct r300_fragment_program_compiler * compiler,
- const struct radeon_pair_handler*, void *userdata);
+/*@{*/
+struct radeon_pair_handler;
-void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
+void rc_pair_translate(struct r300_fragment_program_compiler *c);
+void rc_pair_schedule(struct r300_fragment_program_compiler *c);
+void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps);
+/*@}*/
#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/r300/compiler/radeon_program_print.c b/r300/compiler/radeon_program_print.c
new file mode 100644
index 0000000..c980f5c
--- /dev/null
+++ b/r300/compiler/radeon_program_print.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+static const char * textarget_to_string(rc_texture_target target)
+{
+ switch(target) {
+ case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
+ case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
+ case RC_TEXTURE_CUBE: return "CUBE";
+ case RC_TEXTURE_3D: return "3D";
+ case RC_TEXTURE_RECT: return "RECT";
+ case RC_TEXTURE_2D: return "2D";
+ case RC_TEXTURE_1D: return "1D";
+ default: return "BAD_TEXTURE_TARGET";
+ }
+}
+
+static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
+{
+ if (func == RC_COMPARE_FUNC_NEVER) {
+ fprintf(f, "false");
+ } else if (func == RC_COMPARE_FUNC_ALWAYS) {
+ fprintf(f, "true");
+ } else {
+ const char * op;
+ switch(func) {
+ case RC_COMPARE_FUNC_LESS: op = "<"; break;
+ case RC_COMPARE_FUNC_EQUAL: op = "=="; break;
+ case RC_COMPARE_FUNC_LEQUAL: op = "<="; break;
+ case RC_COMPARE_FUNC_GREATER: op = ">"; break;
+ case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break;
+ case RC_COMPARE_FUNC_GEQUAL: op = ">="; break;
+ default: op = "???"; break;
+ }
+ fprintf(f, "%s %s %s", lhs, op, rhs);
+ }
+}
+
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+ if (file == RC_FILE_NONE) {
+ fprintf(f, "none");
+ } else if (file == RC_FILE_SPECIAL) {
+ switch(index) {
+ case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
+ default: fprintf(f, "special[%i]", index); break;
+ }
+ } else {
+ const char * filename;
+ switch(file) {
+ case RC_FILE_TEMPORARY: filename = "temp"; break;
+ case RC_FILE_INPUT: filename = "input"; break;
+ case RC_FILE_OUTPUT: filename = "output"; break;
+ case RC_FILE_ADDRESS: filename = "addr"; break;
+ case RC_FILE_CONSTANT: filename = "const"; break;
+ default: filename = "BAD FILE"; break;
+ }
+ fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
+ }
+}
+
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+ if (mask & RC_MASK_X) fprintf(f, "x");
+ if (mask & RC_MASK_Y) fprintf(f, "y");
+ if (mask & RC_MASK_Z) fprintf(f, "z");
+ if (mask & RC_MASK_W) fprintf(f, "w");
+}
+
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+ rc_print_register(f, dst.File, dst.Index, dst.RelAddr);
+ if (dst.WriteMask != RC_MASK_XYZW) {
+ fprintf(f, ".");
+ rc_print_mask(f, dst.WriteMask);
+ }
+}
+
+static char rc_swizzle_char(unsigned int swz)
+{
+ switch(swz) {
+ case RC_SWIZZLE_X: return 'x';
+ case RC_SWIZZLE_Y: return 'y';
+ case RC_SWIZZLE_Z: return 'z';
+ case RC_SWIZZLE_W: return 'w';
+ case RC_SWIZZLE_ZERO: return '0';
+ case RC_SWIZZLE_ONE: return '1';
+ case RC_SWIZZLE_HALF: return 'H';
+ case RC_SWIZZLE_UNUSED: return '_';
+ }
+ return '?';
+}
+
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+ unsigned int comp;
+ for(comp = 0; comp < 4; ++comp) {
+ rc_swizzle swz = GET_SWZ(swizzle, comp);
+ if (GET_BIT(negate, comp))
+ fprintf(f, "-");
+ fprintf(f, "%c", rc_swizzle_char(swz));
+ }
+}
+
+static void rc_print_src_register(FILE * f, struct rc_src_register src)
+{
+ int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
+
+ if (src.Negate == RC_MASK_XYZW)
+ fprintf(f, "-");
+ if (src.Abs)
+ fprintf(f, "|");
+
+ rc_print_register(f, src.File, src.Index, src.RelAddr);
+
+ if (src.Abs && !trivial_negate)
+ fprintf(f, "|");
+
+ if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
+ fprintf(f, ".");
+ rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
+ }
+
+ if (src.Abs && trivial_negate)
+ fprintf(f, "|");
+}
+
+static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int reg;
+
+ fprintf(f, "%s", opcode->Name);
+
+ switch(inst->U.I.SaturateMode) {
+ case RC_SATURATE_NONE: break;
+ case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
+ case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
+ default: fprintf(f, "_BAD_SAT"); break;
+ }
+
+ if (opcode->HasDstReg) {
+ fprintf(f, " ");
+ rc_print_dst_register(f, inst->U.I.DstReg);
+ if (opcode->NumSrcRegs)
+ fprintf(f, ",");
+ }
+
+ for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+ if (reg > 0)
+ fprintf(f, ",");
+ fprintf(f, " ");
+ rc_print_src_register(f, inst->U.I.SrcReg[reg]);
+ }
+
+ if (opcode->HasTexture) {
+ fprintf(f, ", %s%s[%u]",
+ textarget_to_string(inst->U.I.TexSrcTarget),
+ inst->U.I.TexShadow ? "SHADOW" : "",
+ inst->U.I.TexSrcUnit);
+ }
+
+ fprintf(f, ";");
+
+ if (inst->U.I.WriteALUResult) {
+ fprintf(f, " [aluresult = (");
+ rc_print_comparefunc(f,
+ (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w",
+ inst->U.I.ALUResultCompare, "0");
+ fprintf(f, ")]");
+ }
+
+ fprintf(f, "\n");
+}
+
+static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+ int printedsrc = 0;
+ unsigned int src, arg;
+
+ for(src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used) {
+ if (printedsrc)
+ fprintf(f, ", ");
+ fprintf(f, "src%i.xyz = ", src);
+ rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
+ printedsrc = 1;
+ }
+ if (inst->Alpha.Src[src].Used) {
+ if (printedsrc)
+ fprintf(f, ", ");
+ fprintf(f, "src%i.w = ", src);
+ rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
+ printedsrc = 1;
+ }
+ }
+ fprintf(f, "\n");
+
+ if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+ fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
+ if (inst->RGB.WriteMask)
+ fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
+ (inst->RGB.WriteMask & 1) ? "x" : "",
+ (inst->RGB.WriteMask & 2) ? "y" : "",
+ (inst->RGB.WriteMask & 4) ? "z" : "");
+ if (inst->RGB.OutputWriteMask)
+ fprintf(f, " color.%s%s%s",
+ (inst->RGB.OutputWriteMask & 1) ? "x" : "",
+ (inst->RGB.OutputWriteMask & 2) ? "y" : "",
+ (inst->RGB.OutputWriteMask & 4) ? "z" : "");
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ fprintf(f, " aluresult");
+
+ for (arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
+ const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
+ fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source,
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
+ abs);
+ }
+ fprintf(f, "\n");
+ }
+
+ if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+ fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
+ if (inst->Alpha.WriteMask)
+ fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
+ if (inst->Alpha.OutputWriteMask)
+ fprintf(f, " color.w");
+ if (inst->Alpha.DepthWriteMask)
+ fprintf(f, " depth.w");
+ if (inst->WriteALUResult == RC_ALURESULT_W)
+ fprintf(f, " aluresult");
+
+ for(arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
+ const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
+ fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source,
+ rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs);
+ }
+ fprintf(f, "\n");
+ }
+
+ if (inst->WriteALUResult) {
+ fprintf(f, " [aluresult = (");
+ rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
+ fprintf(f, ")]\n");
+ }
+}
+
+/**
+ * Print program to stderr, default options.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+ unsigned int linenum = 0;
+ struct rc_instruction *inst;
+
+ fprintf(stderr, "# Radeon Compiler Program\n");
+
+ for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+ fprintf(stderr, "%3d: ", linenum);
+
+ if (inst->Type == RC_INSTRUCTION_PAIR)
+ rc_print_pair_instruction(stderr, inst);
+ else
+ rc_print_normal_instruction(stderr, inst);
+
+ linenum++;
+ }
+}
diff --git a/r300/compiler/radeon_swizzle.h b/r300/compiler/radeon_swizzle.h
new file mode 100644
index 0000000..c81d5f7
--- /dev/null
+++ b/r300/compiler/radeon_swizzle.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_SWIZZLE_H
+#define RADEON_SWIZZLE_H
+
+#include "radeon_program.h"
+
+struct rc_swizzle_split {
+ unsigned char NumPhases;
+ unsigned char Phase[4];
+};
+
+/**
+ * Describe the swizzling capability of target hardware.
+ */
+struct rc_swizzle_caps {
+ /**
+ * Check whether the given swizzle, absolute and negate combination
+ * can be implemented natively by the hardware for this opcode.
+ *
+ * \return 1 if the swizzle is native for the given opcode
+ */
+ int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
+
+ /**
+ * Determine how to split access to the masked channels of the
+ * given source register to obtain ALU-native swizzles.
+ */
+ void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
+};
+
+#endif /* RADEON_SWIZZLE_H */
diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c
index 0fe32a5..ad8db6e 100644
--- a/r300/r300_cmdbuf.c
+++ b/r300/r300_cmdbuf.c
@@ -46,14 +46,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_context.h"
#include "r300_ioctl.h"
-#include "radeon_reg.h"
#include "r300_reg.h"
#include "r300_cmdbuf.h"
#include "r300_emit.h"
#include "radeon_bocs_wrapper.h"
#include "radeon_mipmap_tree.h"
#include "r300_state.h"
-#include "radeon_reg.h"
#include "radeon_queryobj.h"
/** # of dwords reserved for additional instructions that may need to be written
@@ -171,7 +169,7 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
if (t && !t->image_override) {
BEGIN_BATCH_NO_AUTOSTATE(4);
OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
- OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t),
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
END_BATCH();
} else if (!t) {
@@ -279,16 +277,33 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
cbpitch = (rrb->pitch / rrb->cpp);
if (rrb->cpp == 4)
cbpitch |= R300_COLOR_FORMAT_ARGB8888;
- else switch (rrb->base._ActualFormat) {
- case GL_RGB5:
+ else switch (rrb->base.Format) {
+ case MESA_FORMAT_RGB565:
+ assert(_mesa_little_endian());
cbpitch |= R300_COLOR_FORMAT_RGB565;
break;
- case GL_RGBA4:
+ case MESA_FORMAT_RGB565_REV:
+ assert(!_mesa_little_endian());
+ cbpitch |= R300_COLOR_FORMAT_RGB565;
+ break;
+ case MESA_FORMAT_ARGB4444:
+ assert(_mesa_little_endian());
cbpitch |= R300_COLOR_FORMAT_ARGB4444;
break;
- case GL_RGB5_A1:
+ case MESA_FORMAT_ARGB4444_REV:
+ assert(!_mesa_little_endian());
+ cbpitch |= R300_COLOR_FORMAT_ARGB4444;
+ break;
+ case MESA_FORMAT_ARGB1555:
+ assert(_mesa_little_endian());
cbpitch |= R300_COLOR_FORMAT_ARGB1555;
break;
+ case MESA_FORMAT_ARGB1555_REV:
+ assert(!_mesa_little_endian());
+ cbpitch |= R300_COLOR_FORMAT_ARGB1555;
+ break;
+ default:
+ _mesa_problem(ctx, "unexpected format in emit_cb_offset()");
}
if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
@@ -684,11 +699,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
- if ((r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ||
- ( !r300->radeon.radeonScreen->kernel_mm && (
- (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS400) ||
- (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
- (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) ) ) ) {
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) {
ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
} else {
ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0);
@@ -697,6 +708,14 @@ void r300InitCmdBuf(r300ContextPtr r300)
ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
r300->hw.zs.cmd[R300_ZS_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
+ if (is_r500) {
+ if (r300->radeon.radeonScreen->kernel_mm)
+ ALLOC_STATE(zsb, always, R300_ZSB_CMDSIZE, 0);
+ else
+ ALLOC_STATE(zsb, never, R300_ZSB_CMDSIZE, 0);
+ r300->hw.zsb.cmd[R300_ZSB_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R500_ZB_STENCILREFMASK_BF, 1);
+ }
ALLOC_STATE(zstencil_format, always, 5, 0);
r300->hw.zstencil_format.cmd[0] =
diff --git a/r300/r300_context.c b/r300/r300_context.c
index 2ea1b82..5f07b95 100644
--- a/r300/r300_context.c
+++ b/r300/r300_context.c
@@ -84,15 +84,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define need_GL_EXT_framebuffer_object
#define need_GL_EXT_fog_coord
#define need_GL_EXT_gpu_program_parameters
+#define need_GL_EXT_provoking_vertex
#define need_GL_EXT_secondary_color
#define need_GL_EXT_stencil_two_side
#define need_GL_ATI_separate_stencil
#define need_GL_NV_vertex_program
-#include "extension_helper.h"
+#include "main/remap_helper.h"
-const struct dri_extension card_extensions[] = {
+static const struct dri_extension card_extensions[] = {
/* *INDENT-OFF* */
{"GL_ARB_depth_texture", NULL},
{"GL_ARB_fragment_program", NULL},
@@ -116,6 +117,7 @@ const struct dri_extension card_extensions[] = {
{"GL_EXT_packed_depth_stencil", NULL},
{"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
{"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
+ {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions },
{"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
{"GL_EXT_shadow_funcs", NULL},
{"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions},
@@ -143,7 +145,7 @@ const struct dri_extension card_extensions[] = {
};
-const struct dri_extension mm_extensions[] = {
+static const struct dri_extension mm_extensions[] = {
{ "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions },
{ "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
{ NULL, NULL }
@@ -153,7 +155,7 @@ const struct dri_extension mm_extensions[] = {
* The GL 2.0 functions are needed to make display lists work with
* functions added by GL_ATI_separate_stencil.
*/
-const struct dri_extension gl_20_extension[] = {
+static const struct dri_extension gl_20_extension[] = {
{"GL_VERSION_2_0", GL_VERSION_2_0_functions },
};
@@ -374,11 +376,21 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
if (screen->chip_family >= CHIP_FAMILY_RV515) {
ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS;
ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */
- ctx->Const.FragmentProgram.MaxNativeParameters = R500_PFS_NUM_CONST_REGS;
- ctx->Const.FragmentProgram.MaxNativeAluInstructions = R500_PFS_MAX_INST;
- ctx->Const.FragmentProgram.MaxNativeTexInstructions = R500_PFS_MAX_INST;
- ctx->Const.FragmentProgram.MaxNativeInstructions = R500_PFS_MAX_INST;
- ctx->Const.FragmentProgram.MaxNativeTexIndirections = R500_PFS_MAX_INST;
+
+ /* The hardware limits are higher than this,
+ * but the non-KMS DRM interface artificially limits us
+ * to this many instructions.
+ *
+ * We could of course work around it in the KMS path,
+ * but it would be a mess, so it seems wiser
+ * to leave it as is. Going forward, the Gallium driver
+ * will not be subject to these limitations.
+ */
+ ctx->Const.FragmentProgram.MaxNativeParameters = 255;
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = 255;
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = 255;
+ ctx->Const.FragmentProgram.MaxNativeInstructions = 255;
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections = 255;
ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
} else {
ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS;
@@ -427,11 +439,11 @@ static void r300InitGLExtensions(GLcontext *ctx)
if (r300->options.stencil_two_side_disabled)
_mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
- if (r300->options.s3tc_force_enabled) {
+ if (r300->options.s3tc_force_disabled) {
+ _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+ } else if (ctx->Mesa_DXTn || r300->options.s3tc_force_enabled) {
_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
_mesa_enable_extension(ctx, "GL_S3_s3tc");
- } else if (r300->options.s3tc_force_disabled) {
- _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc");
}
if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) {
diff --git a/r300/r300_context.h b/r300/r300_context.h
index 1dadcc0..518d5cd 100644
--- a/r300/r300_context.h
+++ b/r300/r300_context.h
@@ -234,6 +234,10 @@ typedef struct r300_context *r300ContextPtr;
#define R300_ZS_CNTL_2 3
#define R300_ZS_CMDSIZE 4
+#define R300_ZSB_CMD_0 0
+#define R300_ZSB_CNTL_0 1
+#define R300_ZSB_CMDSIZE 2
+
#define R300_ZB_CMD_0 0
#define R300_ZB_OFFSET 1
#define R300_ZB_PITCH 2
@@ -343,6 +347,7 @@ struct r300_hw_state {
struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */
struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */
struct radeon_state_atom zs; /* zstencil control (4F00) */
+ struct radeon_state_atom zsb; /* zstencil bf */
struct radeon_state_atom zstencil_format;
struct radeon_state_atom zb; /* z buffer (4F20) */
struct radeon_state_atom zb_depthclearvalue; /* (4F28) */
diff --git a/r300/r300_draw.c b/r300/r300_draw.c
index e9968f9..06a0490 100644
--- a/r300/r300_draw.c
+++ b/r300/r300_draw.c
@@ -29,7 +29,7 @@
#include "main/glheader.h"
#include "main/context.h"
#include "main/state.h"
-#include "main/api_validate.h"
+/* #include "main/api_validate.h" */
#include "main/enums.h"
#include "main/simple_list.h"
diff --git a/r300/r300_emit.h b/r300/r300_emit.h
index 8e57e35..a456d88 100644
--- a/r300/r300_emit.h
+++ b/r300/r300_emit.h
@@ -42,7 +42,6 @@
#include "main/glheader.h"
#include "r300_context.h"
#include "r300_cmdbuf.h"
-#include "radeon_reg.h"
static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn,
int reg, int count)
diff --git a/r300/r300_fragprog_common.c b/r300/r300_fragprog_common.c
index 0bdc90b..267ee81 100644
--- a/r300/r300_fragprog_common.c
+++ b/r300/r300_fragprog_common.c
@@ -44,6 +44,7 @@
#include "compiler/radeon_compiler.h"
+#include "radeon_mesa_to_rc.h"
#include "r300_state.h"
@@ -131,7 +132,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler,
*/
static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
{
- struct prog_src_register src;
+ struct rc_src_register src;
int i;
fp->fog_attr = FRAG_ATTRIB_MAX;
@@ -155,7 +156,7 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r
}
memset(&src, 0, sizeof(src));
- src.File = PROGRAM_INPUT;
+ src.File = RC_FILE_INPUT;
src.Index = fp->fog_attr;
src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src);
@@ -232,13 +233,26 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
fflush(stderr);
}
- rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
+ radeon_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
insert_WPOS_trailer(&compiler, fp);
rewriteFog(&compiler, fp);
r3xx_compile_fragment_program(&compiler);
+
+ if (compiler.is_r500) {
+ /* We need to support the non-KMS DRM interface, which
+ * artificially limits the number of instructions and
+ * constants which are available to us.
+ *
+ * See also the comment in r300_context.c where we
+ * set the MAX_NATIVE_xxx values.
+ */
+ if (fp->code.code.r500.inst_end >= 255 || fp->code.constants.Count > 255)
+ rc_error(&compiler.Base, "Program is too big (upgrade to r300g to avoid this limitation).\n");
+ }
+
fp->error = compiler.Base.Error;
fp->InputsRead = compiler.Base.Program.InputsRead;
diff --git a/r300/r300_reg.h b/r300/r300_reg.h
index 39b4b61..ea684e7 100644
--- a/r300/r300_reg.h
+++ b/r300/r300_reg.h
@@ -1022,15 +1022,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \
R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \
- R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \
- R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST )
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD)
/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */
# define R300_RE_SHADE_MODEL_FLAT ( \
R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \
R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \
- R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \
- R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST )
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD)
/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */
#define R300_GA_SOLID_RG 0x427c
@@ -1791,6 +1789,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_ALU_DSTC_OUTPUT_X (1 << 26)
# define R300_ALU_DSTC_OUTPUT_Y (1 << 27)
# define R300_ALU_DSTC_OUTPUT_Z (1 << 28)
+# define R300_RGB_TARGET(x) ((x) << 29)
#define R300_US_ALU_ALPHA_ADDR_0 0x47C0
# define R300_ALU_SRC0A_SHIFT 0
@@ -1808,6 +1807,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_ALU_DSTA_REG (1 << 23)
# define R300_ALU_DSTA_OUTPUT (1 << 24)
# define R300_ALU_DSTA_DEPTH (1 << 27)
+# define R300_ALPHA_TARGET(x) ((x) << 25)
#define R300_US_ALU_RGB_INST_0 0x48C0
# define R300_ALU_ARGC_SRC0C_XYZ 0
@@ -2315,6 +2315,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_Z_WRITE_ENABLE (1 << 2)
# define R300_Z_SIGNED_COMPARE (1 << 3)
# define R300_STENCIL_FRONT_BACK (1 << 4)
+# define R400_ZSIGNED_MAGNITUDE (1 << 5)
+# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6)
#define R300_ZB_ZSTENCILCNTL 0x4f04
/* functions */
@@ -3002,6 +3004,8 @@ enum {
# define R500_INST_RGB_CLAMP (1 << 19)
# define R500_INST_ALPHA_CLAMP (1 << 20)
# define R500_INST_ALU_RESULT_SEL (1 << 21)
+# define R500_INST_ALU_RESULT_SEL_RED (0 << 21)
+# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21)
# define R500_INST_ALPHA_PRED_INV (1 << 22)
# define R500_INST_ALU_RESULT_OP_EQ (0 << 23)
# define R500_INST_ALU_RESULT_OP_LT (1 << 23)
diff --git a/r300/r300_render.c b/r300/r300_render.c
index 3cd3875..4ae593c 100644
--- a/r300/r300_render.c
+++ b/r300/r300_render.c
@@ -67,8 +67,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "vbo/vbo_split.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
-#include "radeon_reg.h"
-#include "radeon_macros.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "r300_state.h"
diff --git a/r300/r300_state.c b/r300/r300_state.c
index 9301543..ac20c08 100644
--- a/r300/r300_state.c
+++ b/r300/r300_state.c
@@ -45,7 +45,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/framebuffer.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
-#include "main/texformat.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
@@ -590,7 +589,9 @@ static void r300SetDepthState(GLcontext * ctx)
r300ContextPtr r300 = R300_CONTEXT(ctx);
R300_STATECHANGE(r300, zs);
- r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE|R300_STENCIL_FRONT_BACK;
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] &= (R300_STENCIL_ENABLE |
+ R300_STENCIL_FRONT_BACK |
+ R500_STENCIL_REFMASK_FRONT_BACK);
r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT);
if (ctx->Depth.Test) {
@@ -604,11 +605,16 @@ static void r300SetDepthState(GLcontext * ctx)
static void r300CatchStencilFallback(GLcontext *ctx)
{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
const unsigned back = ctx->Stencil._BackFace;
- if (ctx->Stencil._Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
- || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
- || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) {
+ if (rmesa->radeon.radeonScreen->kernel_mm &&
+ (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)) {
+ r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE);
+ } else if (ctx->Stencil._Enabled &&
+ (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
+ || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
+ || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) {
r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE);
} else {
r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE);
@@ -915,11 +921,24 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
(flag << R300_S_BACK_FUNC_SHIFT);
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask;
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R500_STENCIL_REFMASK_FRONT_BACK;
+ R300_STATECHANGE(rmesa, zsb);
+ refmask = ((ctx->Stencil.Ref[back] & 0xff) << R300_STENCILREF_SHIFT)
+ | ((ctx->Stencil.ValueMask[back] & 0xff) << R300_STENCILMASK_SHIFT);
+
+ rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] &=
+ ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) |
+ (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT));
+ rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= refmask;
+ }
}
static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ const unsigned back = ctx->Stencil._BackFace;
r300CatchStencilFallback(ctx);
@@ -931,6 +950,13 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
(ctx->Stencil.
WriteMask[0] & R300_STENCILREF_MASK) <<
R300_STENCILWRITEMASK_SHIFT;
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ R300_STATECHANGE(rmesa, zsb);
+ rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |=
+ (ctx->Stencil.
+ WriteMask[back] & R300_STENCILREF_MASK) <<
+ R300_STENCILWRITEMASK_SHIFT;
+ }
}
static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
@@ -2253,6 +2279,14 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
R300_STATECHANGE(r300, zb);
}
+ if (new_state & (_NEW_LIGHT)) {
+ R300_STATECHANGE(r300, shade2);
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION)
+ r300->hw.shade2.cmd[1] |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+ else
+ r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+ }
+
r300->radeon.NewGLState |= new_state;
}
diff --git a/r300/r300_tex.c b/r300/r300_tex.c
index 433e5a8..726b3ff 100644
--- a/r300/r300_tex.c
+++ b/r300/r300_tex.c
@@ -40,7 +40,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/image.h"
#include "main/mipmap.h"
#include "main/simple_list.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "main/teximage.h"
#include "main/texobj.h"
@@ -196,6 +195,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
GLenum pname, const GLfloat * params)
{
radeonTexObj* t = radeon_tex_obj(texObj);
+ GLenum texBaseFormat;
if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
fprintf(stderr, "%s( %s )\n", __FUNCTION__,
@@ -223,23 +223,16 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
case GL_TEXTURE_MAX_LEVEL:
case GL_TEXTURE_MIN_LOD:
case GL_TEXTURE_MAX_LOD:
- /* This isn't the most efficient solution but there doesn't appear to
- * be a nice alternative. Since there's no LOD clamping,
- * we just have to rely on loading the right subset of mipmap levels
- * to simulate a clamped LOD.
- */
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- t->validated = GL_FALSE;
- }
+ t->validated = GL_FALSE;
break;
case GL_DEPTH_TEXTURE_MODE:
if (!texObj->Image[0][texObj->BaseLevel])
return;
- if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat
- == GL_DEPTH_COMPONENT) {
+ texBaseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat;
+
+ if (texBaseFormat == GL_DEPTH_COMPONENT ||
+ texBaseFormat == GL_DEPTH_STENCIL) {
r300SetDepthTexMode(texObj);
break;
} else {
@@ -268,7 +261,11 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
if (rmesa) {
int i;
- radeon_firevertices(&rmesa->radeon);
+ struct radeon_bo *bo;
+ bo = !t->mt ? t->bo : t->mt->bo;
+ if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->radeon.cmdbuf.cs)) {
+ radeon_firevertices(&rmesa->radeon);
+ }
for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
if (rmesa->hw.textures[i] == t)
@@ -280,10 +277,8 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
t->bo = NULL;
}
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- }
+ radeon_miptree_unreference(&t->mt);
+
_mesa_delete_texture_object(ctx, texObj);
}
diff --git a/r300/r300_texstate.c b/r300/r300_texstate.c
index f030451..bbe8b1e 100644
--- a/r300/r300_texstate.c
+++ b/r300/r300_texstate.c
@@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/imports.h"
#include "main/context.h"
#include "main/macros.h"
-#include "main/texformat.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
@@ -84,6 +83,7 @@ static const struct tx_table {
_ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)),
_ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)),
#endif
+ _ASSIGN(XRGB8888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)),
_ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)),
_ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
_ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
@@ -138,9 +138,9 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X16),
},
{
- R300_EASY_TX_FORMAT(X, X, X, ONE, X24_Y8),
- R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8),
- R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X24_Y8),
+ R300_EASY_TX_FORMAT(Y, Y, Y, ONE, X24_Y8),
+ R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8),
+ R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, Y, X24_Y8),
},
{
R300_EASY_TX_FORMAT(X, X, X, ONE, X32),
@@ -156,11 +156,11 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
t = radeon_tex_obj(tObj);
- switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
+ switch (tObj->Image[0][tObj->BaseLevel]->TexFormat) {
case MESA_FORMAT_Z16:
format = formats[0];
break;
- case MESA_FORMAT_Z24_S8:
+ case MESA_FORMAT_S8_Z24:
format = formats[1];
break;
case MESA_FORMAT_Z32:
@@ -203,19 +203,17 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
{
const struct gl_texture_image *firstImage;
- int firstlevel = t->mt ? t->mt->firstLevel : 0;
-
- firstImage = t->base.Image[0][firstlevel];
+ firstImage = t->base.Image[0][t->minLod];
if (!t->image_override
- && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
- if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
+ && VALID_FORMAT(firstImage->TexFormat)) {
+ if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
r300SetDepthTexMode(&t->base);
} else {
- t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format;
+ t->pp_txformat = tx_table[firstImage->TexFormat].format;
}
- t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter;
+ t->pp_txfilter |= tx_table[firstImage->TexFormat].filter;
} else if (!t->image_override) {
_mesa_problem(NULL, "unexpected texture format in %s",
__FUNCTION__);
@@ -225,10 +223,10 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
if (t->image_override && t->bo)
return;
- t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
- | ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)
- | ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)
- | ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT));
+ t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)))
+ | ((R300_TX_HEIGHTMASK_MASK & ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)))
+ | ((R300_TX_DEPTHMASK_MASK & ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)))
+ | ((R300_TX_MAX_MIP_LEVEL_MASK & ((t->maxLod - t->minLod) << R300_TX_MAX_MIP_LEVEL_SHIFT))));
t->tile_bits = 0;
@@ -239,7 +237,7 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
- unsigned int align = (64 / t->mt->bpp) - 1;
+ unsigned int align = (64 / _mesa_get_format_bytes(firstImage->TexFormat)) - 1;
t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
if (!t->image_override)
t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1;
@@ -248,8 +246,12 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
if (firstImage->Width > 2048)
t->pp_txpitch |= R500_TXWIDTH_BIT11;
+ else
+ t->pp_txpitch &= ~R500_TXWIDTH_BIT11;
if (firstImage->Height > 2048)
t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+ else
+ t->pp_txpitch &= ~R500_TXHEIGHT_BIT11;
}
}
@@ -434,20 +436,13 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
radeon_bo_unref(rImage->bo);
rImage->bo = NULL;
}
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = NULL;
- }
- if (rImage->mt) {
- radeon_miptree_unreference(rImage->mt);
- rImage->mt = NULL;
- }
+
+ radeon_miptree_unreference(&t->mt);
+ radeon_miptree_unreference(&rImage->mt);
+
_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
texImage->RowStride = rb->pitch / rb->cpp;
- texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
- internalFormat,
- type, format, 0);
rImage->bo = rb->bo;
radeon_bo_ref(rImage->bo);
t->bo = rb->bo;
@@ -479,16 +474,20 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
break;
}
pitch_val--;
- t->pp_txsize = ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT) |
- ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT);
+ t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT)))
+ | ((R300_TX_HEIGHTMASK_MASK & ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT))));
t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
t->pp_txpitch |= pitch_val;
if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
if (rb->base.Width > 2048)
t->pp_txpitch |= R500_TXWIDTH_BIT11;
+ else
+ t->pp_txpitch &= ~R500_TXWIDTH_BIT11;
if (rb->base.Height > 2048)
t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+ else
+ t->pp_txpitch &= ~R500_TXHEIGHT_BIT11;
}
t->validated = GL_TRUE;
_mesa_unlock_texture(radeon->glCtx, texObj);
diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c
index 2f7b67c..c2f96af 100644
--- a/r300/r300_vertprog.c
+++ b/r300/r300_vertprog.c
@@ -41,7 +41,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "tnl/tnl.h"
#include "compiler/radeon_compiler.h"
-#include "compiler/radeon_nqssadce.h"
+#include "radeon_mesa_to_rc.h"
#include "r300_context.h"
#include "r300_fragprog_common.h"
#include "r300_state.h"
@@ -62,12 +62,6 @@ static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_progra
}
}
- if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) {
- /* Should have checked this earlier... */
- fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
- _mesa_exit(-1);
- }
-
for(i = 0; i < vp->code.constants.Count; ++i) {
const float * src = 0;
const struct rc_constant * constant = &vp->code.constants.Constants[i];
@@ -217,20 +211,20 @@ static void initialize_NV_registers(struct radeon_compiler * compiler)
for(reg = 0; reg < 12; ++reg) {
inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
- inst->I.Opcode = OPCODE_MOV;
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = reg;
- inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_0000;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = reg;
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
}
inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
- inst->I.Opcode = OPCODE_ARL;
- inst->I.DstReg.File = PROGRAM_ADDRESS;
- inst->I.DstReg.Index = 0;
- inst->I.DstReg.WriteMask = WRITEMASK_X;
- inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_0000;
+ inst->U.I.Opcode = RC_OPCODE_ARL;
+ inst->U.I.DstReg.File = RC_FILE_ADDRESS;
+ inst->U.I.DstReg.Index = 0;
+ inst->U.I.DstReg.WriteMask = WRITEMASK_X;
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
}
static struct r300_vertex_program *build_program(GLcontext *ctx,
@@ -261,7 +255,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
_mesa_insert_mvp_code(ctx, vp->Base);
}
- rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
+ radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
if (mesa_vp->IsNVProgram)
initialize_NV_registers(&compiler.Base);
@@ -281,6 +275,11 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
}
r3xx_compile_vertex_program(&compiler);
+
+ if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) {
+ rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n");
+ }
+
vp->error = compiler.Base.Error;
vp->Base->Base.InputsRead = vp->code.InputsRead;
@@ -334,7 +333,6 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
#define bump_vpu_count(ptr, new_count) do { \
drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
int _nc=(new_count)/4; \
- assert(_nc < 256); \
if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
} while(0)
diff --git a/r300/radeon_context.h b/r300/radeon_context.h
index 250570f..da4812d 100644
--- a/r300/radeon_context.h
+++ b/r300/radeon_context.h
@@ -51,26 +51,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_screen.h"
-#if R200_MERGED
-extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
-
-#define FALLBACK( radeon, bit, mode ) do { \
- if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
- __FUNCTION__, bit, mode ); \
- radeonFallback( (radeon)->glCtx, bit, mode ); \
-} while (0)
-#else
#define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%s\n", __LINE__, __FILE__);
-#endif
/* TCL fallbacks */
extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
-#if R200_MERGED
-#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode )
-#else
#define TCL_FALLBACK( ctx, bit, mode ) ;
-#endif
#endif /* __RADEON_CONTEXT_H__ */
diff --git a/r300/radeon_mesa_to_rc.c b/r300/radeon_mesa_to_rc.c
new file mode 100644
index 0000000..9f9dec8
--- /dev/null
+++ b/r300/radeon_mesa_to_rc.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_mesa_to_rc.h"
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+
+#include "compiler/radeon_compiler.h"
+#include "compiler/radeon_program.h"
+
+
+static rc_opcode translate_opcode(gl_inst_opcode opcode)
+{
+ switch(opcode) {
+ case OPCODE_NOP: return RC_OPCODE_NOP;
+ case OPCODE_ABS: return RC_OPCODE_ABS;
+ case OPCODE_ADD: return RC_OPCODE_ADD;
+ case OPCODE_ARL: return RC_OPCODE_ARL;
+ case OPCODE_CMP: return RC_OPCODE_CMP;
+ case OPCODE_COS: return RC_OPCODE_COS;
+ case OPCODE_DDX: return RC_OPCODE_DDX;
+ case OPCODE_DDY: return RC_OPCODE_DDY;
+ case OPCODE_DP3: return RC_OPCODE_DP3;
+ case OPCODE_DP4: return RC_OPCODE_DP4;
+ case OPCODE_DPH: return RC_OPCODE_DPH;
+ case OPCODE_DST: return RC_OPCODE_DST;
+ case OPCODE_EX2: return RC_OPCODE_EX2;
+ case OPCODE_EXP: return RC_OPCODE_EXP;
+ case OPCODE_FLR: return RC_OPCODE_FLR;
+ case OPCODE_FRC: return RC_OPCODE_FRC;
+ case OPCODE_KIL: return RC_OPCODE_KIL;
+ case OPCODE_LG2: return RC_OPCODE_LG2;
+ case OPCODE_LIT: return RC_OPCODE_LIT;
+ case OPCODE_LOG: return RC_OPCODE_LOG;
+ case OPCODE_LRP: return RC_OPCODE_LRP;
+ case OPCODE_MAD: return RC_OPCODE_MAD;
+ case OPCODE_MAX: return RC_OPCODE_MAX;
+ case OPCODE_MIN: return RC_OPCODE_MIN;
+ case OPCODE_MOV: return RC_OPCODE_MOV;
+ case OPCODE_MUL: return RC_OPCODE_MUL;
+ case OPCODE_POW: return RC_OPCODE_POW;
+ case OPCODE_RCP: return RC_OPCODE_RCP;
+ case OPCODE_RSQ: return RC_OPCODE_RSQ;
+ case OPCODE_SCS: return RC_OPCODE_SCS;
+ case OPCODE_SEQ: return RC_OPCODE_SEQ;
+ case OPCODE_SFL: return RC_OPCODE_SFL;
+ case OPCODE_SGE: return RC_OPCODE_SGE;
+ case OPCODE_SGT: return RC_OPCODE_SGT;
+ case OPCODE_SIN: return RC_OPCODE_SIN;
+ case OPCODE_SLE: return RC_OPCODE_SLE;
+ case OPCODE_SLT: return RC_OPCODE_SLT;
+ case OPCODE_SNE: return RC_OPCODE_SNE;
+ case OPCODE_SUB: return RC_OPCODE_SUB;
+ case OPCODE_SWZ: return RC_OPCODE_SWZ;
+ case OPCODE_TEX: return RC_OPCODE_TEX;
+ case OPCODE_TXB: return RC_OPCODE_TXB;
+ case OPCODE_TXD: return RC_OPCODE_TXD;
+ case OPCODE_TXL: return RC_OPCODE_TXL;
+ case OPCODE_TXP: return RC_OPCODE_TXP;
+ case OPCODE_XPD: return RC_OPCODE_XPD;
+ default: return RC_OPCODE_ILLEGAL_OPCODE;
+ }
+}
+
+static rc_saturate_mode translate_saturate(unsigned int saturate)
+{
+ switch(saturate) {
+ default:
+ case SATURATE_OFF: return RC_SATURATE_NONE;
+ case SATURATE_ZERO_ONE: return RC_SATURATE_ZERO_ONE;
+ }
+}
+
+static rc_register_file translate_register_file(unsigned int file)
+{
+ switch(file) {
+ case PROGRAM_TEMPORARY: return RC_FILE_TEMPORARY;
+ case PROGRAM_INPUT: return RC_FILE_INPUT;
+ case PROGRAM_OUTPUT: return RC_FILE_OUTPUT;
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM: return RC_FILE_CONSTANT;
+ case PROGRAM_ADDRESS: return RC_FILE_ADDRESS;
+ default: return RC_FILE_NONE;
+ }
+}
+
+static void translate_srcreg(struct rc_src_register * dest, struct prog_src_register * src)
+{
+ dest->File = translate_register_file(src->File);
+ dest->Index = src->Index;
+ dest->RelAddr = src->RelAddr;
+ dest->Swizzle = src->Swizzle;
+ dest->Abs = src->Abs;
+ dest->Negate = src->Negate;
+}
+
+static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_register * src)
+{
+ dest->File = translate_register_file(src->File);
+ dest->Index = src->Index;
+ dest->RelAddr = src->RelAddr;
+ dest->WriteMask = src->WriteMask;
+}
+
+static rc_texture_target translate_tex_target(gl_texture_index target)
+{
+ switch(target) {
+ case TEXTURE_2D_ARRAY_INDEX: return RC_TEXTURE_2D_ARRAY;
+ case TEXTURE_1D_ARRAY_INDEX: return RC_TEXTURE_1D_ARRAY;
+ case TEXTURE_CUBE_INDEX: return RC_TEXTURE_CUBE;
+ case TEXTURE_3D_INDEX: return RC_TEXTURE_3D;
+ case TEXTURE_RECT_INDEX: return RC_TEXTURE_RECT;
+ default:
+ case TEXTURE_2D_INDEX: return RC_TEXTURE_2D;
+ case TEXTURE_1D_INDEX: return RC_TEXTURE_1D;
+ }
+}
+
+static void translate_instruction(struct radeon_compiler * c,
+ struct rc_instruction * dest, struct prog_instruction * src)
+{
+ const struct rc_opcode_info * opcode;
+ unsigned int i;
+
+ dest->U.I.Opcode = translate_opcode(src->Opcode);
+ if (dest->U.I.Opcode == RC_OPCODE_ILLEGAL_OPCODE) {
+ rc_error(c, "Unsupported opcode %i\n", src->Opcode);
+ return;
+ }
+ dest->U.I.SaturateMode = translate_saturate(src->SaturateMode);
+
+ opcode = rc_get_opcode_info(dest->U.I.Opcode);
+
+ for(i = 0; i < opcode->NumSrcRegs; ++i)
+ translate_srcreg(&dest->U.I.SrcReg[i], &src->SrcReg[i]);
+
+ if (opcode->HasDstReg)
+ translate_dstreg(&dest->U.I.DstReg, &src->DstReg);
+
+ if (opcode->HasTexture) {
+ dest->U.I.TexSrcUnit = src->TexSrcUnit;
+ dest->U.I.TexSrcTarget = translate_tex_target(src->TexSrcTarget);
+ dest->U.I.TexShadow = src->TexShadow;
+ }
+}
+
+void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
+{
+ struct prog_instruction *source;
+ unsigned int i;
+
+ for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) {
+ struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ translate_instruction(c, dest, source);
+ }
+
+ c->Program.ShadowSamplers = program->ShadowSamplers;
+ c->Program.InputsRead = program->InputsRead;
+ c->Program.OutputsWritten = program->OutputsWritten;
+
+ int isNVProgram = 0;
+
+ if (program->Target == GL_VERTEX_PROGRAM_ARB) {
+ struct gl_vertex_program * vp = (struct gl_vertex_program *) program;
+ isNVProgram = vp->IsNVProgram;
+ }
+
+ if (isNVProgram) {
+ /* NV_vertex_program has a fixed-sized constant environment.
+ * This could be handled more efficiently for programs that
+ * do not use relative addressing.
+ */
+ for(i = 0; i < 96; ++i) {
+ struct rc_constant constant;
+
+ constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.Size = 4;
+ constant.u.External = i;
+
+ rc_constants_add(&c->Program.Constants, &constant);
+ }
+ } else {
+ for(i = 0; i < program->Parameters->NumParameters; ++i) {
+ struct rc_constant constant;
+
+ constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.Size = 4;
+ constant.u.External = i;
+
+ rc_constants_add(&c->Program.Constants, &constant);
+ }
+ }
+}
diff --git a/r300/radeon_mesa_to_rc.h b/r300/radeon_mesa_to_rc.h
new file mode 100644
index 0000000..9511a04
--- /dev/null
+++ b/r300/radeon_mesa_to_rc.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_MESA_TO_RC_H
+#define RADEON_MESA_TO_RC_H
+
+struct gl_program;
+struct radeon_compiler;
+
+void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+
+#endif /* RADEON_MESA_TO_RC_H */
diff --git a/r600/Makefile.am b/r600/Makefile.am
index 6b2091e..c2ce9c8 100644
--- a/r600/Makefile.am
+++ b/r600/Makefile.am
@@ -1,7 +1,6 @@
AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
-R600_CFLAGS = -DCOMPILE_R600 -DR200_MERGED=0 -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R600
-R600_CFLAGS += -I../radeon -I../radeon/server
+R600_CFLAGS = -DRADEON_R600 -I../radeon -I../radeon/server
r600_dri_la_LTLIBRARIES = r600_dri.la
r600_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R600_CFLAGS)
@@ -11,6 +10,7 @@ r600_dri_ladir = @libdir@/dri
r600_dri_la_SOURCES = \
../radeon/radeon_bo_legacy.c \
../radeon/radeon_common_context.c \
+ ../radeon/radeon_buffer_objects.c \
../radeon/radeon_common.c \
../radeon/radeon_cs_legacy.c \
../radeon/radeon_dma.c \
@@ -44,5 +44,7 @@ if HAVE_LIBDRM_RADEON
r600_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
r600_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
r600_dri_la_SOURCES += \
- ../radeon/radeon_cs_space_drm.c
+ ../radeon/radeon_cs_space_drm.c \
+ ../radeon/radeon_bo.c \
+ ../radeon/radeon_cs.c
endif
diff --git a/r600/r600_cmdbuf.c b/r600/r600_cmdbuf.c
index 3cfe03a..370bb04 100644
--- a/r600/r600_cmdbuf.c
+++ b/r600/r600_cmdbuf.c
@@ -52,29 +52,49 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_mipmap_tree.h"
#include "radeon_reg.h"
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_cs_int.h"
+#else
+#include "radeon_cs_int_drm.h"
+#endif
+struct r600_cs_manager_legacy
+{
+ struct radeon_cs_manager base;
+ struct radeon_context *ctx;
+ /* hack for scratch stuff */
+ uint32_t pending_age;
+ uint32_t pending_count;
+};
+
+struct r600_cs_reloc_legacy {
+ struct radeon_cs_reloc base;
+ uint32_t cindices;
+ uint32_t *indices;
+ uint32_t *reloc_indices;
+};
-static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm,
- uint32_t ndw)
+static struct radeon_cs_int *r600_cs_create(struct radeon_cs_manager *csm,
+ uint32_t ndw)
{
- struct radeon_cs *cs;
+ struct radeon_cs_int *csi;
- cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
- if (cs == NULL) {
+ csi = (struct radeon_cs_int*)calloc(1, sizeof(struct radeon_cs_int));
+ if (csi == NULL) {
return NULL;
}
- cs->csm = csm;
- cs->ndw = (ndw + 0x3FF) & (~0x3FF);
- cs->packets = (uint32_t*)malloc(4*cs->ndw);
- if (cs->packets == NULL) {
- free(cs);
+ csi->csm = csm;
+ csi->ndw = (ndw + 0x3FF) & (~0x3FF);
+ csi->packets = (uint32_t*)malloc(4*csi->ndw);
+ if (csi->packets == NULL) {
+ free(csi);
return NULL;
}
- cs->relocs_total_size = 0;
- return cs;
+ csi->relocs_total_size = 0;
+ return csi;
}
-static int r600_cs_write_reloc(struct radeon_cs *cs,
+static int r600_cs_write_reloc(struct radeon_cs_int *csi,
struct radeon_bo *bo,
uint32_t read_domain,
uint32_t write_domain,
@@ -83,7 +103,7 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
struct r600_cs_reloc_legacy *relocs;
int i;
- relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+ relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
/* check domains */
if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
/* in one CS a bo can only be in read or write domain but not
@@ -98,7 +118,7 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
return -EINVAL;
}
/* check if bo is already referenced */
- for(i = 0; i < cs->crelocs; i++) {
+ for(i = 0; i < csi->crelocs; i++) {
uint32_t *indices;
uint32_t *reloc_indices;
@@ -129,109 +149,108 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
}
relocs[i].indices = indices;
relocs[i].reloc_indices = reloc_indices;
- relocs[i].indices[relocs[i].cindices - 1] = cs->cdw;
- relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->cdw;
- cs->section_cdw += 2;
- cs->cdw += 2;
+ relocs[i].indices[relocs[i].cindices - 1] = csi->cdw;
+ relocs[i].reloc_indices[relocs[i].cindices - 1] = csi->cdw;
+ csi->section_cdw += 2;
+ csi->cdw += 2;
return 0;
}
}
/* add bo to reloc */
relocs = (struct r600_cs_reloc_legacy*)
- realloc(cs->relocs,
- sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1));
+ realloc(csi->relocs,
+ sizeof(struct r600_cs_reloc_legacy) * (csi->crelocs + 1));
if (relocs == NULL) {
return -ENOMEM;
}
- cs->relocs = relocs;
- relocs[cs->crelocs].base.bo = bo;
- relocs[cs->crelocs].base.read_domain = read_domain;
- relocs[cs->crelocs].base.write_domain = write_domain;
- relocs[cs->crelocs].base.flags = flags;
- relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
- relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4);
- if ( (relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL) )
+ csi->relocs = relocs;
+ relocs[csi->crelocs].base.bo = bo;
+ relocs[csi->crelocs].base.read_domain = read_domain;
+ relocs[csi->crelocs].base.write_domain = write_domain;
+ relocs[csi->crelocs].base.flags = flags;
+ relocs[csi->crelocs].indices = (uint32_t*)malloc(4);
+ relocs[csi->crelocs].reloc_indices = (uint32_t*)malloc(4);
+ if ( (relocs[csi->crelocs].indices == NULL) || (relocs[csi->crelocs].reloc_indices == NULL) )
{
return -ENOMEM;
}
- relocs[cs->crelocs].indices[0] = cs->cdw;
- relocs[cs->crelocs].reloc_indices[0] = cs->cdw;
- cs->section_cdw += 2;
- cs->cdw += 2;
- relocs[cs->crelocs].cindices = 1;
- cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
- cs->crelocs++;
+ relocs[csi->crelocs].indices[0] = csi->cdw;
+ relocs[csi->crelocs].reloc_indices[0] = csi->cdw;
+ csi->section_cdw += 2;
+ csi->cdw += 2;
+ relocs[csi->crelocs].cindices = 1;
+ csi->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
+ csi->crelocs++;
radeon_bo_ref(bo);
return 0;
}
-static int r600_cs_begin(struct radeon_cs *cs,
+static int r600_cs_begin(struct radeon_cs_int *csi,
uint32_t ndw,
const char *file,
const char *func,
int line)
{
- if (cs->section) {
+ if (csi->section_ndw) {
fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
- cs->section_file, cs->section_func, cs->section_line);
+ csi->section_file, csi->section_func, csi->section_line);
fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
- cs->section = 1;
- cs->section_ndw = ndw;
- cs->section_cdw = 0;
- cs->section_file = file;
- cs->section_func = func;
- cs->section_line = line;
+ csi->section_ndw = ndw;
+ csi->section_cdw = 0;
+ csi->section_file = file;
+ csi->section_func = func;
+ csi->section_line = line;
- if (cs->cdw + ndw > cs->ndw) {
+ if (csi->cdw + ndw > csi->ndw) {
uint32_t tmp, *ptr;
int num = (ndw > 0x400) ? ndw : 0x400;
- tmp = (cs->cdw + num + 0x3FF) & (~0x3FF);
- ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
+ tmp = (csi->cdw + num + 0x3FF) & (~0x3FF);
+ ptr = (uint32_t*)realloc(csi->packets, 4 * tmp);
if (ptr == NULL) {
return -ENOMEM;
}
- cs->packets = ptr;
- cs->ndw = tmp;
+ csi->packets = ptr;
+ csi->ndw = tmp;
}
return 0;
}
-static int r600_cs_end(struct radeon_cs *cs,
+static int r600_cs_end(struct radeon_cs_int *csi,
const char *file,
const char *func,
int line)
{
- if (!cs->section) {
+ if (!csi->section_ndw) {
fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
- cs->section = 0;
- if ( cs->section_ndw != cs->section_cdw ) {
+ if ( csi->section_ndw != csi->section_cdw ) {
fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
- cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
- fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n",
- cs->section_ndw, cs->cdw, cs->section_cdw);
+ csi->section_file, csi->section_func, csi->section_line, csi->section_ndw, csi->section_cdw);
+ fprintf(stderr, "csi->section_ndw = %d, csi->cdw = %d, csi->section_cdw = %d \n",
+ csi->section_ndw, csi->cdw, csi->section_cdw);
fprintf(stderr, "CS section end at (%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
+ csi->section_ndw = 0;
- if (cs->cdw > cs->ndw) {
+ if (csi->cdw > csi->ndw) {
fprintf(stderr, "CS section overflow at (%s,%s,%d) cdw %d ndw %d\n",
- cs->section_file, cs->section_func, cs->section_line,cs->cdw,cs->ndw);
+ csi->section_file, csi->section_func, csi->section_line,csi->cdw,csi->ndw);
fprintf(stderr, "CS section end at (%s,%s,%d)\n",
file, func, line);
assert(0);
@@ -240,21 +259,21 @@ static int r600_cs_end(struct radeon_cs *cs,
return 0;
}
-static int r600_cs_process_relocs(struct radeon_cs *cs,
+static int r600_cs_process_relocs(struct radeon_cs_int *csi,
uint32_t * reloc_chunk,
uint32_t * length_dw_reloc_chunk)
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct r600_cs_reloc_legacy *relocs;
int i, j, r;
uint32_t offset_dw = 0;
- csm = (struct r600_cs_manager_legacy*)cs->csm;
- relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+ csm = (struct r600_cs_manager_legacy*)csi->csm;
+ relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
restart:
- for (i = 0; i < cs->crelocs; i++) {
- uint32_t soffset, eoffset, asicoffset;
+ for (i = 0; i < csi->crelocs; i++) {
+ uint32_t soffset, eoffset;
r = radeon_bo_legacy_validate(relocs[i].base.bo,
&soffset, &eoffset);
@@ -262,32 +281,20 @@ restart:
goto restart;
}
if (r) {
- fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+ fprintf(stderr, "invalid bo(%p) [0x%08X, 0x%08X]\n",
relocs[i].base.bo, soffset, eoffset);
return r;
}
- asicoffset = soffset;
for (j = 0; j < relocs[i].cindices; j++) {
- if (asicoffset >= eoffset) {
- /* radeon_bo_debug(relocs[i].base.bo, 12); */
- fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
- relocs[i].base.bo, soffset, eoffset);
- fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
- relocs[i].base.bo,
- cs->packets[relocs[i].indices[j]],
- eoffset);
- exit(0);
- return -EINVAL;
- }
/* pkt3 nop header in ib chunk */
- cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
+ csi->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
/* reloc index in ib chunk */
- cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
+ csi->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
}
/* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */
- reloc_chunk[offset_dw] = asicoffset;
+ reloc_chunk[offset_dw] = soffset;
reloc_chunk[offset_dw + 3] = 0;
offset_dw += 4;
@@ -298,14 +305,14 @@ restart:
return 0;
}
-static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
+static int r600_cs_set_age(struct radeon_cs_int *csi) /* -------------- */
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct r600_cs_reloc_legacy *relocs;
int i;
- relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
- for (i = 0; i < cs->crelocs; i++) {
+ relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
+ for (i = 0; i < csi->crelocs; i++) {
radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
radeon_bo_unref(relocs[i].base.bo);
}
@@ -313,21 +320,21 @@ static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
}
#if 0
-static void dump_cmdbuf(struct radeon_cs *cs)
+static void dump_cmdbuf(struct radeon_cs_int *csi)
{
int i;
fprintf(stderr,"--start--\n");
- for (i = 0; i < cs->cdw; i++){
- fprintf(stderr,"0x%08x\n", cs->packets[i]);
+ for (i = 0; i < csi->cdw; i++){
+ fprintf(stderr,"0x%08x\n", csi->packets[i]);
}
fprintf(stderr,"--end--\n");
}
#endif
-static int r600_cs_emit(struct radeon_cs *cs)
+static int r600_cs_emit(struct radeon_cs_int *csi)
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct drm_radeon_cs cs_cmd;
struct drm_radeon_cs_chunk cs_chunk[2];
uint32_t length_dw_reloc_chunk;
@@ -341,9 +348,9 @@ static int r600_cs_emit(struct radeon_cs *cs)
csm->pending_count = 1;
- reloc_chunk = (uint32_t*)calloc(1, cs->crelocs * 4 * 4);
+ reloc_chunk = (uint32_t*)calloc(1, csi->crelocs * 4 * 4);
- r = r600_cs_process_relocs(cs, reloc_chunk, &length_dw_reloc_chunk);
+ r = r600_cs_process_relocs(csi, reloc_chunk, &length_dw_reloc_chunk);
if (r) {
free(reloc_chunk);
return 0;
@@ -351,8 +358,8 @@ static int r600_cs_emit(struct radeon_cs *cs)
/* raw ib chunk */
cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB;
- cs_chunk[0].length_dw = cs->cdw;
- cs_chunk[0].chunk_data = (unsigned long)(cs->packets);
+ cs_chunk[0].length_dw = csi->cdw;
+ cs_chunk[0].chunk_data = (unsigned long)(csi->packets);
/* reloc chaunk */
cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
@@ -370,7 +377,7 @@ static int r600_cs_emit(struct radeon_cs *cs)
do
{
- r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
+ r = drmCommandWriteRead(csi->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
retry++;
} while (r == -EAGAIN && retry < 1000);
@@ -381,11 +388,11 @@ static int r600_cs_emit(struct radeon_cs *cs)
csm->pending_age = cs_cmd.cs_id;
- r600_cs_set_age(cs);
+ r600_cs_set_age(csi);
- cs->csm->read_used = 0;
- cs->csm->vram_write_used = 0;
- cs->csm->gart_write_used = 0;
+ csi->csm->read_used = 0;
+ csi->csm->vram_write_used = 0;
+ csi->csm->gart_write_used = 0;
free(reloc_chunk);
@@ -405,35 +412,34 @@ static void inline r600_cs_free_reloc(void *relocs_p, int crelocs)
}
}
-static int r600_cs_destroy(struct radeon_cs *cs)
+static int r600_cs_destroy(struct radeon_cs_int *csi)
{
- r600_cs_free_reloc(cs->relocs, cs->crelocs);
- free(cs->relocs);
- free(cs->packets);
- free(cs);
+ r600_cs_free_reloc(csi->relocs, csi->crelocs);
+ free(csi->relocs);
+ free(csi->packets);
+ free(csi);
return 0;
}
-static int r600_cs_erase(struct radeon_cs *cs)
+static int r600_cs_erase(struct radeon_cs_int *csi)
{
- r600_cs_free_reloc(cs->relocs, cs->crelocs);
- free(cs->relocs);
- cs->relocs_total_size = 0;
- cs->relocs = NULL;
- cs->crelocs = 0;
- cs->cdw = 0;
- cs->section = 0;
+ r600_cs_free_reloc(csi->relocs, csi->crelocs);
+ free(csi->relocs);
+ csi->relocs_total_size = 0;
+ csi->relocs = NULL;
+ csi->crelocs = 0;
+ csi->cdw = 0;
return 0;
}
-static int r600_cs_need_flush(struct radeon_cs *cs)
+static int r600_cs_need_flush(struct radeon_cs_int *csi)
{
/* this function used to flush when the BO usage got to
* a certain size, now the higher levels handle this better */
return 0;
}
-static void r600_cs_print(struct radeon_cs *cs, FILE *file)
+static void r600_cs_print(struct radeon_cs_int *csi, FILE *file)
{
}
diff --git a/r600/r600_cmdbuf.h b/r600/r600_cmdbuf.h
index eba43d3..dff0009 100644
--- a/r600/r600_cmdbuf.h
+++ b/r600/r600_cmdbuf.h
@@ -118,22 +118,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R600_IT_SET_CTL_CONST 0x00006F00
#define R600_IT_SURFACE_BASE_UPDATE 0x00007300
-struct r600_cs_manager_legacy
-{
- struct radeon_cs_manager base;
- struct radeon_context *ctx;
- /* hack for scratch stuff */
- uint32_t pending_age;
- uint32_t pending_count;
-};
-
-struct r600_cs_reloc_legacy {
- struct radeon_cs_reloc base;
- uint32_t cindices;
- uint32_t *indices;
- uint32_t *reloc_indices;
-};
-
struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
/**
diff --git a/r600/r600_context.c b/r600/r600_context.c
index e0b77d4..dbd2337 100644
--- a/r600/r600_context.c
+++ b/r600/r600_context.c
@@ -59,10 +59,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_debug.h"
#include "r600_context.h"
#include "radeon_common_context.h"
+#include "radeon_buffer_objects.h"
#include "radeon_span.h"
#include "r600_cmdbuf.h"
#include "r600_emit.h"
#include "radeon_bocs_wrapper.h"
+#include "radeon_queryobj.h"
#include "r700_state.h"
#include "r700_ioctl.h"
@@ -72,11 +74,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "utils.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
-/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */
-int future_hw_tcl_on = 1;
-int hw_tcl_on = 1;
-
#define need_GL_VERSION_2_0
+#define need_GL_ARB_occlusion_query
#define need_GL_ARB_point_parameters
#define need_GL_ARB_vertex_program
#define need_GL_EXT_blend_equation_separate
@@ -85,19 +84,20 @@ int hw_tcl_on = 1;
#define need_GL_EXT_framebuffer_object
#define need_GL_EXT_fog_coord
#define need_GL_EXT_gpu_program_parameters
+#define need_GL_EXT_provoking_vertex
#define need_GL_EXT_secondary_color
#define need_GL_EXT_stencil_two_side
#define need_GL_ATI_separate_stencil
#define need_GL_NV_vertex_program
-#include "extension_helper.h"
-
-extern const struct tnl_pipeline_stage *r700_pipeline[];
+#include "main/remap_helper.h"
-const struct dri_extension card_extensions[] = {
+static const struct dri_extension card_extensions[] = {
/* *INDENT-OFF* */
+ {"GL_ARB_depth_clamp", NULL},
{"GL_ARB_depth_texture", NULL},
{"GL_ARB_fragment_program", NULL},
+ {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
{"GL_ARB_multitexture", NULL},
{"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
{"GL_ARB_shadow", NULL},
@@ -117,6 +117,7 @@ const struct dri_extension card_extensions[] = {
{"GL_EXT_packed_depth_stencil", NULL},
{"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
{"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
+ {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions },
{"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
{"GL_EXT_shadow_funcs", NULL},
{"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions},
@@ -128,6 +129,8 @@ const struct dri_extension card_extensions[] = {
{"GL_EXT_texture_lod_bias", NULL},
{"GL_EXT_texture_mirror_clamp", NULL},
{"GL_EXT_texture_rectangle", NULL},
+ {"GL_EXT_vertex_array_bgra", NULL},
+ {"GL_EXT_texture_sRGB", NULL},
{"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
{"GL_ATI_texture_env_combine3", NULL},
{"GL_ATI_texture_mirror_once", NULL},
@@ -142,7 +145,7 @@ const struct dri_extension card_extensions[] = {
};
-const struct dri_extension mm_extensions[] = {
+static const struct dri_extension mm_extensions[] = {
{ "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
{ NULL, NULL }
};
@@ -151,21 +154,24 @@ const struct dri_extension mm_extensions[] = {
* The GL 2.0 functions are needed to make display lists work with
* functions added by GL_ATI_separate_stencil.
*/
-const struct dri_extension gl_20_extension[] = {
+static const struct dri_extension gl_20_extension[] = {
{"GL_VERSION_2_0", GL_VERSION_2_0_functions },
};
-
-static void r600RunPipeline(GLcontext * ctx)
-{
- _mesa_lock_context_textures(ctx);
-
- if (ctx->NewState)
- _mesa_update_state_locked(ctx);
-
- _tnl_run_pipeline(ctx);
- _mesa_unlock_context_textures(ctx);
-}
+static const struct tnl_pipeline_stage *r600_pipeline[] = {
+ /* Catch any t&l fallbacks
+ */
+ &_tnl_vertex_transform_stage,
+ &_tnl_normal_transform_stage,
+ &_tnl_lighting_stage,
+ &_tnl_fog_coordinate_stage,
+ &_tnl_texgen_stage,
+ &_tnl_texture_transform_stage,
+ &_tnl_point_attenuation_stage,
+ &_tnl_vertex_program_stage,
+ &_tnl_render_stage,
+ 0,
+};
static void r600_get_lock(radeonContextPtr rmesa)
{
@@ -176,7 +182,7 @@ static void r600_get_lock(radeonContextPtr rmesa)
if (!rmesa->radeonScreen->kernel_mm)
radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
}
-}
+}
static void r600_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
{
@@ -198,6 +204,24 @@ static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
context->radeon.Fallback &= ~bit;
}
+static void r600_emit_query_finish(radeonContextPtr radeon)
+{
+ context_t *context = (context_t*) radeon;
+ BATCH_LOCALS(&context->radeon);
+
+ struct radeon_query_object *query = radeon->query.current;
+
+ BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+ R600_OUT_BATCH(ZPASS_DONE);
+ R600_OUT_BATCH(query->curr_offset + 8); /* hw writes qwords */
+ R600_OUT_BATCH(0x00000000);
+ R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+ assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
+
static void r600_init_vtbl(radeonContextPtr radeon)
{
radeon->vtbl.get_lock = r600_get_lock;
@@ -206,6 +230,101 @@ static void r600_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.swtcl_flush = NULL;
radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms;
radeon->vtbl.fallback = r600_fallback;
+ radeon->vtbl.emit_query_finish = r600_emit_query_finish;
+}
+
+static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
+{
+ context_t *r600 = R700_CONTEXT(ctx);
+
+ ctx->Const.MaxTextureImageUnits =
+ driQueryOptioni(&r600->radeon.optionCache, "texture_image_units");
+ ctx->Const.MaxTextureCoordUnits =
+ driQueryOptioni(&r600->radeon.optionCache, "texture_coord_units");
+ ctx->Const.MaxTextureUnits =
+ MIN2(ctx->Const.MaxTextureImageUnits,
+ ctx->Const.MaxTextureCoordUnits);
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+ ctx->Const.MaxTextureLodBias = 16.0;
+
+ ctx->Const.MaxTextureLevels = 13; /* hw support 14 */
+ ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */
+
+ ctx->Const.MinPointSize = 0x0001 / 8.0;
+ ctx->Const.MinPointSizeAA = 0x0001 / 8.0;
+ ctx->Const.MaxPointSize = 0xffff / 8.0;
+ ctx->Const.MaxPointSizeAA = 0xffff / 8.0;
+
+ ctx->Const.MinLineWidth = 0x0001 / 8.0;
+ ctx->Const.MinLineWidthAA = 0x0001 / 8.0;
+ ctx->Const.MaxLineWidth = 0xffff / 8.0;
+ ctx->Const.MaxLineWidthAA = 0xffff / 8.0;
+
+ ctx->Const.MaxDrawBuffers = 1; /* hw supports 8 */
+
+ /* 256 for reg-based consts, inline consts also supported */
+ ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */
+ ctx->Const.VertexProgram.MaxNativeInstructions = 8192;
+ ctx->Const.VertexProgram.MaxNativeAttribs = 160;
+ ctx->Const.VertexProgram.MaxTemps = 128;
+ ctx->Const.VertexProgram.MaxNativeTemps = 128;
+ ctx->Const.VertexProgram.MaxNativeParameters = 256;
+ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */
+
+ ctx->Const.FragmentProgram.MaxNativeTemps = 128;
+ ctx->Const.FragmentProgram.MaxNativeAttribs = 32;
+ ctx->Const.FragmentProgram.MaxNativeParameters = 256;
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192;
+ /* 8 per clause on r6xx, 16 on rv670/r7xx */
+ if ((screen->chip_family == CHIP_FAMILY_RV670) ||
+ (screen->chip_family >= CHIP_FAMILY_RV770))
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16;
+ else
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8;
+ ctx->Const.FragmentProgram.MaxNativeInstructions = 8192;
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */
+ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */
+}
+
+static void r600ParseOptions(context_t *r600, radeonScreenPtr screen)
+{
+ /* Parse configuration files.
+ * Do this here so that initialMaxAnisotropy is set before we create
+ * the default textures.
+ */
+ driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache,
+ screen->driScreen->myNum, "r600");
+
+ r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache,
+ "def_max_anisotropy");
+
+}
+
+static void r600InitGLExtensions(GLcontext *ctx)
+{
+ context_t *r600 = R700_CONTEXT(ctx);
+
+ driInitExtensions(ctx, card_extensions, GL_TRUE);
+ if (r600->radeon.radeonScreen->kernel_mm)
+ driInitExtensions(ctx, mm_extensions, GL_FALSE);
+
+ if (driQueryOptionb
+ (&r600->radeon.optionCache, "disable_stencil_two_side"))
+ _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
+
+ if (r600->radeon.glCtx->Mesa_DXTn
+ && !driQueryOptionb(&r600->radeon.optionCache, "disable_s3tc")) {
+ _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+ _mesa_enable_extension(ctx, "GL_S3_s3tc");
+ } else
+ if (driQueryOptionb(&r600->radeon.optionCache, "force_s3tc_enable"))
+ {
+ _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+ }
+
+ /* XXX: RV740 only seems to report results from half of its DBs */
+ if (r600->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV740)
+ _mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
}
/* Create the device specific rendering context.
@@ -231,19 +350,10 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
return GL_FALSE;
}
- if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
- hw_tcl_on = future_hw_tcl_on = 0;
+ r600ParseOptions(r600, screen);
+ r600->radeon.radeonScreen = screen;
r600_init_vtbl(&r600->radeon);
- /* Parse configuration files.
- * Do this here so that initialMaxAnisotropy is set before we create
- * the default textures.
- */
- driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache,
- screen->driScreen->myNum, "r600");
-
- r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache,
- "def_max_anisotropy");
/* Init default driver functions then plug in our R600-specific functions
* (the texture functions are especially important)
@@ -253,7 +363,9 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
r700InitStateFuncs(&functions);
r600InitTextureFuncs(&functions);
r700InitShaderFuncs(&functions);
+ radeonInitQueryObjFunctions(&functions);
r700InitIoctlFuncs(&functions);
+ radeonInitBufferObjectFuncs(&functions);
if (!radeonInitContext(&r600->radeon, &functions,
glVisual, driContextPriv,
@@ -263,44 +375,14 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
return GL_FALSE;
}
- /* Init r600 context data */
- /* Set the maximum texture size small enough that we can guarentee that
- * all texture units can bind a maximal texture and have them both in
- * texturable memory at once.
- */
-
ctx = r600->radeon.glCtx;
- ctx->Const.MaxTextureImageUnits =
- driQueryOptioni(&r600->radeon.optionCache, "texture_image_units");
- ctx->Const.MaxTextureCoordUnits =
- driQueryOptioni(&r600->radeon.optionCache, "texture_coord_units");
- ctx->Const.MaxTextureUnits =
- MIN2(ctx->Const.MaxTextureImageUnits,
- ctx->Const.MaxTextureCoordUnits);
- ctx->Const.MaxTextureMaxAnisotropy = 16.0;
- ctx->Const.MaxTextureLodBias = 16.0;
-
- ctx->Const.MaxTextureLevels = 13; /* hw support 14 */
- ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */
-
- ctx->Const.MinPointSize = 0x0001 / 8.0;
- ctx->Const.MinPointSizeAA = 0x0001 / 8.0;
- ctx->Const.MaxPointSize = 0xffff / 8.0;
- ctx->Const.MaxPointSizeAA = 0xffff / 8.0;
-
- ctx->Const.MinLineWidth = 0x0001 / 8.0;
- ctx->Const.MinLineWidthAA = 0x0001 / 8.0;
- ctx->Const.MaxLineWidth = 0xffff / 8.0;
- ctx->Const.MaxLineWidthAA = 0xffff / 8.0;
+ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
- /* Needs further modifications */
-#if 0
- ctx->Const.MaxArrayLockSize =
- ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
-#endif
+ r600InitConstValues(ctx, screen);
- ctx->Const.MaxDrawBuffers = 1;
+ _mesa_set_mvp_with_dp4( ctx, GL_TRUE );
/* Initialize the software rasterizer and helper modules.
*/
@@ -309,16 +391,12 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
_tnl_CreateContext(ctx);
_swsetup_CreateContext(ctx);
_swsetup_Wakeup(ctx);
- _ae_create_context(ctx);
/* Install the customized pipeline:
*/
_tnl_destroy_pipeline(ctx);
- _tnl_install_pipeline(ctx, r700_pipeline);
-
- /* Try and keep materials and vertices separate:
- */
-/* _tnl_isolate_materials(ctx, GL_TRUE); */
+ _tnl_install_pipeline(ctx, r600_pipeline);
+ TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
/* Configure swrast and TNL to match hardware characteristics:
*/
@@ -327,66 +405,16 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
_tnl_allow_pixel_fog(ctx, GL_FALSE);
_tnl_allow_vertex_fog(ctx, GL_TRUE);
- /* 256 for reg-based consts, inline consts also supported */
- ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */
- ctx->Const.VertexProgram.MaxNativeInstructions = 8192;
- ctx->Const.VertexProgram.MaxNativeAttribs = 160;
- ctx->Const.VertexProgram.MaxTemps = 128;
- ctx->Const.VertexProgram.MaxNativeTemps = 128;
- ctx->Const.VertexProgram.MaxNativeParameters = 256;
- ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */
-
- ctx->Const.FragmentProgram.MaxNativeTemps = 128;
- ctx->Const.FragmentProgram.MaxNativeAttribs = 32;
- ctx->Const.FragmentProgram.MaxNativeParameters = 256;
- ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192;
- /* 8 per clause on r6xx, 16 on rv670/r7xx */
- if ((screen->chip_family == CHIP_FAMILY_RV670) ||
- (screen->chip_family >= CHIP_FAMILY_RV770))
- ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16;
- else
- ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8;
- ctx->Const.FragmentProgram.MaxNativeInstructions = 8192;
- ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */
- ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */
- ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
- ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
-
radeon_init_debug();
- driInitExtensions(ctx, card_extensions, GL_TRUE);
- if (r600->radeon.radeonScreen->kernel_mm)
- driInitExtensions(ctx, mm_extensions, GL_FALSE);
-
- if (driQueryOptionb
- (&r600->radeon.optionCache, "disable_stencil_two_side"))
- _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
-
- if (r600->radeon.glCtx->Mesa_DXTn
- && !driQueryOptionb(&r600->radeon.optionCache, "disable_s3tc")) {
- _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
- _mesa_enable_extension(ctx, "GL_S3_s3tc");
- } else
- if (driQueryOptionb(&r600->radeon.optionCache, "force_s3tc_enable"))
- {
- _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
- }
+ r700InitDraw(ctx);
radeon_fbo_init(&r600->radeon);
radeonInitSpanFuncs( ctx );
-
r600InitCmdBuf(r600);
-
r700InitState(r600->radeon.glCtx);
- TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline;
-
- if (driQueryOptionb(&r600->radeon.optionCache, "no_rast")) {
- radeon_warning("disabling 3D acceleration\n");
-#if R200_MERGED
- FALLBACK(&r600->radeon, RADEON_FALLBACK_DISABLE, 1);
-#endif
- }
+ r600InitGLExtensions(ctx);
return GL_TRUE;
}
diff --git a/r600/r600_context.h b/r600/r600_context.h
index 9397ecd..394fd75 100644
--- a/r600/r600_context.h
+++ b/r600/r600_context.h
@@ -58,29 +58,6 @@ typedef struct r600_context context_t;
#include "main/mm.h"
-/************ DMA BUFFERS **************/
-
-/* The blit width for texture uploads
- */
-#define R600_BLIT_WIDTH_BYTES 1024
-#define R600_MAX_TEXTURE_UNITS 8
-
-struct r600_texture_state {
- int tc_count; /* number of incoming texture coordinates from VAP */
-};
-
-/* Perhaps more if we store programs in vmem? */
-/* drm_r600_cmd_header_t->vpu->count is unsigned char */
-#define VSF_MAX_FRAGMENT_LENGTH (255*4)
-
-/* Can be tested with colormat currently. */
-#define VSF_MAX_FRAGMENT_TEMPS (14)
-
-#define STATE_R600_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
-#define STATE_R600_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
-
-extern int hw_tcl_on;
-
#define COLOR_IS_RGBA
#define TAG(x) r600##x
#include "tnl_dd/t_dd_vertex.h"
@@ -126,6 +103,32 @@ struct r600_hw_state {
struct radeon_state_atom tx_brdr_clr;
};
+typedef struct StreamDesc
+{
+ GLint size; //number of data element
+ GLenum type; //data element type
+ GLsizei stride;
+
+ struct radeon_bo *bo;
+ GLint bo_offset;
+
+ GLuint dwords;
+ GLuint dst_loc;
+ GLuint _signed;
+ GLboolean normalize;
+ GLboolean is_named_bo;
+ GLubyte element;
+} StreamDesc;
+
+typedef struct r700_index_buffer
+{
+ struct radeon_bo *bo;
+ int bo_offset;
+
+ GLboolean is_32bit;
+ GLuint count;
+} r700_index_buffer;
+
/**
* \brief R600 context structure.
*/
@@ -141,9 +144,9 @@ struct r600_context {
/* Vertex buffers
*/
- GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
- GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
-
+ GLint nNumActiveAos;
+ StreamDesc stream_desc[VERT_ATTRIB_MAX];
+ struct r700_index_buffer ind_buf;
};
#define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx))
@@ -174,16 +177,13 @@ extern GLboolean r700SyncSurf(context_t *context,
uint32_t write_domain,
uint32_t sync_type);
-extern void r700SetupStreams(GLcontext * ctx);
extern void r700Start3D(context_t *context);
extern void r600InitAtoms(context_t *context);
+extern void r700InitDraw(GLcontext *ctx);
#define RADEON_D_CAPTURE 0
#define RADEON_D_PLAYBACK 1
#define RADEON_D_PLAYBACK_RAW 2
#define RADEON_D_T 3
-#define r600PackFloat32 radeonPackFloat32
-#define r600PackFloat24 radeonPackFloat24
-
#endif /* __R600_CONTEXT_H__ */
diff --git a/r600/r600_reg_r6xx.h b/r600/r600_reg_r6xx.h
index f7702c4..74af7b4 100644
--- a/r600/r600_reg_r6xx.h
+++ b/r600/r600_reg_r6xx.h
@@ -415,11 +415,11 @@ enum {
ALPHA_TO_MASK_ENABLE = 1 << 0,
ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8,
ALPHA_TO_MASK_OFFSET0_shift = 8,
- ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10,
ALPHA_TO_MASK_OFFSET1_shift = 10,
- ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12,
ALPHA_TO_MASK_OFFSET2_shift = 12,
- ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14,
ALPHA_TO_MASK_OFFSET3_shift = 14,
// SQ_VTX_CONSTANT_WORD2_0 = 0x00038008,
diff --git a/r600/r600_reg_r7xx.h b/r600/r600_reg_r7xx.h
index e5c01c8..eb169bd 100644
--- a/r600/r600_reg_r7xx.h
+++ b/r600/r600_reg_r7xx.h
@@ -143,6 +143,8 @@ enum {
// SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c,
R7xx_TRUNCATE_COORD_bit = 1 << 9,
R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10,
+// DB_RENDER_CONTROL = 0x00028d0c,
+ PERFECT_ZPASS_COUNTS_bit = 1 << 15,
} ;
diff --git a/r600/r600_tex.c b/r600/r600_tex.c
index d105b90..9d83a64 100644
--- a/r600/r600_tex.c
+++ b/r600/r600_tex.c
@@ -40,7 +40,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/image.h"
#include "main/mipmap.h"
#include "main/simple_list.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "main/teximage.h"
#include "main/texobj.h"
@@ -286,6 +285,7 @@ static void r600TexParameter(GLcontext * ctx, GLenum target,
GLenum pname, const GLfloat * params)
{
radeonTexObj* t = radeon_tex_obj(texObj);
+ GLenum baseFormat;
radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_VERBOSE,
"%s( %s )\n", __FUNCTION__,
@@ -312,23 +312,15 @@ static void r600TexParameter(GLcontext * ctx, GLenum target,
case GL_TEXTURE_MAX_LEVEL:
case GL_TEXTURE_MIN_LOD:
case GL_TEXTURE_MAX_LOD:
- /* This isn't the most efficient solution but there doesn't appear to
- * be a nice alternative. Since there's no LOD clamping,
- * we just have to rely on loading the right subset of mipmap levels
- * to simulate a clamped LOD.
- */
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- t->validated = GL_FALSE;
- }
+ t->validated = GL_FALSE;
break;
case GL_DEPTH_TEXTURE_MODE:
if (!texObj->Image[0][texObj->BaseLevel])
return;
- if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat
- == GL_DEPTH_COMPONENT) {
+ baseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat;
+ if (baseFormat == GL_DEPTH_COMPONENT ||
+ baseFormat == GL_DEPTH_STENCIL) {
r600SetDepthTexMode(texObj);
break;
} else {
@@ -368,10 +360,8 @@ static void r600DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
t->bo = NULL;
}
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- }
+ radeon_miptree_unreference(&t->mt);
+
_mesa_delete_texture_object(ctx, texObj);
}
diff --git a/r600/r600_texstate.c b/r600/r600_texstate.c
index bcb8d7c..4ec315b 100644
--- a/r600/r600_texstate.c
+++ b/r600/r600_texstate.c
@@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/imports.h"
#include "main/context.h"
#include "main/macros.h"
-#include "main/texformat.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
@@ -78,7 +77,7 @@ void r600UpdateTextureState(GLcontext * ctx)
}
}
-static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_format)
+static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa_format)
{
radeonTexObj *t = radeon_tex_obj(tObj);
@@ -87,9 +86,19 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+
switch (mesa_format) /* This is mesa format. */
{
case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_SIGNED_RGBA8888:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
@@ -101,8 +110,19 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) {
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+ }
break;
case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_SIGNED_RGBA8888_REV:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
@@ -114,6 +134,16 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) {
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+ }
break;
case MESA_FORMAT_ARGB8888:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
@@ -480,13 +510,21 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_Z16:
+ case MESA_FORMAT_X8_Z24:
+ case MESA_FORMAT_S8_Z24:
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z32:
+ case MESA_FORMAT_S8:
switch (mesa_format) {
case MESA_FORMAT_Z16:
SETfield(t->SQ_TEX_RESOURCE1, FMT_16,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
break;
+ case MESA_FORMAT_X8_Z24:
+ case MESA_FORMAT_S8_Z24:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_24,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ break;
case MESA_FORMAT_Z24_S8:
SETfield(t->SQ_TEX_RESOURCE1, FMT_24_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
@@ -495,6 +533,12 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_32,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
break;
+ case MESA_FORMAT_S8:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ break;
+ default:
+ break;
};
switch (tObj->DepthMode) {
case GL_LUMINANCE: /* X, X, X, ONE */
@@ -531,6 +575,49 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
return GL_FALSE;
}
break;
+ /* EXT_texture_sRGB */
+ case MESA_FORMAT_SRGBA8:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
+ case MESA_FORMAT_SLA8:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
+ case MESA_FORMAT_SL8: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
default:
/* Not supported format */
return GL_FALSE;
@@ -548,7 +635,7 @@ void r600SetDepthTexMode(struct gl_texture_object *tObj)
t = radeon_tex_obj(tObj);
- r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat);
+ r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat);
}
@@ -562,7 +649,6 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
{
radeonTexObj *t = radeon_tex_obj(texObj);
const struct gl_texture_image *firstImage;
- int firstlevel = t->mt ? t->mt->firstLevel : 0;
GLuint uTexelPitch, row_align;
if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled &&
@@ -570,10 +656,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
t->bo)
return;
- firstImage = t->base.Image[0][firstlevel];
+ firstImage = t->base.Image[0][t->minLod];
if (!t->image_override) {
- if (!r600GetTexFormat(texObj, firstImage->TexFormat->MesaFormat)) {
+ if (!r600GetTexFormat(texObj, firstImage->TexFormat)) {
radeon_error("unexpected texture format in %s\n",
__FUNCTION__);
return;
@@ -605,7 +691,8 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
}
row_align = rmesa->radeon.texture_row_align - 1;
- uTexelPitch = ((firstImage->Width * t->mt->bpp + row_align) & ~row_align) / t->mt->bpp;
+ uTexelPitch = (_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align;
+ uTexelPitch = uTexelPitch / _mesa_get_format_bytes(firstImage->TexFormat);
uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK)
& ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
@@ -619,10 +706,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1,
TEX_HEIGHT_shift, TEX_HEIGHT_mask);
- if ((t->mt->lastLevel - t->mt->firstLevel) > 0) {
- t->SQ_TEX_RESOURCE3 = t->mt->levels[0].size / 256;
- SETfield(t->SQ_TEX_RESOURCE4, t->mt->firstLevel, BASE_LEVEL_shift, BASE_LEVEL_mask);
- SETfield(t->SQ_TEX_RESOURCE5, t->mt->lastLevel, LAST_LEVEL_shift, LAST_LEVEL_mask);
+ if ((t->maxLod - t->minLod) > 0) {
+ t->SQ_TEX_RESOURCE3 = t->mt->levels[t->minLod].size / 256;
+ SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask);
+ SETfield(t->SQ_TEX_RESOURCE5, t->maxLod - t->minLod, LAST_LEVEL_shift, LAST_LEVEL_mask);
}
}
@@ -721,7 +808,8 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
struct gl_texture_object *tObj =
_mesa_lookup_texture(rmesa->radeon.glCtx, texname);
radeonTexObjPtr t = radeon_tex_obj(tObj);
- uint32_t pitch_val, size;
+ const struct gl_texture_image *firstImage;
+ uint32_t pitch_val, size, row_align;
if (!tObj)
return;
@@ -731,7 +819,9 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
if (!offset)
return;
- size = pitch;//h * w * (depth / 8);
+ firstImage = t->base.Image[0][t->minLod];
+ row_align = rmesa->radeon.texture_row_align - 1;
+ size = ((_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align) * firstImage->Height;
if (t->bo) {
radeon_bo_unref(t->bo);
t->bo = NULL;
@@ -854,20 +944,14 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
radeon_bo_unref(rImage->bo);
rImage->bo = NULL;
}
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = NULL;
- }
- if (rImage->mt) {
- radeon_miptree_unreference(rImage->mt);
- rImage->mt = NULL;
- }
+
+ radeon_miptree_unreference(&t->mt);
+ radeon_miptree_unreference(&rImage->mt);
+
_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
texImage->RowStride = rb->pitch / rb->cpp;
- texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
- internalFormat,
- type, format, 0);
+
rImage->bo = rb->bo;
radeon_bo_ref(rImage->bo);
t->bo = rb->bo;
diff --git a/r600/r700_assembler.c b/r600/r700_assembler.c
index 00eda54..67e0ee7 100644
--- a/r600/r700_assembler.c
+++ b/r600/r700_assembler.c
@@ -336,7 +336,8 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
switch (pAsm->D.dst.opcode)
{
- case SQ_OP2_INST_ADD:
+ case SQ_OP2_INST_ADD:
+ case SQ_OP2_INST_KILLGT:
case SQ_OP2_INST_MUL:
case SQ_OP2_INST_MAX:
case SQ_OP2_INST_MIN:
@@ -354,9 +355,9 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
return 2;
case SQ_OP2_INST_MOV:
+ case SQ_OP2_INST_MOVA_FLOOR:
case SQ_OP2_INST_FRACT:
case SQ_OP2_INST_FLOOR:
- case SQ_OP2_INST_KILLGT:
case SQ_OP2_INST_EXP_IEEE:
case SQ_OP2_INST_LOG_CLAMPED:
case SQ_OP2_INST_LOG_IEEE:
@@ -790,6 +791,133 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
return GL_TRUE;
}
+GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
+ GLuint destination_register,
+ GLenum type,
+ GLint size,
+ GLubyte element,
+ GLuint _signed,
+ GLboolean normalize,
+ VTX_FETCH_METHOD * pFetchMethod)
+{
+ GLuint client_size_inbyte;
+ GLuint data_format;
+ GLuint mega_fetch_count;
+ GLuint is_mega_fetch_flag;
+
+ R700VertexGenericFetch* vfetch_instruction_ptr;
+ R700VertexGenericFetch* assembled_vfetch_instruction_ptr
+ = pAsm->vfetch_instruction_ptr_array[element];
+
+ if (assembled_vfetch_instruction_ptr == NULL)
+ {
+ vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
+ if (vfetch_instruction_ptr == NULL)
+ {
+ return GL_FALSE;
+ }
+ Init_R700VertexGenericFetch(vfetch_instruction_ptr);
+ }
+ else
+ {
+ vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
+ }
+
+ data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
+
+ if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
+ {
+ //TODO : mini fetch
+ }
+ else
+ {
+ mega_fetch_count = MEGA_FETCH_BYTES - 1;
+ is_mega_fetch_flag = 0x1;
+ pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
+ }
+
+ vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
+ vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
+ vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+
+ vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
+ vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
+ vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
+ vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
+ vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
+
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+
+ vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
+ vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
+ vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
+
+ if(1 == _signed)
+ {
+ vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
+ }
+ else
+ {
+ vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
+ }
+
+ if(GL_TRUE == normalize)
+ {
+ vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
+ }
+ else
+ {
+ vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
+ }
+
+ // Destination register
+ vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
+ vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
+
+ vfetch_instruction_ptr->m_Word2.f.offset = 0;
+ vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
+
+ vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
+
+ if (assembled_vfetch_instruction_ptr == NULL)
+ {
+ if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
+ {
+ return GL_FALSE;
+ }
+
+ if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
+ {
+ return GL_FALSE;
+ }
+ else
+ {
+ pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
+{
+ GLint i;
+ pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
+ pAsm->cf_current_vtx_clause_ptr = NULL;
+
+ for (i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
+ }
+
+ cleanup_vfetch_shaderinst(pAsm->pR700Shader);
+
+ return GL_TRUE;
+}
+
GLuint gethelpr(r700_AssemblerBase* pAsm)
{
GLuint r = pAsm->uHelpReg;
@@ -1180,8 +1308,10 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
case PROGRAM_INPUT:
switch (pILInst->SrcReg[0].Index)
{
+ case FRAG_ATTRIB_WPOS:
case FRAG_ATTRIB_COL0:
case FRAG_ATTRIB_COL1:
+ case FRAG_ATTRIB_FOGC:
case FRAG_ATTRIB_TEX0:
case FRAG_ATTRIB_TEX1:
case FRAG_ATTRIB_TEX2:
@@ -1194,7 +1324,16 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
pAsm->S[0].src.reg =
pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
pAsm->S[0].src.rtype = SRC_REG_INPUT;
- break;
+ break;
+ case FRAG_ATTRIB_FACE:
+ fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
+ break;
+ case FRAG_ATTRIB_PNTC:
+ fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
+ break;
+ case FRAG_ATTRIB_VAR0:
+ fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n");
+ break;
}
break;
}
@@ -1951,9 +2090,9 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
GLuint contiguous_slots_needed;
GLuint uNumSrc = r700GetNumOperands(pAsm);
- GLuint channel_swizzle, j;
- GLuint chan_counter[4] = {0, 0, 0, 0};
- PVSSRC * pSource[3];
+ //GLuint channel_swizzle, j;
+ //GLuint chan_counter[4] = {0, 0, 0, 0};
+ //PVSSRC * pSource[3];
GLboolean bSplitInst = GL_FALSE;
if (1 == pAsm->D.dst.math)
@@ -2053,7 +2192,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
}
//other bits
- alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
+ alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X;
if( (is_single_scalar_operation == GL_TRUE)
|| (GL_TRUE == bSplitInst) )
@@ -2387,6 +2526,35 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
+GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
+{ /* TODO: ar values dont' persist between clauses */
+ if( GL_FALSE == checkop1(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = 0;
+ pAsm->D.dst.writex = 0;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
GLboolean assemble_BAD(char *opcode_str)
{
radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
@@ -2508,7 +2676,7 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
}
else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
{
- onecomp_PVSSRC(&(pAsm->S[1].src), 3);
+ onecomp_PVSSRC(&(pAsm->S[0].src), 3);
}
if ( GL_FALSE == next_ins(pAsm) )
@@ -2561,6 +2729,133 @@ GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
{
return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
}
+
+GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
+{
+ BITS tmp;
+
+ checkop1(pAsm);
+
+ tmp = gethelpr(pAsm);
+
+ // FLOOR tmp.x, a.x
+ // EX2 dst.x tmp.x
+
+ if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ // FRACT dst.y a.x
+
+ if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ // EX2 dst.z, a.x
+
+ if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ // MOV dst.w 1.0
+
+ if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+}
GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
{
@@ -2617,15 +2912,15 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
{
+ /* TODO: doc says KILL has to be last(end) ALU clause */
+
checkop1(pAsm);
pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
-
- if ( GL_FALSE == assemble_dst(pAsm) )
- {
- return GL_FALSE;
- }
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = 0;
pAsm->D.dst.writex = 0;
pAsm->D.dst.writey = 0;
pAsm->D.dst.writez = 0;
@@ -2638,20 +2933,11 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
noneg_PVSSRC(&(pAsm->S[0].src));
- pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
-
- if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File)
+ if ( GL_FALSE == assemble_src(pAsm, 0, 1) )
{
- pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
- }
- else
- { //PROGRAM_OUTPUT
- pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index];
+ return GL_FALSE;
}
- setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
- noswizzle_PVSSRC(&(pAsm->S[1].src));
-
if ( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
@@ -2751,6 +3037,217 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
+GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
+{
+ BITS tmp1, tmp2, tmp3;
+
+ checkop1(pAsm);
+
+ tmp1 = gethelpr(pAsm);
+ tmp2 = gethelpr(pAsm);
+ tmp3 = gethelpr(pAsm);
+
+ // FIXME: The hardware can do fabs() directly on input
+ // elements, but the compiler doesn't have the
+ // capability to use that.
+
+ // MAX tmp1.x, a.x, -a.x (fabs(a.x))
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp1;
+ pAsm->D.dst.writex = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->S[1].bits = pAsm->S[0].bits;
+ flipneg_PVSSRC(&(pAsm->S[1].src));
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // Entire algo:
+ //
+ // LG2 tmp2.x, tmp1.x
+ // FLOOR tmp3.x, tmp2.x
+ // MOV dst.x, tmp3.x
+ // ADD tmp3.x, tmp2.x, -tmp3.x
+ // EX2 dst.y, tmp3.x
+ // MOV dst.z, tmp2.x
+ // MOV dst.w, 1.0
+
+ // LG2 tmp2.x, tmp1.x
+ // FLOOR tmp3.x, tmp2.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+ pAsm->D.dst.math = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp2;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp1;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp3;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MOV dst.x, tmp3.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp3;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // ADD tmp3.x, tmp2.x, -tmp3.x
+ // EX2 dst.y, tmp3.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp3;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[1].src.reg = tmp3;
+
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+ neg_PVSSRC(&(pAsm->S[1].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp3;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MOV dst.z, tmp2.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MOV dst.w 1.0
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp1;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
{
int tmp, ii;
@@ -2908,6 +3405,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
pAsm->S[0].src.rtype = srcType;
pAsm->S[0].src.reg = srcReg;
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
pAsm->S[1].src.reg = tmp;
setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
@@ -3417,22 +3915,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
need_barrier = GL_TRUE;
}
- switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
- {
- case OPCODE_TEX:
- break;
- case OPCODE_TXB:
- radeon_error("do not support TXB yet\n");
- return GL_FALSE;
- break;
- case OPCODE_TXP:
- break;
- default:
- radeon_error("Internal error: bad texture op (not TEX)\n");
- return GL_FALSE;
- break;
- }
-
if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
{
GLuint tmp = gethelpr(pAsm);
@@ -3611,7 +4093,15 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
}
- pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB)
+ {
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ }
+
pAsm->is_tex = GL_TRUE;
if ( GL_TRUE == need_barrier )
{
@@ -3809,8 +4299,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
break;
case OPCODE_ARL:
- radeon_error("Not yet implemented instruction OPCODE_ARL \n");
- //if ( GL_FALSE == assemble_BAD("ARL") )
+ if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
return GL_FALSE;
break;
case OPCODE_ARR:
@@ -3845,10 +4334,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_EXP:
- radeon_error("Not yet implemented instruction OPCODE_EXP \n");
- //if ( GL_FALSE == assemble_BAD("EXP") )
+ if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
return GL_FALSE;
- break; // approx of EX2
+ break;
case OPCODE_FLR:
if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
@@ -3881,10 +4369,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_LOG:
- radeon_error("Not yet implemented instruction OPCODE_LOG \n");
- //if ( GL_FALSE == assemble_BAD("LOG") )
+ if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
return GL_FALSE;
- break; // approx of LG2
+ break;
case OPCODE_MAD:
if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
@@ -4155,6 +4642,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
GLbitfield OutputsWritten)
{
unsigned int unBit;
+ GLuint export_count = 0;
if(pR700AsmCode->depth_export_register_number >= 0)
{
@@ -4176,6 +4664,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
{
return GL_FALSE;
}
+ export_count++;
}
unBit = 1 << FRAG_RESULT_DEPTH;
if(OutputsWritten & unBit)
@@ -4189,8 +4678,15 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
{
return GL_FALSE;
}
+ export_count++;
}
-
+ /* Need to export something, otherwise we'll hang
+ * results are undefined anyway */
+ if(export_count == 0)
+ {
+ Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
+ }
+
if(pR700AsmCode->cf_last_export_ptr != NULL)
{
pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
diff --git a/r600/r700_assembler.h b/r600/r700_assembler.h
index 73bb8ba..c66db50 100644
--- a/r600/r700_assembler.h
+++ b/r600/r700_assembler.h
@@ -415,6 +415,15 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
GLuint number_of_elements,
GLenum dataElementType,
VTX_FETCH_METHOD* pFetchMethod);
+GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
+ GLuint destination_register,
+ GLenum type,
+ GLint size,
+ GLubyte element,
+ GLuint _signed,
+ GLboolean normalize,
+ VTX_FETCH_METHOD * pFetchMethod);
+GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm);
GLuint gethelpr(r700_AssemblerBase* pAsm);
void resethelpr(r700_AssemblerBase* pAsm);
void checkop_init(r700_AssemblerBase* pAsm);
@@ -461,18 +470,21 @@ GLboolean next_ins(r700_AssemblerBase *pAsm);
GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
+GLboolean assemble_ARL(r700_AssemblerBase *pAsm);
GLboolean assemble_BAD(char *opcode_str);
GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
GLboolean assemble_COS(r700_AssemblerBase *pAsm);
GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
GLboolean assemble_DST(r700_AssemblerBase *pAsm);
GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
+GLboolean assemble_EXP(r700_AssemblerBase *pAsm);
GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
+GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
diff --git a/r600/r700_chip.c b/r600/r700_chip.c
index 06d7e9c..02c56b9 100644
--- a/r600/r700_chip.c
+++ b/r600/r700_chip.c
@@ -54,11 +54,15 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
if (ctx->Texture.Unit[i]._ReallyEnabled) {
radeonTexObj *t = r700->textures[i];
+ uint32_t offset;
if (t) {
- if (!t->image_override)
+ if (!t->image_override) {
bo = t->mt->bo;
- else
+ offset = get_base_teximage_offset(t);
+ } else {
bo = t->bo;
+ offset = 0;
+ }
if (bo) {
r700SyncSurf(context, bo,
@@ -77,7 +81,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
bo,
- 0,
+ offset,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
bo,
@@ -141,17 +145,15 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom
}
}
+extern int getTypeSize(GLenum type);
static void r700SetupVTXConstants(GLcontext * ctx,
- unsigned int nStreamID,
void * pAos,
- unsigned int size, /* number of elements in vector */
- unsigned int stride,
- unsigned int count) /* number of vectors in stream */
+ StreamDesc * pStreamDesc)
{
context_t *context = R700_CONTEXT(ctx);
struct radeon_aos * paos = (struct radeon_aos *)pAos;
+ unsigned int nVBsize;
BATCH_LOCALS(&context->radeon);
- radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
unsigned int uSQ_VTX_CONSTANT_WORD0_0;
unsigned int uSQ_VTX_CONSTANT_WORD1_0;
@@ -171,18 +173,40 @@ static void r700SetupVTXConstants(GLcontext * ctx,
else
r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
+ if(0 == pStreamDesc->stride)
+ {
+ nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type);
+ }
+ else
+ {
+ nVBsize = paos->count * pStreamDesc->stride;
+ }
+
uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
- uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1;
+ uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1;
SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */
- SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask);
- SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL),
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL),
SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift,
SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */
- SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED,
- SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
- SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+
+ if(GL_TRUE == pStreamDesc->normalize)
+ {
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+ }
+ //else
+ //{
+ // SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT,
+ // SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+ //}
+
+ if(1 == pStreamDesc->_signed)
+ {
+ SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+ }
SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask);
SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER,
@@ -191,7 +215,7 @@ static void r700SetupVTXConstants(GLcontext * ctx,
BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
- R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+ R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0);
R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0);
R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0);
@@ -208,31 +232,6 @@ static void r700SetupVTXConstants(GLcontext * ctx,
}
-void r700SetupStreams(GLcontext *ctx)
-{
- context_t *context = R700_CONTEXT(ctx);
- struct r700_vertex_program *vp = context->selected_vp;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
- unsigned int i, j = 0;
- radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
-
- R600_STATECHANGE(context, vtx);
-
- for(i=0; i<VERT_ATTRIB_MAX; i++) {
- if(vp->mesa_program->Base.InputsRead & (1 << i)) {
- rcommon_emit_vector(ctx,
- &context->radeon.tcl.aos[j],
- vb->AttribPtr[i]->data,
- vb->AttribPtr[i]->size,
- vb->AttribPtr[i]->stride,
- vb->Count);
- j++;
- }
- }
- context->radeon.tcl.aos_count = j;
-}
-
static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
{
context_t *context = R700_CONTEXT(ctx);
@@ -256,15 +255,12 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
COMMIT_BATCH();
for(i=0; i<VERT_ATTRIB_MAX; i++) {
- if(vp->mesa_program->Base.InputsRead & (1 << i)) {
- /* currently aos are packed */
- r700SetupVTXConstants(ctx,
- i,
- (void*)(&context->radeon.tcl.aos[j]),
- (unsigned int)context->radeon.tcl.aos[j].components,
- (unsigned int)context->radeon.tcl.aos[j].stride * 4,
- (unsigned int)context->radeon.tcl.aos[j].count);
- j++;
+ if(vp->mesa_program->Base.InputsRead & (1 << i))
+ {
+ r700SetupVTXConstants(ctx,
+ (void*)(&context->radeon.tcl.aos[j]),
+ &(context->stream_desc[j]));
+ j++;
}
}
}
@@ -366,7 +362,6 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a
rrb = radeon_get_depthbuffer(&context->radeon);
if (!rrb || !rrb->bo) {
- fprintf(stderr, "no rrb\n");
return;
}
@@ -408,7 +403,6 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
rrb = radeon_get_colorbuffer(&context->radeon);
if (!rrb || !rrb->bo) {
- fprintf(stderr, "no rrb\n");
return;
}
@@ -794,8 +788,7 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
BATCH_LOCALS(&context->radeon);
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
- BEGIN_BATCH_NO_AUTOSTATE(23);
- R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All);
+ BEGIN_BATCH_NO_AUTOSTATE(17);
R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2);
R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All);
@@ -808,7 +801,6 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All);
R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All);
- R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All);
R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All);
END_BATCH();
@@ -1108,6 +1100,32 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
COMMIT_BATCH();
}
+static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct radeon_query_object *query = radeon->query.current;
+ BATCH_LOCALS(radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ /* clear the buffer */
+ radeon_bo_map(query->bo, GL_FALSE);
+ memset(query->bo->ptr, 0, 4 * 2 * sizeof(uint64_t)); /* 4 DBs, 2 qwords each */
+ radeon_bo_unmap(query->bo);
+
+ radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
+ query->bo,
+ 0, RADEON_GEM_DOMAIN_GTT);
+
+ BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+ R600_OUT_BATCH(ZPASS_DONE);
+ R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */
+ R600_OUT_BATCH(0x00000000);
+ R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+ query->emitted_begin = GL_TRUE;
+}
+
static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
{
return atom->cmd_size;
@@ -1136,7 +1154,11 @@ static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom)
count += 3;
if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
- for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+ /* targets are enabled in r700SetRenderTarget but state
+ size is calculated before that. Until MRT's are done
+ hardcode target0 as enabled. */
+ count += 3;
+ for (ui = 1; ui < R700_MAX_RENDER_TARGETS; ui++) {
if (r700->render_target[ui].enabled)
count += 3;
}
@@ -1216,6 +1238,20 @@ static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom)
return count;
}
+static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct radeon_query_object *query = radeon->query.current;
+ int count;
+
+ if (!query || query->emitted_begin)
+ count = 0;
+ else
+ count = atom->cmd_size;
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+ return count;
+}
+
#define ALLOC_STATE( ATOM, CHK, SZ, EMIT ) \
do { \
context->atoms.ATOM.cmd_size = (SZ); \
@@ -1229,6 +1265,19 @@ do { \
insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \
} while (0)
+static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+ radeon->query.queryobj.cmd_size = (SZ);
+ radeon->query.queryobj.cmd = NULL;
+ radeon->query.queryobj.name = "queryobj";
+ radeon->query.queryobj.idx = 0;
+ radeon->query.queryobj.check = check_queryobj;
+ radeon->query.queryobj.dirty = GL_FALSE;
+ radeon->query.queryobj.emit = r700SendQueryBegin;
+ radeon->hw.max_state_size += (SZ);
+ insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+
void r600InitAtoms(context_t *context)
{
radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
@@ -1239,7 +1288,7 @@ void r600InitAtoms(context_t *context)
context->radeon.hw.atomlist.name = "atom-list";
ALLOC_STATE(sq, always, 34, r700SendSQConfig);
- ALLOC_STATE(db, always, 23, r700SendDBState);
+ ALLOC_STATE(db, always, 17, r700SendDBState);
ALLOC_STATE(stencil, always, 4, r700SendStencilState);
ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
ALLOC_STATE(sc, always, 15, r700SendSCState);
@@ -1252,9 +1301,9 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(poly, always, 10, r700SendPolyState);
ALLOC_STATE(cb, cb, 18, r700SendCBState);
ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
+ ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
- ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
ALLOC_STATE(sx, always, 9, r700SendSXState);
ALLOC_STATE(vgt, always, 41, r700SendVGTState);
ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
@@ -1268,6 +1317,7 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
+ r600_init_query_stateobj(&context->radeon, 6 * 2);
context->radeon.hw.is_dirty = GL_TRUE;
context->radeon.hw.all_dirty = GL_TRUE;
diff --git a/r600/r700_fragprog.c b/r600/r700_fragprog.c
index 78ce3ae..ccafd43 100644
--- a/r600/r700_fragprog.c
+++ b/r600/r700_fragprog.c
@@ -135,15 +135,19 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
{
GLuint i, j;
GLint * puiTEMPwrites;
+ GLint * puiTEMPreads;
struct prog_instruction * pILInst;
InstDeps *pInstDeps;
struct prog_instruction * texcoord_DepInst;
GLint nDepInstID;
puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
+ puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
+
for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
{
puiTEMPwrites[i] = -1;
+ puiTEMPreads[i] = -1;
}
pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
@@ -167,6 +171,11 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
{
//Set dep.
pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
+ //Set first read
+ if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 )
+ {
+ puiTEMPreads[pILInst->SrcReg[j].Index] = i;
+ }
}
else
{
@@ -177,8 +186,6 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
fp->r700AsmCode.pInstDeps = pInstDeps;
- FREE(puiTEMPwrites);
-
//Find dep for tex inst
for(i=0; i<mesa_fp->Base.NumInstructions; i++)
{
@@ -203,9 +210,25 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
{ //... other deps?
}
}
+ // make sure that we dont overwrite src used earlier
+ nDepInstID = puiTEMPreads[pILInst->DstReg.Index];
+ if(nDepInstID < i)
+ {
+ pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index];
+ texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
+ if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
+ {
+ pInstDeps[nDepInstID].nDstDep = i;
+ }
+
+ }
+
}
}
+ FREE(puiTEMPwrites);
+ FREE(puiTEMPreads);
+
return GL_TRUE;
}
@@ -251,7 +274,15 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
number_of_colors_exported--;
}
- fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
+ /* illegal to set this to 0 */
+ if(number_of_colors_exported || z_enabled)
+ {
+ fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
+ }
+ else
+ {
+ fp->r700Shader.exportMode = (1 << 1);
+ }
fp->translated = GL_TRUE;
@@ -341,6 +372,11 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
}
+ else
+ {
+ CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
+ CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
+ }
ui = (unNumOfReg < ui) ? ui : unNumOfReg;
@@ -357,26 +393,6 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
EXPORT_MODE_shift, EXPORT_MODE_mask);
- R600_STATECHANGE(context, db);
-
- if(fp->r700Shader.killIsUsed)
- {
- SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
- }
- else
- {
- CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
- }
-
- if(fp->r700Shader.depthIsExported)
- {
- SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
- }
- else
- {
- CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
- }
-
// emit ps input map
unBit = 1 << FRAG_ATTRIB_WPOS;
if(mesa_fp->Base.InputsRead & unBit)
@@ -443,9 +459,12 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
}
}
- R600_STATECHANGE(context, cb);
exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
- r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
+ if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1))
+ {
+ R600_STATECHANGE(context, cb);
+ r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
+ }
/* sent out shader constants. */
paramList = fp->mesa_program.Base.Parameters;
diff --git a/r600/r700_oglprog.c b/r600/r700_oglprog.c
index 5290ef3..0d476fc 100644
--- a/r600/r700_oglprog.c
+++ b/r600/r700_oglprog.c
@@ -40,6 +40,24 @@
#include "r700_vertprog.h"
+static void freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *cache)
+{
+ struct r700_vertex_program *tmp, *vp = cache->progs;
+
+ while (vp) {
+ tmp = vp->next;
+ /* Release DMA region */
+ r600DeleteShader(ctx, vp->shaderbo);
+ /* Clean up */
+ Clean_Up_Assembler(&(vp->r700AsmCode));
+ Clean_Up_Shader(&(vp->r700Shader));
+
+ _mesa_reference_vertprog(ctx, &vp->mesa_program, NULL);
+ _mesa_free(vp);
+ vp = tmp;
+ }
+}
+
static struct gl_program *r700NewProgram(GLcontext * ctx,
GLenum target,
GLuint id)
@@ -84,8 +102,7 @@ static struct gl_program *r700NewProgram(GLcontext * ctx,
static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
{
- struct r700_vertex_program_cont * vpc;
- struct r700_vertex_program *vp, *tmp;
+ struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog;
struct r700_fragment_program * fp;
radeon_print(RADEON_SHADER, RADEON_VERBOSE,
@@ -95,20 +112,7 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
{
case GL_VERTEX_STATE_PROGRAM_NV:
case GL_VERTEX_PROGRAM_ARB:
- vpc = (struct r700_vertex_program_cont*)prog;
- vp = vpc->progs;
- while (vp) {
- tmp = vp->next;
- /* Release DMA region */
-
- r600DeleteShader(ctx, vp->shaderbo);
-
- /* Clean up */
- Clean_Up_Assembler(&(vp->r700AsmCode));
- Clean_Up_Shader(&(vp->r700Shader));
- _mesa_free(vp);
- vp = tmp;
- }
+ freeVertProgCache(ctx, vpc);
break;
case GL_FRAGMENT_PROGRAM_NV:
case GL_FRAGMENT_PROGRAM_ARB:
@@ -131,7 +135,24 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
static void
r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
-
+ struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog;
+ struct r700_fragment_program * fp = (struct r700_fragment_program*)prog;
+
+ switch (target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ freeVertProgCache(ctx, vpc);
+ vpc->progs = NULL;
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ r600DeleteShader(ctx, fp->shaderbo);
+ Clean_Up_Assembler(&(fp->r700AsmCode));
+ Clean_Up_Shader(&(fp->r700Shader));
+ fp->translated = GL_FALSE;
+ fp->loaded = GL_FALSE;
+ fp->shaderbo = NULL;
+ break;
+ }
+
}
static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
diff --git a/r600/r700_render.c b/r600/r700_render.c
index b1c3648..47f89c9 100644
--- a/r600/r700_render.c
+++ b/r600/r700_render.c
@@ -43,6 +43,7 @@
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "tnl/t_pipeline.h"
+#include "vbo/vbo_context.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
@@ -53,13 +54,12 @@
#include "r700_fragprog.h"
#include "r700_state.h"
+#include "radeon_buffer_objects.h"
#include "radeon_common_context.h"
void r700WaitForIdle(context_t *context);
void r700WaitForIdleClean(context_t *context);
-GLboolean r700SendTextureState(context_t *context);
static unsigned int r700PrimitiveType(int prim);
-void r600UpdateTextureState(GLcontext * ctx);
GLboolean r700SyncSurf(context_t *context,
struct radeon_bo *pbo,
uint32_t read_domain,
@@ -249,113 +249,635 @@ static int r700NumVerts(int num_verts, int prim)
static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
{
- context_t *context = R700_CONTEXT(ctx);
- BATCH_LOCALS(&context->radeon);
- int type, i, total_emit;
- int num_indices;
- uint32_t vgt_draw_initiator = 0;
- uint32_t vgt_index_type = 0;
- uint32_t vgt_primitive_type = 0;
- uint32_t vgt_num_indices = 0;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
-
- type = r700PrimitiveType(prim);
- num_indices = r700NumVerts(end - start, prim);
-
- radeon_print(RADEON_RENDER, RADEON_TRACE,
- "%s type %x num_indices %d\n",
- __func__, type, num_indices);
-
- if (type < 0 || num_indices <= 0)
- return;
+ context_t *context = R700_CONTEXT(ctx);
+ BATCH_LOCALS(&context->radeon);
+ int type, total_emit;
+ int num_indices;
+ uint32_t vgt_draw_initiator = 0;
+ uint32_t vgt_index_type = 0;
+ uint32_t vgt_primitive_type = 0;
+ uint32_t vgt_num_indices = 0;
+
+ type = r700PrimitiveType(prim);
+ num_indices = r700NumVerts(end - start, prim);
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE,
+ "%s type %x num_indices %d\n",
+ __func__, type, num_indices);
+
+ if (type < 0 || num_indices <= 0)
+ return;
+
+ SETfield(vgt_primitive_type, type,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+
+ SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+
+ if(GL_TRUE != context->ind_buf.is_32bit)
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+
+ vgt_num_indices = num_indices;
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+ total_emit = 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + 5 + 2; /* DRAW_INDEX */
+
+ BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+ // prim
+ R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+ R600_OUT_BATCH(vgt_primitive_type);
+ // index type
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(vgt_index_type);
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+ // draw packet
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
+ R600_OUT_BATCH(context->ind_buf.bo_offset);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
+ context->ind_buf.bo,
+ context->ind_buf.bo_offset,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ BATCH_LOCALS(&context->radeon);
+ int type, i;
+ uint32_t num_indices, total_emit = 0;
+ uint32_t vgt_draw_initiator = 0;
+ uint32_t vgt_index_type = 0;
+ uint32_t vgt_primitive_type = 0;
+ uint32_t vgt_num_indices = 0;
+
+ type = r700PrimitiveType(prim);
+ num_indices = r700NumVerts(end - start, prim);
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE,
+ "%s type %x num_indices %d\n",
+ __func__, type, num_indices);
+
+ if (type < 0 || num_indices <= 0)
+ return;
- total_emit = 3 /* VGT_PRIMITIVE_TYPE */
- + 2 /* VGT_INDEX_TYPE */
- + 2 /* NUM_INSTANCES */
- + num_indices + 3; /* DRAW_INDEX_IMMD */
-
- BEGIN_BATCH_NO_AUTOSTATE(total_emit);
- // prim
- SETfield(vgt_primitive_type, type,
- VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
- R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
- R600_OUT_BATCH(vgt_primitive_type);
-
- // index type
- SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
- R600_OUT_BATCH(vgt_index_type);
-
- // num instances
- R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
- R600_OUT_BATCH(1);
-
- // draw packet
- vgt_num_indices = num_indices;
- SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
- SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
-
- R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
+ SETfield(vgt_primitive_type, type,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+
+ if (num_indices > 0xffff)
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+ else
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+
+ vgt_num_indices = num_indices;
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+ if (start == 0)
+ {
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ }
+ else
+ {
+ if (num_indices > 0xffff)
+ {
+ total_emit += num_indices;
+ }
+ else
+ {
+ total_emit += (num_indices + 1) / 2;
+ }
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ }
+
+ total_emit += 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + 3; /* DRAW */
+
+ BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+ // prim
+ R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+ R600_OUT_BATCH(vgt_primitive_type);
+ // index type
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(vgt_index_type);
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+ // draw packet
+ if(start == 0)
+ {
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
R600_OUT_BATCH(vgt_num_indices);
R600_OUT_BATCH(vgt_draw_initiator);
-
- for (i = start; i < (start + num_indices); i++) {
- if(vb->Elts)
- R600_OUT_BATCH(vb->Elts[i]);
+ }
+ else
+ {
+ if (num_indices > 0xffff)
+ {
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ for (i = start; i < (start + num_indices); i++)
+ {
+ R600_OUT_BATCH(i);
+ }
+ }
+ else
+ {
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1)));
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ for (i = start; i < (start + num_indices); i += 2)
+ {
+ if ((i + 1) == (start + num_indices))
+ {
+ R600_OUT_BATCH(i);
+ }
else
- R600_OUT_BATCH(i);
- }
- END_BATCH();
- COMMIT_BATCH();
+ {
+ R600_OUT_BATCH(((i + 1) << 16) | (i));
+ }
+ }
+ }
+ }
+ END_BATCH();
+ COMMIT_BATCH();
}
/* start 3d, idle, cb/db flush */
#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14
-static GLuint r700PredictRenderSize(GLcontext* ctx)
+static GLuint r700PredictRenderSize(GLcontext* ctx,
+ const struct _mesa_prim *prim,
+ const struct _mesa_index_buffer *ib,
+ GLuint nr_prims)
{
context_t *context = R700_CONTEXT(ctx);
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct r700_vertex_program *vp = context->selected_vp;
- struct vertex_buffer *vb = &tnl->vb;
GLboolean flushed;
GLuint dwords, i;
GLuint state_size;
- /* pre calculate aos count so state prediction works */
- context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead);
dwords = PRE_EMIT_STATE_BUFSZ;
- for (i = 0; i < vb->PrimitiveCount; i++)
- dwords += vb->Primitive[i].count + 10;
+ if (ib)
+ dwords += nr_prims * 14;
+ else {
+ for (i = 0; i < nr_prims; ++i)
+ {
+ if (prim[i].start == 0)
+ dwords += 10;
+ else if (prim[i].count > 0xffff)
+ dwords += prim[i].count + 10;
+ else
+ dwords += ((prim[i].count + 1) / 2) + 10;
+ }
+ }
+
state_size = radeonCountStateEmitSize(&context->radeon);
flushed = rcommonEnsureCmdBufSpace(&context->radeon,
- dwords + state_size, __FUNCTION__);
-
+ dwords + state_size,
+ __FUNCTION__);
if (flushed)
- dwords += radeonCountStateEmitSize(&context->radeon);
+ dwords += radeonCountStateEmitSize(&context->radeon);
else
- dwords += state_size;
+ dwords += state_size;
- radeon_print(RADEON_RENDER, RADEON_VERBOSE,
- "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
return dwords;
+
+}
+
+#define CONVERT( TYPE, MACRO ) do { \
+ GLuint i, j, sz; \
+ sz = input->Size; \
+ if (input->Normalized) { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = MACRO(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } else { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = (GLfloat)(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } \
+} while (0)
+
+/**
+ * Convert attribute data type to float
+ * If the attribute uses named buffer object replace the bo with newly allocated bo
+ */
+static void r700ConvertAttrib(GLcontext *ctx, int count,
+ const struct gl_client_array *input,
+ struct StreamDesc *attr)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ const GLvoid *src_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+ GLfloat *dst_ptr;
+ GLuint stride;
+
+ stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+ /* Convert value for first element only */
+ if (input->StrideB == 0)
+ {
+ count = 1;
+ }
+
+ if (input->BufferObj->Name)
+ {
+ if (!input->BufferObj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ }
+ else
+ {
+ src_ptr = input->Ptr;
+ }
+
+ radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
+ sizeof(GLfloat) * input->Size * count, 32);
+ dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+
+ assert(src_ptr != NULL);
+
+ switch (input->Type)
+ {
+ case GL_DOUBLE:
+ CONVERT(GLdouble, (GLfloat));
+ break;
+ case GL_UNSIGNED_INT:
+ CONVERT(GLuint, UINT_TO_FLOAT);
+ break;
+ case GL_INT:
+ CONVERT(GLint, INT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_SHORT:
+ CONVERT(GLushort, USHORT_TO_FLOAT);
+ break;
+ case GL_SHORT:
+ CONVERT(GLshort, SHORT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_BYTE:
+ assert(input->Format != GL_BGRA);
+ CONVERT(GLubyte, UBYTE_TO_FLOAT);
+ break;
+ case GL_BYTE:
+ CONVERT(GLbyte, BYTE_TO_FLOAT);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+}
+
+static void r700AlignDataToDword(GLcontext *ctx,
+ const struct gl_client_array *input,
+ int count,
+ struct StreamDesc *attr)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ const int dst_stride = (input->StrideB + 3) & ~3;
+ const int size = getTypeSize(input->Type) * input->Size * count;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
+
+ if (!input->BufferObj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ {
+ GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+ int i;
+
+ for (i = 0; i < count; ++i)
+ {
+ _mesa_memcpy(dst_ptr, src_ptr, input->StrideB);
+ src_ptr += input->StrideB;
+ dst_ptr += dst_stride;
+ }
+ }
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+
+ attr->stride = dst_stride;
+}
+
+static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ GLuint stride;
+ int ret;
+ int i, index;
+
+ R600_STATECHANGE(context, vtx);
+
+ for(index = 0; index < context->nNumActiveAos; index++)
+ {
+ struct radeon_aos *aos = &context->radeon.tcl.aos[index];
+ i = context->stream_desc[index].element;
+
+ stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
+
+ if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ getTypeSize(input[i]->Type) != 4 ||
+#endif
+ stride < 4)
+ {
+ r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
+ }
+ else
+ {
+ if (input[i]->BufferObj->Name)
+ {
+ if (stride % 4 != 0)
+ {
+ assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+ r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
+ context->stream_desc[index].is_named_bo = GL_FALSE;
+ }
+ else
+ {
+ context->stream_desc[index].stride = input[i]->StrideB;
+ context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
+ context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+ context->stream_desc[index].is_named_bo = GL_TRUE;
+ }
+ }
+ else
+ {
+ int size;
+ int local_count = count;
+ uint32_t *dst;
+
+ if (input[i]->StrideB == 0)
+ {
+ size = getTypeSize(input[i]->Type) * input[i]->Size;
+ local_count = 1;
+ }
+ else
+ {
+ size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
+ }
+
+ radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
+ &context->stream_desc[index].bo_offset, size, 32);
+ assert(context->stream_desc[index].bo->ptr != NULL);
+ dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
+ context->stream_desc[index].bo_offset);
+
+ switch (context->stream_desc[index].dwords)
+ {
+ case 1:
+ radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ case 2:
+ radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ case 3:
+ radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ case 4:
+ radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+ }
+
+ aos->count = context->stream_desc[index].stride == 0 ? 1 : count;
+ aos->stride = context->stream_desc[index].stride / sizeof(float);
+ aos->components = context->stream_desc[index].dwords;
+ aos->bo = context->stream_desc[index].bo;
+ aos->offset = context->stream_desc[index].bo_offset;
+
+ if(context->stream_desc[index].is_named_bo)
+ {
+ radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs,
+ context->stream_desc[index].bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+ }
+
+ ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
+ first_elem(&context->radeon.dma.reserved)->bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+}
+
+static void r700FreeData(GLcontext *ctx)
+{
+ /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
+ * to prevent double unref in radeonReleaseArrays
+ * called during context destroy
+ */
+ context_t *context = R700_CONTEXT(ctx);
+
+ int i;
+
+ for (i = 0; i < context->nNumActiveAos; i++)
+ {
+ if (!context->stream_desc[i].is_named_bo)
+ {
+ radeon_bo_unref(context->stream_desc[i].bo);
+ }
+ context->radeon.tcl.aos[i].bo = NULL;
+ }
+
+ if (context->ind_buf.bo != NULL)
+ {
+ radeon_bo_unref(context->ind_buf.bo);
+ }
+}
+
+static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ GLvoid *src_ptr;
+ GLuint *out;
+ int i;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ mapped_named_bo = GL_TRUE;
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ }
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ if (mesa_ind_buf->type == GL_UNSIGNED_BYTE)
+ {
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+ GLubyte *in = (GLubyte *)src_ptr;
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+
+ assert(context->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+ {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count)
+ {
+ *out++ = in[i];
+ }
+
+#if MESA_BIG_ENDIAN
+ }
+ else
+ { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
+ GLushort *in = (GLushort *)src_ptr;
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+
+ assert(context->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+ {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count)
+ {
+ *out++ = in[i];
+ }
+#endif
+ }
+
+ context->ind_buf.is_32bit = GL_FALSE;
+ context->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+}
+
+static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ context_t *context = R700_CONTEXT(ctx);
+
+ if (!mesa_ind_buf) {
+ context->ind_buf.bo = NULL;
+ return;
+ }
+
+#if MESA_BIG_ENDIAN
+ if (mesa_ind_buf->type == GL_UNSIGNED_INT)
+ {
+#else
+ if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
+ {
+#endif
+ const GLvoid *src_ptr;
+ GLvoid *dst_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+ assert(context->ind_buf.bo->ptr != NULL);
+ dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ _mesa_memcpy(dst_ptr, src_ptr, size);
+
+ context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+ context->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+ }
+ else
+ {
+ r700FixupIndexBuffer(ctx, mesa_ind_buf);
+ }
}
-static GLboolean r700RunRender(GLcontext * ctx,
- struct tnl_pipeline_stage *stage)
+static GLboolean r700TryDrawPrims(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index )
{
context_t *context = R700_CONTEXT(ctx);
radeonContextPtr radeon = &context->radeon;
- unsigned int i, id = 0;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
+ GLuint i, id = 0;
struct radeon_renderbuffer *rrb;
- radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n",
- __func__, context->radeon.cmdbuf.cs->cdw);
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ _tnl_UpdateFixedFunctionProgram(ctx);
+ r700SetVertexFormat(ctx, arrays, max_index + 1);
+ /* shaders need to be updated before buffers are validated */
+ r700UpdateShaders(ctx);
+ if (!r600ValidateBuffers(ctx))
+ return GL_FALSE;
/* always emit CB base to prevent
* lock ups on some chips.
@@ -367,21 +889,29 @@ static GLboolean r700RunRender(GLcontext * ctx,
r700SetScissor(context);
r700SetupVertexProgram(ctx);
r700SetupFragmentProgram(ctx);
- r600UpdateTextureState(ctx);
+ r700UpdateShaderStates(ctx);
- GLuint emit_end = r700PredictRenderSize(ctx)
- + context->radeon.cmdbuf.cs->cdw;
- r700SetupStreams(ctx);
+ GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims)
+ + context->radeon.cmdbuf.cs->cdw;
+
+ r700SetupIndexBuffer(ctx, ib);
+ r700SetupStreams(ctx, arrays, max_index + 1);
radeonEmitState(radeon);
radeon_debug_add_indent();
- /* richard test code */
- for (i = 0; i < vb->PrimitiveCount; i++) {
- GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
- GLuint start = vb->Primitive[i].start;
- GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
- r700RunRenderPrimitive(ctx, start, end, prim);
+ for (i = 0; i < nr_prims; ++i)
+ {
+ if (context->ind_buf.bo)
+ r700RunRenderPrimitive(ctx,
+ prim[i].start,
+ prim[i].start + prim[i].count,
+ prim[i].mode);
+ else
+ r700RunRenderPrimitiveImmediate(ctx,
+ prim[i].start,
+ prim[i].start + prim[i].count,
+ prim[i].mode);
}
radeon_debug_remove_indent();
@@ -398,83 +928,54 @@ static GLboolean r700RunRender(GLcontext * ctx,
r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
- radeonReleaseArrays(ctx, ~0);
-
- radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n",
- __func__, context->radeon.cmdbuf.cs->cdw);
+ r700FreeData(ctx);
- if ( emit_end < context->radeon.cmdbuf.cs->cdw )
- WARN_ONCE("Rendering was %d commands larger than predicted size."
- " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
-
- return GL_FALSE;
-}
+ if (emit_end < context->radeon.cmdbuf.cs->cdw)
+ {
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
+ }
-static GLboolean r700RunNonTCLRender(GLcontext * ctx,
- struct tnl_pipeline_stage *stage) /* -------------------- */
-{
- GLboolean bRet = GL_TRUE;
-
- return bRet;
+ return GL_TRUE;
}
-static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
- struct tnl_pipeline_stage *stage)
+static void r700DrawPrims(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index)
{
- GLboolean bRet = GL_FALSE;
+ GLboolean retval = GL_FALSE;
- /* TODO : sw fallback */
-
- /* Need shader bo's setup before bo check */
- r700UpdateShaders(ctx);
- /**
+ /* This check should get folded into just the places that
+ * min/max index are really needed.
+ */
+ if (!index_bounds_valid) {
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+ }
- * Ensure all enabled and complete textures are uploaded along with any buffers being used.
- */
- if(!r600ValidateBuffers(ctx))
- {
- return GL_TRUE;
- }
+ if (min_index) {
+ vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims );
+ return;
+ }
- bRet = r700RunRender(ctx, stage);
+ /* Make an attempt at drawing */
+ retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
- return bRet;
- //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline
- //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success.
+ /* If failed run tnl pipeline - it should take care of fallbacks */
+ if (!retval)
+ _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}
-const struct tnl_pipeline_stage _r700_render_stage = {
- "r700 Hardware Rasterization",
- NULL,
- NULL,
- NULL,
- NULL,
- r700RunNonTCLRender
-};
-
-const struct tnl_pipeline_stage _r700_tcl_stage = {
- "r700 Hardware Transform, Clipping and Lighting",
- NULL,
- NULL,
- NULL,
- NULL,
- r700RunTCLRender
-};
-
-const struct tnl_pipeline_stage *r700_pipeline[] =
+void r700InitDraw(GLcontext *ctx)
{
- &_r700_tcl_stage,
- &_tnl_vertex_transform_stage,
- &_tnl_normal_transform_stage,
- &_tnl_lighting_stage,
- &_tnl_fog_coordinate_stage,
- &_tnl_texgen_stage,
- &_tnl_texture_transform_stage,
- &_tnl_vertex_program_stage,
-
- &_r700_render_stage,
- &_tnl_render_stage,
- 0,
-};
+ struct vbo_context *vbo = vbo_context(ctx);
+
+ /* to be enabled */
+ vbo->draw_prims = r700DrawPrims;
+}
diff --git a/r600/r700_shader.c b/r600/r700_shader.c
index b4fd51c..955ea4e 100644
--- a/r600/r700_shader.c
+++ b/r600/r700_shader.c
@@ -60,6 +60,55 @@ void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction *
plstCFInstructions->uNumOfNode++;
}
+void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst)
+{
+ GLuint ulIndex = 0;
+ GLboolean bFound = GL_FALSE;
+ R700ShaderInstruction * pPrevInst = NULL;
+ R700ShaderInstruction * pCurInst = plstCFInstructions->pHead;
+
+ /* Need go thro list to make sure pInst is there. */
+ while(NULL != pCurInst)
+ {
+ if(pCurInst == pInst)
+ {
+ bFound = GL_TRUE;
+ break;
+ }
+
+ pPrevInst = pCurInst;
+ pCurInst = pCurInst->pNextInst;
+ }
+ if(GL_TRUE == bFound)
+ {
+ plstCFInstructions->uNumOfNode--;
+
+ pCurInst = pInst->pNextInst;
+ ulIndex = pInst->m_uIndex;
+ while(NULL != pCurInst)
+ {
+ pCurInst->m_uIndex = ulIndex;
+ ulIndex++;
+ pCurInst = pCurInst->pNextInst;
+ }
+
+ if(plstCFInstructions->pHead == pInst)
+ {
+ plstCFInstructions->pHead = pInst->pNextInst;
+ }
+ if(plstCFInstructions->pTail == pInst)
+ {
+ plstCFInstructions->pTail = pPrevInst;
+ }
+ if(NULL != pPrevInst)
+ {
+ pPrevInst->pNextInst = pInst->pNextInst;
+ }
+
+ FREE(pInst);
+ }
+}
+
void Init_R700_Shader(R700_Shader * pShader)
{
pShader->Type = R700_SHADER_INVALID;
@@ -488,6 +537,47 @@ void DebugPrint(void)
{
}
+void cleanup_vfetch_shaderinst(R700_Shader *pShader)
+{
+ R700ShaderInstruction *pInst;
+ R700ShaderInstruction *pInstToFree;
+ R700VertexInstruction *pVTXInst;
+ R700ControlFlowInstruction *pCFInst;
+
+ pInst = pShader->lstVTXInstructions.pHead;
+ while(NULL != pInst)
+ {
+ pVTXInst = (R700VertexInstruction *)pInst;
+ pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pVTXInst->m_ShaderInstType);
+
+ if(NULL != pVTXInst->m_pLinkedGenericClause)
+ {
+ pCFInst = (R700ControlFlowInstruction*)(pVTXInst->m_pLinkedGenericClause);
+
+ TakeInstOutFromList(&(pShader->lstCFInstructions),
+ (R700ShaderInstruction*)pCFInst);
+
+ pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pCFInst->m_ShaderInstType);
+ }
+
+ pInst = pInst->pNextInst;
+ };
+
+ //destroy each item in pShader->lstVTXInstructions;
+ pInst = pShader->lstVTXInstructions.pHead;
+ while(NULL != pInst)
+ {
+ pInstToFree = pInst;
+ pInst = pInst->pNextInst;
+ FREE(pInstToFree);
+ };
+
+ //set NULL pShader->lstVTXInstructions
+ pShader->lstVTXInstructions.pHead=NULL;
+ pShader->lstVTXInstructions.pTail=NULL;
+ pShader->lstVTXInstructions.uNumOfNode=0;
+}
+
void Clean_Up_Shader(R700_Shader *pShader)
{
FREE(pShader->pProgram);
diff --git a/r600/r700_shader.h b/r600/r700_shader.h
index bfd01e1..c6a0586 100644
--- a/r600/r700_shader.h
+++ b/r600/r700_shader.h
@@ -128,6 +128,7 @@ typedef struct R700_Shader
//Internal
void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst);
+void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst);
void ResolveLinks(R700_Shader *pShader);
void Assemble(R700_Shader *pShader);
@@ -143,6 +144,7 @@ void LoadProgram(R700_Shader *pShader);
void UpdateShaderRegisters(R700_Shader *pShader);
void DeleteInstructions(R700_Shader *pShader);
void DebugPrint(void);
+void cleanup_vfetch_shaderinst(R700_Shader *pShader);
void Clean_Up_Shader(R700_Shader *pShader);
diff --git a/r600/r700_state.c b/r600/r700_state.c
index 124469b..16b05d5 100644
--- a/r600/r700_state.c
+++ b/r600/r700_state.c
@@ -46,7 +46,6 @@
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
#include "vbo/vbo.h"
-#include "main/texformat.h"
#include "r600_context.h"
@@ -55,18 +54,15 @@
#include "r700_fragprog.h"
#include "r700_vertprog.h"
-
+void r600UpdateTextureState(GLcontext * ctx);
static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state);
static void r700UpdatePolygonMode(GLcontext * ctx);
static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state);
static void r700SetStencilState(GLcontext * ctx, GLboolean state);
-void r700UpdateShaders (GLcontext * ctx) //----------------------------------
+void r700UpdateShaders(GLcontext * ctx)
{
context_t *context = R700_CONTEXT(ctx);
- GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
- GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
- int i;
/* should only happenen once, just after context is created */
/* TODO: shouldn't we fallback to sw here? */
@@ -77,21 +73,6 @@ void r700UpdateShaders (GLcontext * ctx) //----------------------------------
r700SelectFragmentShader(ctx);
- if (context->radeon.NewGLState) {
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- /* mat states from state var not array for sw */
- dummy_attrib[i].stride = 0;
- temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i];
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]);
- }
-
- _tnl_UpdateFixedFunctionProgram(ctx);
-
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i];
- }
- }
-
r700SelectVertexShader(ctx);
r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
context->radeon.NewGLState = 0;
@@ -171,6 +152,14 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //-----------
R600_STATECHANGE(context, db_target);
}
+ if (new_state & (_NEW_LIGHT)) {
+ R600_STATECHANGE(context, su);
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION)
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+ else
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+ }
+
r700UpdateStateParameters(ctx, new_state);
R600_STATECHANGE(context, cl);
@@ -202,6 +191,67 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //-----------
context->radeon.NewGLState |= new_state;
}
+static void r700SetDBRenderState(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ struct r700_fragment_program *fp = (struct r700_fragment_program *)
+ (ctx->FragmentProgram._Current);
+
+ R600_STATECHANGE(context, db);
+
+ SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
+ SETfield(r700->DB_SHADER_CONTROL.u32All, EARLY_Z_THEN_LATE_Z, Z_ORDER_shift, Z_ORDER_mask);
+ /* XXX need to enable htile for hiz/s */
+ SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
+ SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
+ SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);
+
+ if (context->radeon.query.current)
+ {
+ SETbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit);
+ if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+ {
+ SETbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit);
+ }
+ }
+ else
+ {
+ CLEARbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit);
+ if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+ {
+ CLEARbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit);
+ }
+ }
+
+ if (fp)
+ {
+ if (fp->r700Shader.killIsUsed)
+ {
+ SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+ }
+
+ if (fp->r700Shader.depthIsExported)
+ {
+ SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+ }
+ }
+}
+
+void r700UpdateShaderStates(GLcontext * ctx)
+{
+ r700SetDBRenderState(ctx);
+ r600UpdateTextureState(ctx);
+}
+
static void r700SetDepthState(GLcontext * ctx)
{
context_t *context = R700_CONTEXT(ctx);
@@ -467,10 +517,10 @@ static void r700SetBlendState(GLcontext * ctx)
eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
SETfield(blend_reg,
- blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE),
+ blend_factor(ctx->Color.BlendSrcA, GL_TRUE),
ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
SETfield(blend_reg,
- blend_factor(ctx->Color.BlendDstRGB, GL_FALSE),
+ blend_factor(ctx->Color.BlendDstA, GL_FALSE),
ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
switch (ctx->Color.BlendEquationA) {
@@ -655,6 +705,10 @@ static void r700UpdateCulling(GLcontext * ctx)
CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */
break;
}
+
+ /* Winding is inverted when rendering to FBO */
+ if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+ r700->PA_SU_SC_MODE_CNTL.u32All ^= FACE_bit;
}
static void r700UpdateLineStipple(GLcontext * ctx)
@@ -745,9 +799,9 @@ static void r700ColorMask(GLcontext * ctx,
(b ? 4 : 0) |
(a ? 8 : 0));
- if (mask != r700->CB_SHADER_MASK.u32All) {
+ if (mask != r700->CB_TARGET_MASK.u32All) {
R600_STATECHANGE(context, cb);
- SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask);
+ SETfield(r700->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask);
}
}
@@ -1041,6 +1095,7 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //--------------------
GLfloat tz = v[MAT_TZ] * depthScale;
R600_STATECHANGE(context, vpt);
+ R600_STATECHANGE(context, cl);
r700->viewport[id].PA_CL_VPORT_XSCALE.f32All = sx;
r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx;
@@ -1051,6 +1106,18 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //--------------------
r700->viewport[id].PA_CL_VPORT_ZSCALE.f32All = sz;
r700->viewport[id].PA_CL_VPORT_ZOFFSET.f32All = tz;
+ if (ctx->Transform.DepthClamp) {
+ r700->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+ r700->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+ SETbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit);
+ SETbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit);
+ } else {
+ r700->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = 0.0;
+ r700->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = 1.0;
+ CLEARbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit);
+ CLEARbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit);
+ }
+
r700->viewport[id].enabled = GL_TRUE;
r700SetScissor(context);
@@ -1164,13 +1231,8 @@ static void r700UpdatePolygonMode(GLcontext * ctx)
/* Handle GL_CW (clock wise and GL_CCW (counter clock wise)
* correctly by selecting the correct front and back face
*/
- if (ctx->Polygon.FrontFace == GL_CCW) {
- f = ctx->Polygon.FrontMode;
- b = ctx->Polygon.BackMode;
- } else {
- f = ctx->Polygon.BackMode;
- b = ctx->Polygon.FrontMode;
- }
+ f = ctx->Polygon.FrontMode;
+ b = ctx->Polygon.BackMode;
/* Enable polygon mode */
SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DUAL_MODE, POLY_MODE_shift, POLY_MODE_mask);
@@ -1269,11 +1331,15 @@ void r700SetScissor(context_t *context) //---------------
return;
}
if (context->radeon.state.scissor.enabled) {
- /* r600 has exclusive scissors */
x1 = context->radeon.state.scissor.rect.x1;
y1 = context->radeon.state.scissor.rect.y1;
- x2 = context->radeon.state.scissor.rect.x2 + 1;
- y2 = context->radeon.state.scissor.rect.y2 + 1;
+ x2 = context->radeon.state.scissor.rect.x2;
+ y2 = context->radeon.state.scissor.rect.y2;
+ /* r600 has exclusive BR scissors */
+ if (context->radeon.radeonScreen->kernel_mm) {
+ x2++;
+ y2++;
+ }
} else {
if (context->radeon.radeonScreen->driScreen->dri2.enabled) {
x1 = 0;
@@ -1352,8 +1418,6 @@ void r700SetScissor(context_t *context) //---------------
SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, y2,
PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask);
- r700->viewport[id].PA_SC_VPORT_ZMIN_0.u32All = 0;
- r700->viewport[id].PA_SC_VPORT_ZMAX_0.u32All = 0x3F800000;
r700->viewport[id].enabled = GL_TRUE;
}
@@ -1668,19 +1732,10 @@ void r700InitState(GLcontext * ctx) //-------------------
r700Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
r700DepthMask(ctx, ctx->Depth.Mask);
r700DepthFunc(ctx, ctx->Depth.Func);
- SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
-
r700->DB_DEPTH_CLEAR.u32All = 0x3F800000;
-
- r700->DB_RENDER_CONTROL.u32All = 0;
SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit);
SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit);
- r700->DB_RENDER_OVERRIDE.u32All = 0;
- if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
- SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit);
- SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
- SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
- SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);
+ r700SetDBRenderState(ctx);
r700->DB_ALPHA_TO_MASK.u32All = 0;
SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask);
@@ -1754,7 +1809,7 @@ void r700InitState(GLcontext * ctx) //-------------------
r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF;
/* screen/window/view */
- SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask);
+ SETfield(r700->CB_SHADER_MASK.u32All, 0xF, (4 * id), OUTPUT0_ENABLE_mask);
context->radeon.hw.all_dirty = GL_TRUE;
diff --git a/r600/r700_state.h b/r600/r700_state.h
index 0f53d5b..60c6a7f 100644
--- a/r600/r700_state.h
+++ b/r600/r700_state.h
@@ -35,6 +35,7 @@
extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state);
extern void r700UpdateShaders (GLcontext * ctx);
+extern void r700UpdateShaderStates(GLcontext * ctx);
extern void r700UpdateViewportOffset(GLcontext * ctx);
diff --git a/r600/r700_vertprog.c b/r600/r700_vertprog.c
index 04726ec..6986eb0 100644
--- a/r600/r700_vertprog.c
+++ b/r600/r700_vertprog.c
@@ -159,7 +159,35 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions(
return GL_TRUE;
}
-void Map_Vertex_Program(struct r700_vertex_program *vp,
+GLboolean Process_Vertex_Program_Vfetch_Instructions2(
+ GLcontext *ctx,
+ struct r700_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp)
+{
+ int i;
+ context_t *context = R700_CONTEXT(ctx);
+
+ VTX_FETCH_METHOD vtxFetchMethod;
+ vtxFetchMethod.bEnableMini = GL_FALSE;
+ vtxFetchMethod.mega_fetch_remainder = 0;
+
+ for(i=0; i<context->nNumActiveAos; i++)
+ {
+ assemble_vfetch_instruction2(&vp->r700AsmCode,
+ vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element],
+ context->stream_desc[i].type,
+ context->stream_desc[i].size,
+ context->stream_desc[i].element,
+ context->stream_desc[i]._signed,
+ context->stream_desc[i].normalize,
+ &vtxFetchMethod);
+ }
+
+ return GL_TRUE;
+}
+
+void Map_Vertex_Program(GLcontext *ctx,
+ struct r700_vertex_program *vp,
struct gl_vertex_program *mesa_vp)
{
GLuint ui;
@@ -175,10 +203,10 @@ void Map_Vertex_Program(struct r700_vertex_program *vp,
pAsm->number_used_registers += num_inputs;
// Create VFETCH instructions for inputs
- if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) )
+ if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
{
- radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n");
- return; //error
+ radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
+ return;
}
// Map Outputs
@@ -189,7 +217,7 @@ void Map_Vertex_Program(struct r700_vertex_program *vp,
pAsm->number_used_registers += pAsm->number_of_exports;
pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
-
+
for(ui=0; ui<pAsm->number_of_exports; ui++)
{
pAsm->pucOutMask[ui] = 0x0;
@@ -206,7 +234,7 @@ void Map_Vertex_Program(struct r700_vertex_program *vp,
{ /* fix func t_vp uses NumTemporaries */
pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
}
-
+
pAsm->uFirstHelpReg = pAsm->number_used_registers;
}
@@ -261,13 +289,10 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
}
struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
- struct gl_vertex_program *mesa_vp)
+ struct gl_vertex_program *mesa_vp)
{
context_t *context = R700_CONTEXT(ctx);
struct r700_vertex_program *vp;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
- unsigned int unBit;
unsigned int i;
vp = _mesa_calloc(sizeof(*vp));
@@ -278,17 +303,13 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
_mesa_insert_mvp_code(ctx, vp->mesa_program);
}
- for(i=0; i<VERT_ATTRIB_MAX; i++)
+ for(i=0; i<context->nNumActiveAos; i++)
{
- unBit = 1 << i;
- if(vp->mesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */
- {
- vp->aos_desc[i].size = vb->AttribPtr[i]->size;
- vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/
- vp->aos_desc[i].type = GL_FLOAT;
- }
+ vp->aos_desc[i].size = context->stream_desc[i].size;
+ vp->aos_desc[i].stride = context->stream_desc[i].stride;
+ vp->aos_desc[i].type = context->stream_desc[i].type;
}
-
+
if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
{
vp->r700AsmCode.bR6xx = 1;
@@ -296,19 +317,19 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
//Init_Program
Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
- Map_Vertex_Program( vp, vp->mesa_program );
+ Map_Vertex_Program(ctx, vp, vp->mesa_program );
if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
{
return NULL;
- }
+ }
if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions,
- &(vp->mesa_program->Base.Instructions[0]),
+ &(vp->mesa_program->Base.Instructions[0]),
&(vp->r700AsmCode)) )
{
return NULL;
- }
+ }
if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
{
@@ -330,9 +351,6 @@ void r700SelectVertexShader(GLcontext *ctx)
context_t *context = R700_CONTEXT(ctx);
struct r700_vertex_program_cont *vpc;
struct r700_vertex_program *vp;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
- unsigned int unBit;
unsigned int i;
GLboolean match;
GLbitfield InputsRead;
@@ -343,29 +361,27 @@ void r700SelectVertexShader(GLcontext *ctx)
if (vpc->mesa_program.IsPositionInvariant)
{
InputsRead |= VERT_BIT_POS;
- }
-
+ }
+
for (vp = vpc->progs; vp; vp = vp->next)
{
- match = GL_TRUE;
- for(i=0; i<VERT_ATTRIB_MAX; i++)
+ match = GL_TRUE;
+ for(i=0; i<context->nNumActiveAos; i++)
{
- unBit = 1 << i;
- if(InputsRead & unBit)
+ if (vp->aos_desc[i].size != context->stream_desc[i].size)
{
- if (vp->aos_desc[i].size != vb->AttribPtr[i]->size)
- match = GL_FALSE;
- break;
+ match = GL_FALSE;
+ break;
}
}
- if (match)
+ if (match)
{
context->selected_vp = vp;
return;
}
}
- vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) );
+ vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program));
if(!vp)
{
radeon_error("Failed to translate vertex shader. \n");
@@ -377,6 +393,146 @@ void r700SelectVertexShader(GLcontext *ctx)
return;
}
+int getTypeSize(GLenum type)
+{
+ switch (type)
+ {
+ case GL_DOUBLE:
+ return sizeof(GLdouble);
+ case GL_FLOAT:
+ return sizeof(GLfloat);
+ case GL_INT:
+ return sizeof(GLint);
+ case GL_UNSIGNED_INT:
+ return sizeof(GLuint);
+ case GL_SHORT:
+ return sizeof(GLshort);
+ case GL_UNSIGNED_SHORT:
+ return sizeof(GLushort);
+ case GL_BYTE:
+ return sizeof(GLbyte);
+ case GL_UNSIGNED_BYTE:
+ return sizeof(GLubyte);
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input)
+{
+ context_t *context = R700_CONTEXT(ctx);
+
+ StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]);
+
+ GLuint stride;
+
+ stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size
+ : input->StrideB;
+
+ if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ getTypeSize(input->Type) != 4 ||
+#endif
+ stride < 4)
+ {
+ pStreamDesc->type = GL_FLOAT;
+
+ if (input->StrideB == 0)
+ {
+ pStreamDesc->stride = 0;
+ }
+ else
+ {
+ pStreamDesc->stride = sizeof(GLfloat) * input->Size;
+ }
+ pStreamDesc->dwords = input->Size;
+ pStreamDesc->is_named_bo = GL_FALSE;
+ }
+ else
+ {
+ pStreamDesc->type = input->Type;
+ pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4;
+ if (!input->BufferObj->Name)
+ {
+ if (input->StrideB == 0)
+ {
+ pStreamDesc->stride = 0;
+ }
+ else
+ {
+ pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3;
+ }
+
+ pStreamDesc->is_named_bo = GL_FALSE;
+ }
+ }
+
+ pStreamDesc->size = input->Size;
+ pStreamDesc->dst_loc = context->nNumActiveAos;
+ pStreamDesc->element = unLoc;
+
+ switch (pStreamDesc->type)
+ { //GetSurfaceFormat
+ case GL_FLOAT:
+ pStreamDesc->_signed = 0;
+ pStreamDesc->normalize = GL_FALSE;
+ break;
+ case GL_SHORT:
+ pStreamDesc->_signed = 1;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ case GL_BYTE:
+ pStreamDesc->_signed = 1;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ case GL_UNSIGNED_SHORT:
+ pStreamDesc->_signed = 0;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ case GL_UNSIGNED_BYTE:
+ pStreamDesc->_signed = 0;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ default:
+ case GL_INT:
+ case GL_UNSIGNED_INT:
+ case GL_DOUBLE:
+ assert(0);
+ break;
+ }
+ context->nNumActiveAos++;
+}
+
+void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct r700_vertex_program *vpc
+ = (struct r700_vertex_program *)ctx->VertexProgram._Current;
+
+ struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program);
+ unsigned int unLoc = 0;
+ unsigned int unBit = mesa_vp->Base.InputsRead;
+ context->nNumActiveAos = 0;
+
+ if (mesa_vp->IsPositionInvariant)
+ {
+ unBit |= VERT_BIT_POS;
+ }
+
+ while(unBit)
+ {
+ if(unBit & 1)
+ {
+ r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]);
+ }
+
+ unBit >>= 1;
+ ++unLoc;
+ }
+ context->radeon.tcl.aos_count = context->nNumActiveAos;
+}
+
void * r700GetActiveVpShaderBo(GLcontext * ctx)
{
context_t *context = R700_CONTEXT(ctx);
diff --git a/r600/r700_vertprog.h b/r600/r700_vertprog.h
index c48764c..00824c2 100644
--- a/r600/r700_vertprog.h
+++ b/r600/r700_vertprog.h
@@ -52,8 +52,7 @@ struct r700_vertex_program
GLboolean translated;
GLboolean loaded;
- GLboolean needUpdateVF;
-
+
void * shaderbo;
ArrayDesc aos_desc[VERT_ATTRIB_MAX];
@@ -76,19 +75,27 @@ unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm,
GLboolean Process_Vertex_Program_Vfetch_Instructions(
struct r700_vertex_program *vp,
struct gl_vertex_program *mesa_vp);
-void Map_Vertex_Program(struct r700_vertex_program *vp,
+GLboolean Process_Vertex_Program_Vfetch_Instructions2(
+ GLcontext *ctx,
+ struct r700_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp);
+void Map_Vertex_Program(GLcontext *ctx,
+ struct r700_vertex_program *vp,
struct gl_vertex_program *mesa_vp);
GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
struct gl_vertex_program *mesa_vp);
struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
- struct gl_vertex_program *mesa_vp);
+ struct gl_vertex_program *mesa_vp);
/* Interface */
extern void r700SelectVertexShader(GLcontext *ctx);
+extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count);
extern GLboolean r700SetupVertexProgram(GLcontext * ctx);
extern void * r700GetActiveVpShaderBo(GLcontext * ctx);
+extern int getTypeSize(GLenum type);
+
#endif /* _R700_VERTPROG_H_ */
diff --git a/radeon/Makefile.am b/radeon/Makefile.am
index 554f67e..0ff149d 100644
--- a/radeon/Makefile.am
+++ b/radeon/Makefile.am
@@ -1,6 +1,6 @@
AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
-RADEON_CFLAGS = -Iserver -DRADEON_COMMON=0
+RADEON_CFLAGS = -DRADEON_R100 -Iserver
radeon_dri_la_LTLIBRARIES = radeon_dri.la
radeon_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(RADEON_CFLAGS)
@@ -36,5 +36,7 @@ if HAVE_LIBDRM_RADEON
radeon_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
radeon_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
radeon_dri_la_SOURCES += \
- radeon_cs_space_drm.c
+ radeon_cs_space_drm.c \
+ radeon_bo.c \
+ radeon_cs.c
endif
diff --git a/radeon/radeon_bo.c b/radeon/radeon_bo.c
new file mode 100644
index 0000000..393d156
--- /dev/null
+++ b/radeon/radeon_bo.c
@@ -0,0 +1,110 @@
+#include <radeon_bocs_wrapper.h>
+#include <radeon_bo_int_drm.h>
+
+void radeon_bo_debug(struct radeon_bo *bo,
+ const char *op)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+
+ fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X\n",
+ op, bo, bo->handle, boi->size, boi->cref);
+}
+
+struct radeon_bo *radeon_bo_open(struct radeon_bo_manager *bom,
+ uint32_t handle,
+ uint32_t size,
+ uint32_t alignment,
+ uint32_t domains,
+ uint32_t flags)
+{
+ struct radeon_bo *bo;
+ bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags);
+ return bo;
+}
+
+void radeon_bo_ref(struct radeon_bo *bo)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ boi->cref++;
+ boi->bom->funcs->bo_ref(boi);
+}
+
+struct radeon_bo *radeon_bo_unref(struct radeon_bo *bo)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ boi->cref--;
+ return boi->bom->funcs->bo_unref(boi);
+}
+
+int radeon_bo_map(struct radeon_bo *bo, int write)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ return boi->bom->funcs->bo_map(boi, write);
+}
+
+int radeon_bo_unmap(struct radeon_bo *bo)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ return boi->bom->funcs->bo_unmap(boi);
+}
+
+int radeon_bo_wait(struct radeon_bo *bo)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ if (!boi->bom->funcs->bo_wait)
+ return 0;
+ return boi->bom->funcs->bo_wait(boi);
+}
+
+int radeon_bo_is_busy(struct radeon_bo *bo,
+ uint32_t *domain)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ return boi->bom->funcs->bo_is_busy(boi, domain);
+}
+
+int radeon_bo_set_tiling(struct radeon_bo *bo,
+ uint32_t tiling_flags, uint32_t pitch)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ return boi->bom->funcs->bo_set_tiling(boi, tiling_flags, pitch);
+}
+
+int radeon_bo_get_tiling(struct radeon_bo *bo,
+ uint32_t *tiling_flags, uint32_t *pitch)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ return boi->bom->funcs->bo_get_tiling(boi, tiling_flags, pitch);
+}
+
+int radeon_bo_is_static(struct radeon_bo *bo)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ if (boi->bom->funcs->bo_is_static)
+ return boi->bom->funcs->bo_is_static(boi);
+ return 0;
+}
+
+int radeon_bo_is_referenced_by_cs(struct radeon_bo *bo,
+ struct radeon_cs *cs)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ return boi->cref > 1;
+}
+
+uint32_t radeon_bo_get_handle(struct radeon_bo *bo)
+{
+ return bo->handle;
+}
+
+uint32_t radeon_bo_get_src_domain(struct radeon_bo *bo)
+{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ uint32_t src_domain;
+
+ src_domain = boi->space_accounted & 0xffff;
+ if (!src_domain)
+ src_domain = boi->space_accounted >> 16;
+
+ return src_domain;
+}
diff --git a/radeon/radeon_bo_drm.h b/radeon/radeon_bo_drm.h
index 7141371..beb2369 100644
--- a/radeon/radeon_bo_drm.h
+++ b/radeon/radeon_bo_drm.h
@@ -32,188 +32,44 @@
#include <stdio.h>
#include <stdint.h>
-//#include "radeon_track.h"
/* bo object */
#define RADEON_BO_FLAGS_MACRO_TILE 1
#define RADEON_BO_FLAGS_MICRO_TILE 2
struct radeon_bo_manager;
+struct radeon_cs;
struct radeon_bo {
- uint32_t alignment;
+ void *ptr;
+ uint32_t flags;
uint32_t handle;
uint32_t size;
- uint32_t domains;
- uint32_t flags;
- unsigned cref;
-#ifdef RADEON_BO_TRACK
- struct radeon_track *track;
-#endif
- void *ptr;
- struct radeon_bo_manager *bom;
- uint32_t space_accounted;
-};
-
-/* bo functions */
-struct radeon_bo_funcs {
- struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom,
- uint32_t handle,
- uint32_t size,
- uint32_t alignment,
- uint32_t domains,
- uint32_t flags);
- void (*bo_ref)(struct radeon_bo *bo);
- struct radeon_bo *(*bo_unref)(struct radeon_bo *bo);
- int (*bo_map)(struct radeon_bo *bo, int write);
- int (*bo_unmap)(struct radeon_bo *bo);
- int (*bo_wait)(struct radeon_bo *bo);
- int (*bo_is_static)(struct radeon_bo *bo);
- int (*bo_set_tiling)(struct radeon_bo *bo, uint32_t tiling_flags,
- uint32_t pitch);
- int (*bo_get_tiling)(struct radeon_bo *bo, uint32_t *tiling_flags,
- uint32_t *pitch);
- int (*bo_is_busy)(struct radeon_bo *bo, uint32_t *domain);
};
-struct radeon_bo_manager {
- struct radeon_bo_funcs *funcs;
- int fd;
-
-#ifdef RADEON_BO_TRACK
- struct radeon_tracker tracker;
-#endif
-};
-
-static inline void _radeon_bo_debug(struct radeon_bo *bo,
- const char *op,
- const char *file,
- const char *func,
- int line)
-{
- fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X [%s %s %d]\n",
- op, bo, bo->handle, bo->size, bo->cref, file, func, line);
-}
-
-static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager *bom,
- uint32_t handle,
- uint32_t size,
- uint32_t alignment,
- uint32_t domains,
- uint32_t flags,
- const char *file,
- const char *func,
- int line)
-{
- struct radeon_bo *bo;
-
- bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags);
-
-#ifdef RADEON_BO_TRACK
- if (bo) {
- bo->track = radeon_tracker_add_track(&bom->tracker, bo->handle);
- radeon_track_add_event(bo->track, file, func, "open", line);
- }
-#endif
- return bo;
-}
-
-static inline void _radeon_bo_ref(struct radeon_bo *bo,
- const char *file,
- const char *func,
- int line)
-{
- bo->cref++;
-#ifdef RADEON_BO_TRACK
- radeon_track_add_event(bo->track, file, func, "ref", line);
-#endif
- bo->bom->funcs->bo_ref(bo);
-}
-
-static inline struct radeon_bo *_radeon_bo_unref(struct radeon_bo *bo,
- const char *file,
- const char *func,
- int line)
-{
- bo->cref--;
-#ifdef RADEON_BO_TRACK
- radeon_track_add_event(bo->track, file, func, "unref", line);
- if (bo->cref <= 0) {
- radeon_tracker_remove_track(&bo->bom->tracker, bo->track);
- bo->track = NULL;
- }
-#endif
- return bo->bom->funcs->bo_unref(bo);
-}
-
-static inline int _radeon_bo_map(struct radeon_bo *bo,
- int write,
- const char *file,
- const char *func,
- int line)
-{
- return bo->bom->funcs->bo_map(bo, write);
-}
-
-static inline int _radeon_bo_unmap(struct radeon_bo *bo,
- const char *file,
- const char *func,
- int line)
-{
- return bo->bom->funcs->bo_unmap(bo);
-}
-
-static inline int _radeon_bo_wait(struct radeon_bo *bo,
- const char *file,
- const char *func,
- int line)
-{
- return bo->bom->funcs->bo_wait(bo);
-}
-
-static inline int _radeon_bo_is_busy(struct radeon_bo *bo,
- uint32_t *domain,
- const char *file,
- const char *func,
- int line)
-{
- return bo->bom->funcs->bo_is_busy(bo, domain);
-}
-
-static inline int radeon_bo_set_tiling(struct radeon_bo *bo,
- uint32_t tiling_flags, uint32_t pitch)
-{
- return bo->bom->funcs->bo_set_tiling(bo, tiling_flags, pitch);
-}
-
-static inline int radeon_bo_get_tiling(struct radeon_bo *bo,
- uint32_t *tiling_flags, uint32_t *pitch)
-{
- return bo->bom->funcs->bo_get_tiling(bo, tiling_flags, pitch);
-}
-
-static inline int radeon_bo_is_static(struct radeon_bo *bo)
-{
- if (bo->bom->funcs->bo_is_static)
- return bo->bom->funcs->bo_is_static(bo);
- return 0;
-}
-
-#define radeon_bo_open(bom, h, s, a, d, f)\
- _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_ref(bo)\
- _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_unref(bo)\
- _radeon_bo_unref(bo, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_map(bo, w)\
- _radeon_bo_map(bo, w, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_unmap(bo)\
- _radeon_bo_unmap(bo, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_debug(bo, opcode)\
- _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_wait(bo) \
- _radeon_bo_wait(bo, __FILE__, __func__, __LINE__)
-#define radeon_bo_is_busy(bo, domain) \
- _radeon_bo_is_busy(bo, domain, __FILE__, __func__, __LINE__)
+struct radeon_bo_manager;
+void radeon_bo_debug(struct radeon_bo *bo,
+ const char *op);
+
+struct radeon_bo *radeon_bo_open(struct radeon_bo_manager *bom,
+ uint32_t handle,
+ uint32_t size,
+ uint32_t alignment,
+ uint32_t domains,
+ uint32_t flags);
+
+void radeon_bo_ref(struct radeon_bo *bo);
+struct radeon_bo *radeon_bo_unref(struct radeon_bo *bo);
+int radeon_bo_map(struct radeon_bo *bo, int write);
+int radeon_bo_unmap(struct radeon_bo *bo);
+int radeon_bo_wait(struct radeon_bo *bo);
+int radeon_bo_is_busy(struct radeon_bo *bo, uint32_t *domain);
+int radeon_bo_set_tiling(struct radeon_bo *bo, uint32_t tiling_flags, uint32_t pitch);
+int radeon_bo_get_tiling(struct radeon_bo *bo, uint32_t *tiling_flags, uint32_t *pitch);
+int radeon_bo_is_static(struct radeon_bo *bo);
+int radeon_bo_is_referenced_by_cs(struct radeon_bo *bo,
+ struct radeon_cs *cs);
+uint32_t radeon_bo_get_handle(struct radeon_bo *bo);
+uint32_t radeon_bo_get_src_domain(struct radeon_bo *bo);
#endif
diff --git a/radeon/radeon_bo_int_drm.h b/radeon/radeon_bo_int_drm.h
new file mode 100644
index 0000000..190c332
--- /dev/null
+++ b/radeon/radeon_bo_int_drm.h
@@ -0,0 +1,45 @@
+#ifndef RADEON_BO_INT
+#define RADEON_BO_INT
+
+struct radeon_bo_manager {
+ struct radeon_bo_funcs *funcs;
+ int fd;
+};
+
+struct radeon_bo_int {
+ void *ptr;
+ uint32_t flags;
+ uint32_t handle;
+ uint32_t size;
+ /* private members */
+ uint32_t alignment;
+ uint32_t domains;
+ unsigned cref;
+ struct radeon_bo_manager *bom;
+ uint32_t space_accounted;
+ uint32_t referenced_in_cs;
+};
+
+/* bo functions */
+struct radeon_bo_funcs {
+ struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom,
+ uint32_t handle,
+ uint32_t size,
+ uint32_t alignment,
+ uint32_t domains,
+ uint32_t flags);
+ void (*bo_ref)(struct radeon_bo_int *bo);
+ struct radeon_bo *(*bo_unref)(struct radeon_bo_int *bo);
+ int (*bo_map)(struct radeon_bo_int *bo, int write);
+ int (*bo_unmap)(struct radeon_bo_int *bo);
+ int (*bo_wait)(struct radeon_bo_int *bo);
+ int (*bo_is_static)(struct radeon_bo_int *bo);
+ int (*bo_set_tiling)(struct radeon_bo_int *bo, uint32_t tiling_flags,
+ uint32_t pitch);
+ int (*bo_get_tiling)(struct radeon_bo_int *bo, uint32_t *tiling_flags,
+ uint32_t *pitch);
+ int (*bo_is_busy)(struct radeon_bo_int *bo, uint32_t *domain);
+ int (*bo_is_referenced_by_cs)(struct radeon_bo_int *bo, struct radeon_cs *cs);
+};
+
+#endif
diff --git a/radeon/radeon_bo_legacy.c b/radeon/radeon_bo_legacy.c
index 3e7547d..cf12664 100644
--- a/radeon/radeon_bo_legacy.c
+++ b/radeon/radeon_bo_legacy.c
@@ -50,6 +50,12 @@
#include "radeon_bocs_wrapper.h"
#include "radeon_macros.h"
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_bo_int.h"
+#else
+#include "radeon_bo_int_drm.h"
+#endif
+
/* no seriously texmem.c is this screwed up */
struct bo_legacy_texture_object {
driTextureObject base;
@@ -57,7 +63,7 @@ struct bo_legacy_texture_object {
};
struct bo_legacy {
- struct radeon_bo base;
+ struct radeon_bo_int base;
int map_count;
uint32_t pending;
int is_pending;
@@ -187,10 +193,10 @@ static void legacy_get_current_age(struct bo_manager_legacy *boml)
}
}
-static int legacy_is_pending(struct radeon_bo *bo)
+static int legacy_is_pending(struct radeon_bo_int *boi)
{
- struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
- struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom;
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)boi;
if (bo_legacy->is_pending <= 0) {
bo_legacy->is_pending = 0;
@@ -204,13 +210,13 @@ static int legacy_is_pending(struct radeon_bo *bo)
if (bo_legacy->pnext) {
bo_legacy->pnext->pprev = bo_legacy->pprev;
}
- assert(bo_legacy->is_pending <= bo->cref);
+ assert(bo_legacy->is_pending <= boi->cref);
while (bo_legacy->is_pending--) {
- bo = radeon_bo_unref(bo);
- if (!bo)
+ boi = (struct radeon_bo_int *)radeon_bo_unref((struct radeon_bo *)boi);
+ if (!boi)
break;
}
- if (bo)
+ if (boi)
bo_legacy->is_pending = 0;
boml->cpendings--;
return 0;
@@ -218,7 +224,7 @@ static int legacy_is_pending(struct radeon_bo *bo)
return 1;
}
-static int legacy_wait_pending(struct radeon_bo *bo)
+static int legacy_wait_pending(struct radeon_bo_int *bo)
{
struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
@@ -323,7 +329,7 @@ static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml,
return bo_legacy;
}
-static int bo_dma_alloc(struct radeon_bo *bo)
+static int bo_dma_alloc(struct radeon_bo_int *bo)
{
struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
@@ -333,7 +339,7 @@ static int bo_dma_alloc(struct radeon_bo *bo)
int r;
/* align size on 4Kb */
- size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1);
+ size = (((4 * 1024) - 1) + bo_legacy->base.size) & ~((4 * 1024) - 1);
alloc.region = RADEON_MEM_REGION_GART;
alloc.alignment = bo_legacy->base.alignment;
alloc.size = size;
@@ -355,7 +361,7 @@ static int bo_dma_alloc(struct radeon_bo *bo)
return 0;
}
-static int bo_dma_free(struct radeon_bo *bo)
+static int bo_dma_free(struct radeon_bo_int *bo)
{
struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
@@ -428,7 +434,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
bo_legacy = boml->bos.next;
while (bo_legacy) {
if (bo_legacy->base.handle == handle) {
- radeon_bo_ref(&(bo_legacy->base));
+ radeon_bo_ref((struct radeon_bo *)&(bo_legacy->base));
return (struct radeon_bo*)bo_legacy;
}
bo_legacy = bo_legacy->next;
@@ -468,20 +474,20 @@ retry:
return NULL;
}
}
- radeon_bo_ref(&(bo_legacy->base));
+ radeon_bo_ref((struct radeon_bo *)&(bo_legacy->base));
return (struct radeon_bo*)bo_legacy;
}
-static void bo_ref(struct radeon_bo *bo)
+static void bo_ref(struct radeon_bo_int *bo)
{
}
-static struct radeon_bo *bo_unref(struct radeon_bo *bo)
+static struct radeon_bo *bo_unref(struct radeon_bo_int *boi)
{
- struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+ struct bo_legacy *bo_legacy = (struct bo_legacy*)boi;
- if (bo->cref <= 0) {
+ if (boi->cref <= 0) {
bo_legacy->prev->next = bo_legacy->next;
if (bo_legacy->next) {
bo_legacy->next->prev = bo_legacy->prev;
@@ -491,10 +497,10 @@ static struct radeon_bo *bo_unref(struct radeon_bo *bo)
}
return NULL;
}
- return bo;
+ return (struct radeon_bo *)boi;
}
-static int bo_map(struct radeon_bo *bo, int write)
+static int bo_map(struct radeon_bo_int *bo, int write)
{
struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
@@ -528,7 +534,7 @@ static int bo_map(struct radeon_bo *bo, int write)
return 0;
}
-static int bo_unmap(struct radeon_bo *bo)
+static int bo_unmap(struct radeon_bo_int *bo)
{
struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
@@ -542,7 +548,7 @@ static int bo_unmap(struct radeon_bo *bo)
return 0;
}
-static int bo_is_busy(struct radeon_bo *bo, uint32_t *domain)
+static int bo_is_busy(struct radeon_bo_int *bo, uint32_t *domain)
{
*domain = 0;
if (bo->domains & RADEON_GEM_DOMAIN_GTT)
@@ -555,7 +561,7 @@ static int bo_is_busy(struct radeon_bo *bo, uint32_t *domain)
return 0;
}
-static int bo_is_static(struct radeon_bo *bo)
+static int bo_is_static(struct radeon_bo_int *bo)
{
struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
return bo_legacy->static_bo;
@@ -574,7 +580,7 @@ static struct radeon_bo_funcs bo_legacy_funcs = {
bo_is_busy
};
-static int bo_vram_validate(struct radeon_bo *bo,
+static int bo_vram_validate(struct radeon_bo_int *bo,
uint32_t *soffset,
uint32_t *eoffset)
{
@@ -700,25 +706,30 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo,
uint32_t *soffset,
uint32_t *eoffset)
{
- struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom;
struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
int r;
int retries = 0;
if (bo_legacy->map_count) {
fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n",
- bo, bo->size, bo_legacy->map_count);
+ bo, boi->size, bo_legacy->map_count);
+ return -EINVAL;
+ }
+ if(boi->size == 0) {
+ fprintf(stderr, "bo(%p) has size 0.\n", bo);
return -EINVAL;
}
if (bo_legacy->static_bo || bo_legacy->validated) {
*soffset = bo_legacy->offset;
- *eoffset = bo_legacy->offset + bo->size;
+ *eoffset = bo_legacy->offset + boi->size;
return 0;
}
- if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+ if (!(boi->domains & RADEON_GEM_DOMAIN_GTT)) {
- r = bo_vram_validate(bo, soffset, eoffset);
+ r = bo_vram_validate(boi, soffset, eoffset);
if (r) {
legacy_track_pending(&boml->base, 0);
legacy_kick_all_buffers(boml);
@@ -732,7 +743,7 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo,
}
}
*soffset = bo_legacy->offset;
- *eoffset = bo_legacy->offset + bo->size;
+ *eoffset = bo_legacy->offset + boi->size;
bo_legacy->validated = 1;
return 0;
@@ -740,7 +751,8 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo,
void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending)
{
- struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom;
struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
bo_legacy->pending = pending;
@@ -795,7 +807,7 @@ static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy
if (bo->base.handle > bom->nhandle) {
bom->nhandle = bo->base.handle + 1;
}
- radeon_bo_ref(&(bo->base));
+ radeon_bo_ref((struct radeon_bo *)&(bo->base));
return bo;
}
@@ -890,12 +902,13 @@ void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom)
unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo)
{
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
- if (bo_legacy->static_bo || (bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+ if (bo_legacy->static_bo || (boi->domains & RADEON_GEM_DOMAIN_GTT)) {
return 0;
}
- return bo->size;
+ return boi->size;
}
/*
@@ -920,7 +933,7 @@ struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom,
if (bo->base.handle > boml->nhandle) {
boml->nhandle = bo->base.handle + 1;
}
- radeon_bo_ref(&(bo->base));
- return &(bo->base);
+ radeon_bo_ref((struct radeon_bo *)&(bo->base));
+ return (struct radeon_bo *)&(bo->base);
}
diff --git a/radeon/radeon_bocs_wrapper.h b/radeon/radeon_bocs_wrapper.h
index 4520a7d..6c2648b 100644
--- a/radeon/radeon_bocs_wrapper.h
+++ b/radeon/radeon_bocs_wrapper.h
@@ -18,8 +18,11 @@
#define RADEON_TILING_MACRO 0x1
#define RADEON_TILING_MICRO 0x2
#define RADEON_TILING_SWAP 0x4
+
+#ifndef RADEON_TILING_SURFACE
#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface
* when mapped - i.e. front buffer */
+#endif
/* to be used to build locally in mesa with no libdrm bits */
#include "../radeon/radeon_bo_drm.h"
diff --git a/radeon/radeon_buffer_objects.c b/radeon/radeon_buffer_objects.c
index 8fac5c6..99d3ec7 100644
--- a/radeon/radeon_buffer_objects.c
+++ b/radeon/radeon_buffer_objects.c
@@ -136,8 +136,13 @@ radeonBufferSubData(GLcontext * ctx,
const GLvoid * data,
struct gl_buffer_object *obj)
{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+ if (radeon_bo_is_referenced_by_cs(radeon_obj->bo, radeon->cmdbuf.cs)) {
+ radeon_firevertices(radeon);
+ }
+
radeon_bo_map(radeon_obj->bo, GL_TRUE);
_mesa_memcpy(radeon_obj->bo->ptr + offset, data, size);
diff --git a/radeon/radeon_common.c b/radeon/radeon_common.c
index 9817ff8..9b64c21 100644
--- a/radeon/radeon_common.c
+++ b/radeon/radeon_common.c
@@ -229,16 +229,15 @@ void radeonUpdateScissor( GLcontext *ctx )
}
if (!rmesa->radeonScreen->kernel_mm) {
/* Fix scissors for dri 1 */
-
__DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa);
x1 += dPriv->x;
- x2 += dPriv->x;
+ x2 += dPriv->x + 1;
min_x += dPriv->x;
- max_x += dPriv->x;
+ max_x += dPriv->x + 1;
y1 += dPriv->y;
- y2 += dPriv->y;
+ y2 += dPriv->y + 1;
min_y += dPriv->y;
- max_y += dPriv->y;
+ max_y += dPriv->y + 1;
}
rmesa->state.scissor.rect.x1 = CLAMP(x1, min_x, max_x);
@@ -263,29 +262,6 @@ void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
}
}
-void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask )
-{
- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
- GLuint i;
- drm_radeon_stipple_t stipple;
-
- /* Must flip pattern upside down.
- */
- for ( i = 0 ; i < 32 ; i++ ) {
- stipple.mask[31 - i] = ((GLuint *) mask)[i];
- }
-
- /* TODO: push this into cmd mechanism
- */
- radeon_firevertices(radeon);
- LOCK_HARDWARE( radeon );
-
- drmCommandWrite( radeon->dri.fd, DRM_RADEON_STIPPLE,
- &stipple, sizeof(stipple) );
- UNLOCK_HARDWARE( radeon );
-}
-
-
/* ================================================================
* SwapBuffers with client-side throttling
*/
@@ -1124,8 +1100,6 @@ void radeonFlush(GLcontext *ctx)
if (radeon->dma.flush)
radeon->dma.flush( ctx );
- radeonEmitState(radeon);
-
if (radeon->cmdbuf.cs->cdw)
rcommonFlushCmdBuf(radeon, __FUNCTION__);
@@ -1148,9 +1122,6 @@ void radeonFlush(GLcontext *ctx)
}
}
}
-
- make_empty_list(&radeon->query.not_flushed_head);
-
}
/* Make sure all commands have been sent to the hardware and have
@@ -1345,5 +1316,5 @@ void rcommonBeginBatch(radeonContextPtr rmesa, int n,
void radeonUserClear(GLcontext *ctx, GLuint mask)
{
- _mesa_meta_clear(ctx, mask);
+ _mesa_meta_Clear(ctx, mask);
}
diff --git a/radeon/radeon_common.h b/radeon/radeon_common.h
index f320191..0608fe2 100644
--- a/radeon/radeon_common.h
+++ b/radeon/radeon_common.h
@@ -10,7 +10,6 @@ void radeonRecalcScissorRects(radeonContextPtr radeon);
void radeonSetCliprects(radeonContextPtr radeon);
void radeonUpdateScissor( GLcontext *ctx );
void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h);
-void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask );
void radeonWaitForIdleLocked(radeonContextPtr radeon);
extern uint32_t radeonGetAge(radeonContextPtr radeon);
@@ -43,7 +42,7 @@ void
radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
struct radeon_bo *bo);
struct radeon_renderbuffer *
-radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv);
+radeon_create_renderbuffer(gl_format format, __DRIdrawablePrivate *driDrawPriv);
static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb)
{
struct radeon_renderbuffer *rrb = (struct radeon_renderbuffer *)rb;
diff --git a/radeon/radeon_common_context.c b/radeon/radeon_common_context.c
index 330721a..71f70d7 100644
--- a/radeon/radeon_common_context.c
+++ b/radeon/radeon_common_context.c
@@ -47,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */
+#if defined(RADEON_R600)
#include "r600_context.h"
#endif
@@ -262,10 +262,9 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
else
radeon->texture_row_align = 32;
radeon->texture_rect_row_align = 64;
- radeon->texture_compressed_row_align = 64;
+ radeon->texture_compressed_row_align = 32;
}
- make_empty_list(&radeon->query.not_flushed_head);
radeon_init_dma(radeon);
return GL_TRUE;
@@ -496,19 +495,7 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon,
static unsigned
radeon_bits_per_pixel(const struct radeon_renderbuffer *rb)
{
- switch (rb->base._ActualFormat) {
- case GL_RGB5:
- case GL_DEPTH_COMPONENT16:
- return 16;
- case GL_RGB8:
- case GL_RGBA8:
- case GL_DEPTH_COMPONENT24:
- case GL_DEPTH24_STENCIL8_EXT:
- case GL_STENCIL_INDEX8_EXT:
- return 32;
- default:
- return 0;
- }
+ return _mesa_get_format_bytes(rb->base.Format) * 8;
}
void
diff --git a/radeon/radeon_common_context.h b/radeon/radeon_common_context.h
index 0309345..6298748 100644
--- a/radeon/radeon_common_context.h
+++ b/radeon/radeon_common_context.h
@@ -208,6 +208,10 @@ struct radeon_tex_obj {
* and so on.
*/
GLboolean validated;
+ /* Minimum LOD to be used during rendering */
+ unsigned minLod;
+ /* Miximum LOD to be used during rendering */
+ unsigned maxLod;
GLuint override_offset;
GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
@@ -401,9 +405,6 @@ struct radeon_state {
struct radeon_depthbuffer_state depth;
struct radeon_scissor_state scissor;
struct radeon_stencilbuffer_state stencil;
-
- struct radeon_cs_space_check bos[RADEON_MAX_BOS];
- int validated_bo_count;
};
/**
@@ -502,7 +503,6 @@ struct radeon_context {
struct {
struct radeon_query_object *current;
- struct radeon_query_object not_flushed_head;
struct radeon_state_atom queryobj;
} query;
diff --git a/radeon/radeon_context.c b/radeon/radeon_context.c
index 8f4485a..5e700be 100644
--- a/radeon/radeon_context.c
+++ b/radeon/radeon_context.c
@@ -69,7 +69,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define need_GL_EXT_fog_coord
#define need_GL_EXT_secondary_color
#define need_GL_EXT_framebuffer_object
-#include "extension_helper.h"
+#include "main/remap_helper.h"
#define DRIVER_DATE "20061018"
@@ -79,7 +79,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/* Extension strings exported by the R100 driver.
*/
-const struct dri_extension card_extensions[] =
+static const struct dri_extension card_extensions[] =
{
{ "GL_ARB_multitexture", NULL },
{ "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
@@ -109,7 +109,7 @@ const struct dri_extension card_extensions[] =
{ NULL, NULL }
};
-const struct dri_extension mm_extensions[] = {
+static const struct dri_extension mm_extensions[] = {
{ "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
{ NULL, NULL }
};
diff --git a/radeon/radeon_context.h b/radeon/radeon_context.h
index 4e2c52c..12ab33a 100644
--- a/radeon/radeon_context.h
+++ b/radeon/radeon_context.h
@@ -331,8 +331,12 @@ struct r100_hw_state {
struct radeon_state_atom stp;
};
+struct radeon_stipple_state {
+ GLuint mask[32];
+};
struct r100_state {
+ struct radeon_stipple_state stipple;
struct radeon_texture_state texture;
};
diff --git a/radeon/radeon_cs.c b/radeon/radeon_cs.c
new file mode 100644
index 0000000..17e7433
--- /dev/null
+++ b/radeon/radeon_cs.c
@@ -0,0 +1,95 @@
+
+#include <stdio.h>
+#include <stdint.h>
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_cs_int_drm.h"
+
+struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
+ uint32_t ndw)
+{
+ struct radeon_cs_int *csi = csm->funcs->cs_create(csm, ndw);
+ return (struct radeon_cs *)csi;
+}
+
+int radeon_cs_write_reloc(struct radeon_cs *cs,
+ struct radeon_bo *bo,
+ uint32_t read_domain,
+ uint32_t write_domain,
+ uint32_t flags)
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+
+ return csi->csm->funcs->cs_write_reloc(csi,
+ bo,
+ read_domain,
+ write_domain,
+ flags);
+}
+
+int radeon_cs_begin(struct radeon_cs *cs,
+ uint32_t ndw,
+ const char *file,
+ const char *func,
+ int line)
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ return csi->csm->funcs->cs_begin(csi, ndw, file, func, line);
+}
+
+int radeon_cs_end(struct radeon_cs *cs,
+ const char *file,
+ const char *func,
+ int line)
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ return csi->csm->funcs->cs_end(csi, file, func, line);
+}
+
+int radeon_cs_emit(struct radeon_cs *cs)
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ return csi->csm->funcs->cs_emit(csi);
+}
+
+int radeon_cs_destroy(struct radeon_cs *cs)
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ return csi->csm->funcs->cs_destroy(csi);
+}
+
+int radeon_cs_erase(struct radeon_cs *cs)
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ return csi->csm->funcs->cs_erase(csi);
+}
+
+int radeon_cs_need_flush(struct radeon_cs *cs)
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ return csi->csm->funcs->cs_need_flush(csi);
+}
+
+void radeon_cs_print(struct radeon_cs *cs, FILE *file)
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ csi->csm->funcs->cs_print(csi, file);
+}
+
+void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit)
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ if (domain == RADEON_GEM_DOMAIN_VRAM)
+ csi->csm->vram_limit = limit;
+ else
+ csi->csm->gart_limit = limit;
+}
+
+void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data)
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ csi->space_flush_fn = fn;
+ csi->space_flush_data = data;
+}
+
diff --git a/radeon/radeon_cs_drm.h b/radeon/radeon_cs_drm.h
index ab4eca3..a3f1750 100644
--- a/radeon/radeon_cs_drm.h
+++ b/radeon/radeon_cs_drm.h
@@ -36,6 +36,7 @@
#include <string.h>
#include "drm.h"
#include "radeon_drm.h"
+#include "radeon_bo_drm.h"
struct radeon_cs_reloc {
struct radeon_bo *bo;
@@ -49,173 +50,41 @@ struct radeon_cs_reloc {
#define RADEON_CS_SPACE_OP_TO_BIG 1
#define RADEON_CS_SPACE_FLUSH 2
-struct radeon_cs_space_check {
- struct radeon_bo *bo;
- uint32_t read_domains;
- uint32_t write_domain;
- uint32_t new_accounted;
-};
-
-#define MAX_SPACE_BOS (32)
-
-struct radeon_cs_manager;
-
struct radeon_cs {
- struct radeon_cs_manager *csm;
- void *relocs;
- uint32_t *packets;
- unsigned crelocs;
- unsigned relocs_total_size;
- unsigned cdw;
- unsigned ndw;
- int section;
+ uint32_t *packets;
+ unsigned cdw;
+ unsigned ndw;
unsigned section_ndw;
unsigned section_cdw;
- const char *section_file;
- const char *section_func;
- int section_line;
- struct radeon_cs_space_check bos[MAX_SPACE_BOS];
- int bo_count;
- void (*space_flush_fn)(void *);
- void *space_flush_data;
-};
-
-/* cs functions */
-struct radeon_cs_funcs {
- struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm,
- uint32_t ndw);
- int (*cs_write_reloc)(struct radeon_cs *cs,
- struct radeon_bo *bo,
- uint32_t read_domain,
- uint32_t write_domain,
- uint32_t flags);
- int (*cs_begin)(struct radeon_cs *cs,
- uint32_t ndw,
- const char *file,
- const char *func,
- int line);
- int (*cs_end)(struct radeon_cs *cs,
- const char *file,
- const char *func,
- int line);
- int (*cs_emit)(struct radeon_cs *cs);
- int (*cs_destroy)(struct radeon_cs *cs);
- int (*cs_erase)(struct radeon_cs *cs);
- int (*cs_need_flush)(struct radeon_cs *cs);
- void (*cs_print)(struct radeon_cs *cs, FILE *file);
-};
-
-struct radeon_cs_manager {
- struct radeon_cs_funcs *funcs;
- int fd;
- int32_t vram_limit, gart_limit;
- int32_t vram_write_used, gart_write_used;
- int32_t read_used;
};
-static inline struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
- uint32_t ndw)
-{
- return csm->funcs->cs_create(csm, ndw);
-}
-
-static inline int radeon_cs_write_reloc(struct radeon_cs *cs,
- struct radeon_bo *bo,
- uint32_t read_domain,
- uint32_t write_domain,
- uint32_t flags)
-{
- return cs->csm->funcs->cs_write_reloc(cs,
- bo,
- read_domain,
- write_domain,
- flags);
-}
-
-static inline int radeon_cs_begin(struct radeon_cs *cs,
- uint32_t ndw,
- const char *file,
- const char *func,
- int line)
-{
- return cs->csm->funcs->cs_begin(cs, ndw, file, func, line);
-}
-
-static inline int radeon_cs_end(struct radeon_cs *cs,
- const char *file,
- const char *func,
- int line)
-{
- return cs->csm->funcs->cs_end(cs, file, func, line);
-}
-
-static inline int radeon_cs_emit(struct radeon_cs *cs)
-{
- return cs->csm->funcs->cs_emit(cs);
-}
-
-static inline int radeon_cs_destroy(struct radeon_cs *cs)
-{
- return cs->csm->funcs->cs_destroy(cs);
-}
-
-static inline int radeon_cs_erase(struct radeon_cs *cs)
-{
- return cs->csm->funcs->cs_erase(cs);
-}
-
-static inline int radeon_cs_need_flush(struct radeon_cs *cs)
-{
- return cs->csm->funcs->cs_need_flush(cs);
-}
-
-static inline void radeon_cs_print(struct radeon_cs *cs, FILE *file)
-{
- cs->csm->funcs->cs_print(cs, file);
-}
-
-static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit)
-{
-
- if (domain == RADEON_GEM_DOMAIN_VRAM)
- cs->csm->vram_limit = limit;
- else
- cs->csm->gart_limit = limit;
-}
-
-static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword)
-{
- cs->packets[cs->cdw++] = dword;
- if (cs->section) {
- cs->section_cdw++;
- }
-}
-
-static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword)
-{
-
- memcpy(cs->packets + cs->cdw, &qword, sizeof(qword));
- cs->cdw+=2;
- if (cs->section) {
- cs->section_cdw+=2;
- }
-}
-
-static inline void radeon_cs_write_table(struct radeon_cs *cs, void *data, uint32_t size)
-{
- memcpy(cs->packets + cs->cdw, data, size * 4);
- cs->cdw += size;
- if (cs->section) {
- cs->section_cdw += size;
- }
-}
+#define MAX_SPACE_BOS (32)
-static inline void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data)
-{
- cs->space_flush_fn = fn;
- cs->space_flush_data = data;
-}
+struct radeon_cs_manager;
+extern struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
+ uint32_t ndw);
+
+extern int radeon_cs_begin(struct radeon_cs *cs,
+ uint32_t ndw,
+ const char *file,
+ const char *func, int line);
+extern int radeon_cs_end(struct radeon_cs *cs,
+ const char *file,
+ const char *func,
+ int line);
+extern int radeon_cs_emit(struct radeon_cs *cs);
+extern int radeon_cs_destroy(struct radeon_cs *cs);
+extern int radeon_cs_erase(struct radeon_cs *cs);
+extern int radeon_cs_need_flush(struct radeon_cs *cs);
+extern void radeon_cs_print(struct radeon_cs *cs, FILE *file);
+extern void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit);
+extern void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data);
+extern int radeon_cs_write_reloc(struct radeon_cs *cs,
+ struct radeon_bo *bo,
+ uint32_t read_domain,
+ uint32_t write_domain,
+ uint32_t flags);
/*
* add a persistent BO to the list
@@ -243,4 +112,30 @@ int radeon_cs_space_check_with_bo(struct radeon_cs *cs,
uint32_t read_domains,
uint32_t write_domain);
+static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword)
+{
+ cs->packets[cs->cdw++] = dword;
+ if (cs->section_ndw) {
+ cs->section_cdw++;
+ }
+}
+
+static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword)
+{
+ memcpy(cs->packets + cs->cdw, &qword, sizeof(uint64_t));
+ cs->cdw += 2;
+ if (cs->section_ndw) {
+ cs->section_cdw += 2;
+ }
+}
+
+static inline void radeon_cs_write_table(struct radeon_cs *cs,
+ void *data, uint32_t size)
+{
+ memcpy(cs->packets + cs->cdw, data, size * 4);
+ cs->cdw += size;
+ if (cs->section_ndw) {
+ cs->section_cdw += size;
+ }
+}
#endif
diff --git a/radeon/radeon_cs_int_drm.h b/radeon/radeon_cs_int_drm.h
new file mode 100644
index 0000000..8ba76bf
--- /dev/null
+++ b/radeon/radeon_cs_int_drm.h
@@ -0,0 +1,66 @@
+
+#ifndef _RADEON_CS_INT_H_
+#define _RADEON_CS_INT_H_
+
+struct radeon_cs_space_check {
+ struct radeon_bo_int *bo;
+ uint32_t read_domains;
+ uint32_t write_domain;
+ uint32_t new_accounted;
+};
+
+struct radeon_cs_int {
+ /* keep first two in same place */
+ uint32_t *packets;
+ unsigned cdw;
+ unsigned ndw;
+ unsigned section_ndw;
+ unsigned section_cdw;
+ /* private members */
+ struct radeon_cs_manager *csm;
+ void *relocs;
+ unsigned crelocs;
+ unsigned relocs_total_size;
+ const char *section_file;
+ const char *section_func;
+ int section_line;
+ struct radeon_cs_space_check bos[MAX_SPACE_BOS];
+ int bo_count;
+ void (*space_flush_fn)(void *);
+ void *space_flush_data;
+};
+
+/* cs functions */
+struct radeon_cs_funcs {
+ struct radeon_cs_int *(*cs_create)(struct radeon_cs_manager *csm,
+ uint32_t ndw);
+ int (*cs_write_reloc)(struct radeon_cs_int *cs,
+ struct radeon_bo *bo,
+ uint32_t read_domain,
+ uint32_t write_domain,
+ uint32_t flags);
+ int (*cs_begin)(struct radeon_cs_int *cs,
+ uint32_t ndw,
+ const char *file,
+ const char *func,
+ int line);
+ int (*cs_end)(struct radeon_cs_int *cs,
+ const char *file, const char *func,
+ int line);
+
+
+ int (*cs_emit)(struct radeon_cs_int *cs);
+ int (*cs_destroy)(struct radeon_cs_int *cs);
+ int (*cs_erase)(struct radeon_cs_int *cs);
+ int (*cs_need_flush)(struct radeon_cs_int *cs);
+ void (*cs_print)(struct radeon_cs_int *cs, FILE *file);
+};
+
+struct radeon_cs_manager {
+ struct radeon_cs_funcs *funcs;
+ int fd;
+ int32_t vram_limit, gart_limit;
+ int32_t vram_write_used, gart_write_used;
+ int32_t read_used;
+};
+#endif
diff --git a/radeon/radeon_cs_legacy.c b/radeon/radeon_cs_legacy.c
index f1addb2..45b608a 100644
--- a/radeon/radeon_cs_legacy.c
+++ b/radeon/radeon_cs_legacy.c
@@ -30,10 +30,18 @@
* Jérôme Glisse <glisse@freedesktop.org>
*/
#include <errno.h>
+#include <unistd.h>
+#include <stdint.h>
+#include "drm.h"
+#include "radeon_drm.h"
#include "radeon_bocs_wrapper.h"
#include "radeon_common.h"
-
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_cs_int.h"
+#else
+#include "radeon_cs_int_drm.h"
+#endif
struct cs_manager_legacy {
struct radeon_cs_manager base;
struct radeon_context *ctx;
@@ -51,27 +59,27 @@ struct cs_reloc_legacy {
};
-static struct radeon_cs *cs_create(struct radeon_cs_manager *csm,
- uint32_t ndw)
+static struct radeon_cs_int *cs_create(struct radeon_cs_manager *csm,
+ uint32_t ndw)
{
- struct radeon_cs *cs;
+ struct radeon_cs_int *csi;
- cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
- if (cs == NULL) {
+ csi = (struct radeon_cs_int*)calloc(1, sizeof(struct radeon_cs_int));
+ if (csi == NULL) {
return NULL;
}
- cs->csm = csm;
- cs->ndw = (ndw + 0x3FF) & (~0x3FF);
- cs->packets = (uint32_t*)malloc(4*cs->ndw);
- if (cs->packets == NULL) {
- free(cs);
+ csi->csm = csm;
+ csi->ndw = (ndw + 0x3FF) & (~0x3FF);
+ csi->packets = (uint32_t*)malloc(4*csi->ndw);
+ if (csi->packets == NULL) {
+ free(csi);
return NULL;
}
- cs->relocs_total_size = 0;
- return cs;
+ csi->relocs_total_size = 0;
+ return csi;
}
-static int cs_write_reloc(struct radeon_cs *cs,
+static int cs_write_reloc(struct radeon_cs_int *cs,
struct radeon_bo *bo,
uint32_t read_domain,
uint32_t write_domain,
@@ -150,20 +158,19 @@ static int cs_write_reloc(struct radeon_cs *cs,
return 0;
}
-static int cs_begin(struct radeon_cs *cs,
+static int cs_begin(struct radeon_cs_int *cs,
uint32_t ndw,
const char *file,
const char *func,
int line)
{
- if (cs->section) {
+ if (cs->section_ndw) {
fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
cs->section_file, cs->section_func, cs->section_line);
fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
- cs->section = 1;
cs->section_ndw = ndw;
cs->section_cdw = 0;
cs->section_file = file;
@@ -187,18 +194,17 @@ static int cs_begin(struct radeon_cs *cs,
return 0;
}
-static int cs_end(struct radeon_cs *cs,
+static int cs_end(struct radeon_cs_int *cs,
const char *file,
const char *func,
int line)
{
- if (!cs->section) {
+ if (!cs->section_ndw) {
fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
- cs->section = 0;
if (cs->section_ndw != cs->section_cdw) {
fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
@@ -206,10 +212,12 @@ static int cs_end(struct radeon_cs *cs,
file, func, line);
return -EPIPE;
}
+ cs->section_ndw = 0;
+
return 0;
}
-static int cs_process_relocs(struct radeon_cs *cs)
+static int cs_process_relocs(struct radeon_cs_int *cs)
{
struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
struct cs_reloc_legacy *relocs;
@@ -254,7 +262,7 @@ restart:
return 0;
}
-static int cs_set_age(struct radeon_cs *cs)
+static int cs_set_age(struct radeon_cs_int *cs)
{
struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
struct cs_reloc_legacy *relocs;
@@ -268,7 +276,7 @@ static int cs_set_age(struct radeon_cs *cs)
return 0;
}
-static int cs_emit(struct radeon_cs *cs)
+static int cs_emit(struct radeon_cs_int *cs)
{
struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
drm_radeon_cmd_buffer_t cmd;
@@ -276,7 +284,7 @@ static int cs_emit(struct radeon_cs *cs)
uint64_t ull;
int r;
- csm->ctx->vtbl.emit_cs_header(cs, csm->ctx);
+ csm->ctx->vtbl.emit_cs_header((struct radeon_cs *)cs, csm->ctx);
/* append buffer age */
if ( IS_R300_CLASS(csm->ctx->radeonScreen) )
@@ -289,9 +297,9 @@ static int cs_emit(struct radeon_cs *cs)
age.scratch.reg = 2;
age.scratch.n_bufs = 1;
age.scratch.flags = 0;
- radeon_cs_write_dword(cs, age.u);
- radeon_cs_write_qword(cs, ull);
- radeon_cs_write_dword(cs, 0);
+ radeon_cs_write_dword((struct radeon_cs *)cs, age.u);
+ radeon_cs_write_qword((struct radeon_cs *)cs, ull);
+ radeon_cs_write_dword((struct radeon_cs *)cs, 0);
}
r = cs_process_relocs(cs);
@@ -342,7 +350,7 @@ static void inline cs_free_reloc(void *relocs_p, int crelocs)
free(relocs[i].indices);
}
-static int cs_destroy(struct radeon_cs *cs)
+static int cs_destroy(struct radeon_cs_int *cs)
{
cs_free_reloc(cs->relocs, cs->crelocs);
free(cs->relocs);
@@ -351,7 +359,7 @@ static int cs_destroy(struct radeon_cs *cs)
return 0;
}
-static int cs_erase(struct radeon_cs *cs)
+static int cs_erase(struct radeon_cs_int *cs)
{
cs_free_reloc(cs->relocs, cs->crelocs);
free(cs->relocs);
@@ -359,18 +367,18 @@ static int cs_erase(struct radeon_cs *cs)
cs->relocs = NULL;
cs->crelocs = 0;
cs->cdw = 0;
- cs->section = 0;
+ cs->section_ndw = 0;
return 0;
}
-static int cs_need_flush(struct radeon_cs *cs)
+static int cs_need_flush(struct radeon_cs_int *cs)
{
/* this function used to flush when the BO usage got to
* a certain size, now the higher levels handle this better */
return 0;
}
-static void cs_print(struct radeon_cs *cs, FILE *file)
+static void cs_print(struct radeon_cs_int *cs, FILE *file)
{
}
diff --git a/radeon/radeon_cs_space_drm.c b/radeon/radeon_cs_space_drm.c
index 89cbbb5..e22b437 100644
--- a/radeon/radeon_cs_space_drm.c
+++ b/radeon/radeon_cs_space_drm.c
@@ -29,6 +29,8 @@
#include <errno.h>
#include <stdlib.h>
#include "radeon_bocs_wrapper.h"
+#include "radeon_bo_int_drm.h"
+#include "radeon_cs_int_drm.h"
struct rad_sizes {
int32_t op_read;
@@ -39,7 +41,7 @@ struct rad_sizes {
static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct rad_sizes *sizes)
{
uint32_t read_domains, write_domain;
- struct radeon_bo *bo;
+ struct radeon_bo_int *bo;
bo = sc->bo;
sc->new_accounted = 0;
@@ -47,7 +49,7 @@ static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct ra
write_domain = sc->write_domain;
/* legacy needs a static check */
- if (radeon_bo_is_static(bo)) {
+ if (radeon_bo_is_static((struct radeon_bo *)sc->bo)) {
bo->space_accounted = sc->new_accounted = (read_domains << 16) | write_domain;
return 0;
}
@@ -100,11 +102,11 @@ static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct ra
return 0;
}
-static int radeon_cs_do_space_check(struct radeon_cs *cs, struct radeon_cs_space_check *new_tmp)
+static int radeon_cs_do_space_check(struct radeon_cs_int *cs, struct radeon_cs_space_check *new_tmp)
{
struct radeon_cs_manager *csm = cs->csm;
int i;
- struct radeon_bo *bo;
+ struct radeon_bo_int *bo;
struct rad_sizes sizes;
int ret;
@@ -158,25 +160,28 @@ static int radeon_cs_do_space_check(struct radeon_cs *cs, struct radeon_cs_space
void radeon_cs_space_add_persistent_bo(struct radeon_cs *cs, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain)
{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
int i;
- for (i = 0; i < cs->bo_count; i++) {
- if (cs->bos[i].bo == bo &&
- cs->bos[i].read_domains == read_domains &&
- cs->bos[i].write_domain == write_domain)
+ for (i = 0; i < csi->bo_count; i++) {
+ if (csi->bos[i].bo == boi &&
+ csi->bos[i].read_domains == read_domains &&
+ csi->bos[i].write_domain == write_domain)
return;
}
radeon_bo_ref(bo);
- i = cs->bo_count;
- cs->bos[i].bo = bo;
- cs->bos[i].read_domains = read_domains;
- cs->bos[i].write_domain = write_domain;
- cs->bos[i].new_accounted = 0;
- cs->bo_count++;
-
- assert(cs->bo_count < MAX_SPACE_BOS);
+ i = csi->bo_count;
+ csi->bos[i].bo = boi;
+ csi->bos[i].read_domains = read_domains;
+ csi->bos[i].write_domain = write_domain;
+ csi->bos[i].new_accounted = 0;
+ csi->bo_count++;
+
+ assert(csi->bo_count < MAX_SPACE_BOS);
}
-static int radeon_cs_check_space_internal(struct radeon_cs *cs, struct radeon_cs_space_check *tmp_bo)
+static int radeon_cs_check_space_internal(struct radeon_cs_int *cs,
+ struct radeon_cs_space_check *tmp_bo)
{
int ret;
int flushed = 0;
@@ -198,37 +203,42 @@ again:
int radeon_cs_space_check_with_bo(struct radeon_cs *cs,
struct radeon_bo *bo,
uint32_t read_domains, uint32_t write_domain)
-{
+{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
struct radeon_cs_space_check temp_bo;
+
int ret = 0;
if (bo) {
- temp_bo.bo = bo;
+ temp_bo.bo = boi;
temp_bo.read_domains = read_domains;
temp_bo.write_domain = write_domain;
temp_bo.new_accounted = 0;
}
- ret = radeon_cs_check_space_internal(cs, bo ? &temp_bo : NULL);
+ ret = radeon_cs_check_space_internal(csi, bo ? &temp_bo : NULL);
return ret;
}
int radeon_cs_space_check(struct radeon_cs *cs)
{
- return radeon_cs_check_space_internal(cs, NULL);
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+ return radeon_cs_check_space_internal(csi, NULL);
}
void radeon_cs_space_reset_bos(struct radeon_cs *cs)
{
+ struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
int i;
- for (i = 0; i < cs->bo_count; i++) {
- radeon_bo_unref(cs->bos[i].bo);
- cs->bos[i].bo = NULL;
- cs->bos[i].read_domains = 0;
- cs->bos[i].write_domain = 0;
- cs->bos[i].new_accounted = 0;
+ for (i = 0; i < csi->bo_count; i++) {
+ radeon_bo_unref((struct radeon_bo *)csi->bos[i].bo);
+ csi->bos[i].bo = NULL;
+ csi->bos[i].read_domains = 0;
+ csi->bos[i].write_domain = 0;
+ csi->bos[i].new_accounted = 0;
}
- cs->bo_count = 0;
+ csi->bo_count = 0;
}
diff --git a/radeon/radeon_dma.c b/radeon/radeon_dma.c
index c6edbae..232972d 100644
--- a/radeon/radeon_dma.c
+++ b/radeon/radeon_dma.c
@@ -306,10 +306,6 @@ static int radeon_bo_is_idle(struct radeon_bo* bo)
WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
"This may cause small performance drop for you.\n");
}
- /* Protect against bug in legacy bo handling that causes bos stay
- * referenced even after they should be freed */
- if (bo->cref != 1)
- return 0;
return ret != -EBUSY;
}
@@ -346,9 +342,7 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa)
foreach_s(dma_bo, temp, &rmesa->dma.wait) {
if (dma_bo->expire_counter == time) {
WARN_ONCE("Leaking dma buffer object!\n");
- /* force free of buffer so we don't realy start
- * leaking stuff now*/
- while ((dma_bo->bo = radeon_bo_unref(dma_bo->bo))) {}
+ radeon_bo_unref(dma_bo->bo);
remove_from_list(dma_bo);
FREE(dma_bo);
continue;
diff --git a/radeon/radeon_fbo.c b/radeon/radeon_fbo.c
index d83b166..fc21069 100644
--- a/radeon/radeon_fbo.c
+++ b/radeon/radeon_fbo.c
@@ -33,7 +33,6 @@
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "main/context.h"
-#include "main/texformat.h"
#include "main/texrender.h"
#include "drivers/common/meta.h"
@@ -91,11 +90,8 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
case GL_R3_G3_B2:
case GL_RGB4:
case GL_RGB5:
- rb->_ActualFormat = GL_RGB5;
+ rb->Format = _dri_texformat_rgb565;
rb->DataType = GL_UNSIGNED_BYTE;
- rb->RedBits = 5;
- rb->GreenBits = 6;
- rb->BlueBits = 5;
cpp = 2;
break;
case GL_RGB:
@@ -103,12 +99,8 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
case GL_RGB10:
case GL_RGB12:
case GL_RGB16:
- rb->_ActualFormat = GL_RGB8;
+ rb->Format = _dri_texformat_argb8888;
rb->DataType = GL_UNSIGNED_BYTE;
- rb->RedBits = 8;
- rb->GreenBits = 8;
- rb->BlueBits = 8;
- rb->AlphaBits = 0;
cpp = 4;
break;
case GL_RGBA:
@@ -119,12 +111,8 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
case GL_RGB10_A2:
case GL_RGBA12:
case GL_RGBA16:
- rb->_ActualFormat = GL_RGBA8;
+ rb->Format = _dri_texformat_argb8888;
rb->DataType = GL_UNSIGNED_BYTE;
- rb->RedBits = 8;
- rb->GreenBits = 8;
- rb->BlueBits = 8;
- rb->AlphaBits = 8;
cpp = 4;
break;
case GL_STENCIL_INDEX:
@@ -133,39 +121,36 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
case GL_STENCIL_INDEX8_EXT:
case GL_STENCIL_INDEX16_EXT:
/* alloc a depth+stencil buffer */
- rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+ rb->Format = MESA_FORMAT_S8_Z24;
rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
- rb->StencilBits = 8;
cpp = 4;
break;
case GL_DEPTH_COMPONENT16:
- rb->_ActualFormat = GL_DEPTH_COMPONENT16;
+ rb->Format = MESA_FORMAT_Z16;
rb->DataType = GL_UNSIGNED_SHORT;
- rb->DepthBits = 16;
cpp = 2;
break;
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
- rb->_ActualFormat = GL_DEPTH_COMPONENT24;
+ rb->Format = MESA_FORMAT_X8_Z24;
rb->DataType = GL_UNSIGNED_INT;
- rb->DepthBits = 24;
cpp = 4;
break;
case GL_DEPTH_STENCIL_EXT:
case GL_DEPTH24_STENCIL8_EXT:
- rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+ rb->Format = MESA_FORMAT_S8_Z24;
rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
- rb->DepthBits = 24;
- rb->StencilBits = 8;
cpp = 4;
break;
default:
_mesa_problem(ctx,
- "Unexpected format in intel_alloc_renderbuffer_storage");
+ "Unexpected format in radeon_alloc_renderbuffer_storage");
return GL_FALSE;
}
+ rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat);
+
if (ctx->Driver.Flush)
ctx->Driver.Flush(ctx); /* +r6/r7 */
@@ -213,7 +198,7 @@ radeon_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
ASSERT(rb->Name == 0);
rb->Width = width;
rb->Height = height;
- rb->_ActualFormat = internalFormat;
+ rb->InternalFormat = internalFormat;
return GL_TRUE;
}
@@ -255,8 +240,13 @@ radeon_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
return GL_FALSE;
}
+
+/**
+ * Create a renderbuffer for a window's color, depth and/or stencil buffer.
+ * Not used for user-created renderbuffers.
+ */
struct radeon_renderbuffer *
-radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv)
+radeon_create_renderbuffer(gl_format format, __DRIdrawablePrivate *driDrawPriv)
{
struct radeon_renderbuffer *rrb;
@@ -267,67 +257,64 @@ radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv)
_mesa_init_renderbuffer(&rrb->base, 0);
rrb->base.ClassID = RADEON_RB_CLASS;
- /* XXX format junk */
+ rrb->base.Format = format;
+
switch (format) {
- case GL_RGB5:
- rrb->base._ActualFormat = GL_RGB5;
- rrb->base._BaseFormat = GL_RGBA;
- rrb->base.RedBits = 5;
- rrb->base.GreenBits = 6;
- rrb->base.BlueBits = 5;
+ case MESA_FORMAT_RGB565:
+ assert(_mesa_little_endian());
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ rrb->base._BaseFormat = GL_RGB;
+ break;
+ case MESA_FORMAT_RGB565_REV:
+ assert(!_mesa_little_endian());
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ rrb->base._BaseFormat = GL_RGB;
+ break;
+ case MESA_FORMAT_XRGB8888:
+ assert(_mesa_little_endian());
rrb->base.DataType = GL_UNSIGNED_BYTE;
+ rrb->base._BaseFormat = GL_RGB;
break;
- case GL_RGB8:
- rrb->base._ActualFormat = GL_RGB8;
- rrb->base._BaseFormat = GL_RGB;
- rrb->base.RedBits = 8;
- rrb->base.GreenBits = 8;
- rrb->base.BlueBits = 8;
- rrb->base.AlphaBits = 0;
+ case MESA_FORMAT_XRGB8888_REV:
+ assert(!_mesa_little_endian());
rrb->base.DataType = GL_UNSIGNED_BYTE;
+ rrb->base._BaseFormat = GL_RGB;
break;
- case GL_RGBA8:
- rrb->base._ActualFormat = GL_RGBA8;
- rrb->base._BaseFormat = GL_RGBA;
- rrb->base.RedBits = 8;
- rrb->base.GreenBits = 8;
- rrb->base.BlueBits = 8;
- rrb->base.AlphaBits = 8;
+ case MESA_FORMAT_ARGB8888:
+ assert(_mesa_little_endian());
rrb->base.DataType = GL_UNSIGNED_BYTE;
+ rrb->base._BaseFormat = GL_RGBA;
break;
- case GL_STENCIL_INDEX8_EXT:
- rrb->base._ActualFormat = GL_STENCIL_INDEX8_EXT;
- rrb->base._BaseFormat = GL_STENCIL_INDEX;
- rrb->base.StencilBits = 8;
+ case MESA_FORMAT_ARGB8888_REV:
+ assert(!_mesa_little_endian());
rrb->base.DataType = GL_UNSIGNED_BYTE;
+ rrb->base._BaseFormat = GL_RGBA;
break;
- case GL_DEPTH_COMPONENT16:
- rrb->base._ActualFormat = GL_DEPTH_COMPONENT16;
- rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
- rrb->base.DepthBits = 16;
+ case MESA_FORMAT_S8:
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ rrb->base._BaseFormat = GL_STENCIL_INDEX;
+ break;
+ case MESA_FORMAT_Z16:
rrb->base.DataType = GL_UNSIGNED_SHORT;
+ rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
break;
- case GL_DEPTH_COMPONENT24:
- rrb->base._ActualFormat = GL_DEPTH_COMPONENT24;
- rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
- rrb->base.DepthBits = 24;
+ case MESA_FORMAT_X8_Z24:
rrb->base.DataType = GL_UNSIGNED_INT;
+ rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
break;
- case GL_DEPTH24_STENCIL8_EXT:
- rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
- rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
- rrb->base.DepthBits = 24;
- rrb->base.StencilBits = 8;
+ case MESA_FORMAT_S8_Z24:
rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+ rrb->base._BaseFormat = GL_DEPTH_STENCIL;
break;
default:
- fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format);
+ fprintf(stderr, "%s: Unknown format %s\n",
+ __FUNCTION__, _mesa_get_format_name(format));
_mesa_delete_renderbuffer(&rrb->base);
return NULL;
}
rrb->dPriv = driDrawPriv;
- rrb->base.InternalFormat = format;
+ rrb->base.InternalFormat = _mesa_get_format_base_format(format);
rrb->base.Delete = radeon_delete_renderbuffer;
rrb->base.AllocStorage = radeon_alloc_window_storage;
@@ -382,51 +369,41 @@ radeon_framebuffer_renderbuffer(GLcontext * ctx,
}
+/* TODO: According to EXT_fbo spec internal format of texture image
+ * once set during glTexImage call, should be preserved when
+ * attaching image to renderbuffer. When HW doesn't support
+ * rendering to format of attached image, set framebuffer
+ * completeness accordingly in radeon_validate_framebuffer (issue #79).
+ */
static GLboolean
radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb,
struct gl_texture_image *texImage)
{
int retry = 0;
+ gl_format texFormat;
+
restart:
- if (texImage->TexFormat == &_mesa_texformat_argb8888) {
- rrb->cpp = 4;
- rrb->base._ActualFormat = GL_RGBA8;
- rrb->base._BaseFormat = GL_RGBA;
+ if (texImage->TexFormat == _dri_texformat_argb8888) {
rrb->base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to RGBA8 texture OK\n");
}
- else if (texImage->TexFormat == &_mesa_texformat_rgb565) {
- rrb->cpp = 2;
- rrb->base._ActualFormat = GL_RGB5;
- rrb->base._BaseFormat = GL_RGB;
+ else if (texImage->TexFormat == _dri_texformat_rgb565) {
rrb->base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to RGB5 texture OK\n");
}
- else if (texImage->TexFormat == &_mesa_texformat_argb1555) {
- rrb->cpp = 2;
- rrb->base._ActualFormat = GL_RGB5_A1;
- rrb->base._BaseFormat = GL_RGBA;
+ else if (texImage->TexFormat == _dri_texformat_argb1555) {
rrb->base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to ARGB1555 texture OK\n");
}
- else if (texImage->TexFormat == &_mesa_texformat_argb4444) {
- rrb->cpp = 2;
- rrb->base._ActualFormat = GL_RGBA4;
- rrb->base._BaseFormat = GL_RGBA;
+ else if (texImage->TexFormat == _dri_texformat_argb4444) {
rrb->base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to ARGB1555 texture OK\n");
+ DBG("Render to ARGB4444 texture OK\n");
}
- else if (texImage->TexFormat == &_mesa_texformat_z16) {
- rrb->cpp = 2;
- rrb->base._ActualFormat = GL_DEPTH_COMPONENT16;
- rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
+ else if (texImage->TexFormat == MESA_FORMAT_Z16) {
rrb->base.DataType = GL_UNSIGNED_SHORT;
DBG("Render to DEPTH16 texture OK\n");
}
- else if (texImage->TexFormat == &_mesa_texformat_s8_z24) {
- rrb->cpp = 4;
- rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
- rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+ else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) {
rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
DBG("Render to DEPTH_STENCIL texture OK\n");
}
@@ -434,27 +411,31 @@ restart:
/* try redoing the FBO */
if (retry == 1) {
DBG("Render to texture BAD FORMAT %d\n",
- texImage->TexFormat->MesaFormat);
+ texImage->TexFormat);
return GL_FALSE;
}
+ /* XXX why is the tex format being set here?
+ * I think this can be removed.
+ */
texImage->TexFormat = radeonChooseTextureFormat(ctx, texImage->InternalFormat, 0,
- texImage->TexFormat->DataType,
+ _mesa_get_format_datatype(texImage->TexFormat),
1);
retry++;
goto restart;
}
+ texFormat = texImage->TexFormat;
+
+ rrb->base.Format = texFormat;
+
+ rrb->cpp = _mesa_get_format_bytes(texFormat);
rrb->pitch = texImage->Width * rrb->cpp;
- rrb->base.InternalFormat = rrb->base._ActualFormat;
+ rrb->base.InternalFormat = texImage->InternalFormat;
+ rrb->base._BaseFormat = _mesa_base_fbo_format(ctx, rrb->base.InternalFormat);
+
rrb->base.Width = texImage->Width;
rrb->base.Height = texImage->Height;
- rrb->base.RedBits = texImage->TexFormat->RedBits;
- rrb->base.GreenBits = texImage->TexFormat->GreenBits;
- rrb->base.BlueBits = texImage->TexFormat->BlueBits;
- rrb->base.AlphaBits = texImage->TexFormat->AlphaBits;
- rrb->base.DepthBits = texImage->TexFormat->DepthBits;
- rrb->base.StencilBits = texImage->TexFormat->StencilBits;
rrb->base.Delete = radeon_delete_renderbuffer;
rrb->base.AllocStorage = radeon_nop_alloc_storage;
@@ -555,8 +536,10 @@ radeon_render_texture(GLcontext * ctx,
imageOffset += offsets[att->Zoffset];
}
- /* store that offset in the region */
+ /* store that offset in the region, along with the correct pitch for
+ * the image we are rendering to */
rrb->draw_offset = imageOffset;
+ rrb->pitch = radeon_image->mt->levels[att->TextureLevel].rowstride;
/* update drawing region, etc */
radeon_draw_buffer(ctx, fb);
@@ -583,7 +566,7 @@ void radeon_fbo_init(struct radeon_context *radeon)
radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture;
radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers;
radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer;
- radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer;
+ radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer;
}
diff --git a/radeon/radeon_lock.c b/radeon/radeon_lock.c
index 02de8e5..7ad781b 100644
--- a/radeon/radeon_lock.c
+++ b/radeon/radeon_lock.c
@@ -62,8 +62,6 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
__DRIdrawablePrivate *const readable = radeon_get_readable(rmesa);
__DRIscreenPrivate *sPriv = rmesa->dri.screen;
- assert(drawable != NULL);
-
drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
/* The window might have moved, so we might need to get new clip
@@ -74,12 +72,13 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
* Since the hardware state depends on having the latest drawable
* clip rects, all state checking must be done _after_ this call.
*/
- DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
- if (drawable != readable) {
+ if (drawable)
+ DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
+ if (readable && drawable != readable) {
DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
}
- if (rmesa->lastStamp != drawable->lastStamp) {
+ if (drawable && (rmesa->lastStamp != drawable->lastStamp)) {
radeon_window_moved(rmesa);
rmesa->lastStamp = drawable->lastStamp;
}
diff --git a/radeon/radeon_mipmap_tree.c b/radeon/radeon_mipmap_tree.c
index 38db305..5a346c5 100644
--- a/radeon/radeon_mipmap_tree.c
+++ b/radeon/radeon_mipmap_tree.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (C) 2009 Maciej Cencora.
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
@@ -32,51 +33,39 @@
#include "main/simple_list.h"
#include "main/texcompress.h"
-#include "main/texformat.h"
-
-static GLuint radeon_compressed_texture_size(GLcontext *ctx,
- GLsizei width, GLsizei height, GLsizei depth,
- GLuint mesaFormat)
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "radeon_texture.h"
+
+static unsigned get_aligned_compressed_row_stride(
+ gl_format format,
+ unsigned width,
+ unsigned minStride)
{
- GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat);
-
- if (mesaFormat == MESA_FORMAT_RGB_DXT1 ||
- mesaFormat == MESA_FORMAT_RGBA_DXT1) {
- if (width + 3 < 8) /* width one block */
- size = size * 4;
- else if (width + 3 < 16)
- size = size * 2;
- } else {
- /* DXT3/5, 16 bytes per block */
- // WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n");
- if (width + 3 < 8)
- size = size * 2;
+ const unsigned blockSize = _mesa_get_format_bytes(format);
+ unsigned blockWidth, blockHeight, numXBlocks;
+
+ _mesa_get_format_block_size(format, &blockWidth, &blockHeight);
+ numXBlocks = (width + blockWidth - 1) / blockWidth;
+
+ while (numXBlocks * blockSize < minStride)
+ {
+ ++numXBlocks;
}
- return size;
+ return numXBlocks * blockSize;
}
-
-static int radeon_compressed_num_bytes(GLuint mesaFormat)
+static unsigned get_compressed_image_size(
+ gl_format format,
+ unsigned rowStride,
+ unsigned height)
{
- int bytes = 0;
- switch(mesaFormat) {
-
- case MESA_FORMAT_RGB_FXT1:
- case MESA_FORMAT_RGBA_FXT1:
- case MESA_FORMAT_RGB_DXT1:
- case MESA_FORMAT_RGBA_DXT1:
- bytes = 2;
- break;
-
- case MESA_FORMAT_RGBA_DXT3:
- case MESA_FORMAT_RGBA_DXT5:
- bytes = 4;
- default:
- break;
- }
-
- return bytes;
+ unsigned blockWidth, blockHeight;
+
+ _mesa_get_format_block_size(format, &blockWidth, &blockHeight);
+
+ return rowStride * ((height + blockHeight - 1) / blockHeight);
}
/**
@@ -93,25 +82,22 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree
uint32_t row_align;
/* Find image size in bytes */
- if (mt->compressed) {
- /* TODO: Is this correct? Need test cases for compressed textures! */
- row_align = rmesa->texture_compressed_row_align - 1;
- lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
- lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx,
- lvl->width, lvl->height, lvl->depth, mt->compressed);
+ if (_mesa_is_format_compressed(mt->mesaFormat)) {
+ lvl->rowstride = get_aligned_compressed_row_stride(mt->mesaFormat, lvl->width, rmesa->texture_compressed_row_align);
+ lvl->size = get_compressed_image_size(mt->mesaFormat, lvl->rowstride, lvl->height);
} else if (mt->target == GL_TEXTURE_RECTANGLE_NV) {
row_align = rmesa->texture_rect_row_align - 1;
- lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
+ lvl->rowstride = (_mesa_format_row_stride(mt->mesaFormat, lvl->width) + row_align) & ~row_align;
lvl->size = lvl->rowstride * lvl->height;
} else if (mt->tilebits & RADEON_TXO_MICRO_TILE) {
/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
* though the actual offset may be different (if texture is less than
* 32 bytes width) to the untiled case */
- lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31;
+ lvl->rowstride = (_mesa_format_row_stride(mt->mesaFormat, lvl->width) * 2 + 31) & ~31;
lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth;
} else {
row_align = rmesa->texture_row_align - 1;
- lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
+ lvl->rowstride = (_mesa_format_row_stride(mt->mesaFormat, lvl->width) + row_align) & ~row_align;
lvl->size = lvl->rowstride * lvl->height * lvl->depth;
}
assert(lvl->size > 0);
@@ -138,22 +124,19 @@ static GLuint minify(GLuint size, GLuint levels)
static void calculate_miptree_layout_r100(radeonContextPtr rmesa, radeon_mipmap_tree *mt)
{
- GLuint curOffset;
- GLuint numLevels;
- GLuint i;
- GLuint face;
+ GLuint curOffset, i, face, level;
- numLevels = mt->lastLevel - mt->firstLevel + 1;
- assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels);
+ assert(mt->numLevels <= rmesa->glCtx->Const.MaxTextureLevels);
curOffset = 0;
for(face = 0; face < mt->faces; face++) {
- for(i = 0; i < numLevels; i++) {
- mt->levels[i].width = minify(mt->width0, i);
- mt->levels[i].height = minify(mt->height0, i);
- mt->levels[i].depth = minify(mt->depth0, i);
- compute_tex_image_offset(rmesa, mt, face, i, &curOffset);
+ for(i = 0, level = mt->baseLevel; i < mt->numLevels; i++, level++) {
+ mt->levels[level].valid = 1;
+ mt->levels[level].width = minify(mt->width0, i);
+ mt->levels[level].height = minify(mt->height0, i);
+ mt->levels[level].depth = minify(mt->depth0, i);
+ compute_tex_image_offset(rmesa, mt, face, level, &curOffset);
}
}
@@ -163,23 +146,21 @@ static void calculate_miptree_layout_r100(radeonContextPtr rmesa, radeon_mipmap_
static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_tree *mt)
{
- GLuint curOffset;
- GLuint numLevels;
- GLuint i;
+ GLuint curOffset, i, level;
- numLevels = mt->lastLevel - mt->firstLevel + 1;
- assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels);
+ assert(mt->numLevels <= rmesa->glCtx->Const.MaxTextureLevels);
curOffset = 0;
- for(i = 0; i < numLevels; i++) {
+ for(i = 0, level = mt->baseLevel; i < mt->numLevels; i++, level++) {
GLuint face;
- mt->levels[i].width = minify(mt->width0, i);
- mt->levels[i].height = minify(mt->height0, i);
- mt->levels[i].depth = minify(mt->depth0, i);
+ mt->levels[level].valid = 1;
+ mt->levels[level].width = minify(mt->width0, i);
+ mt->levels[level].height = minify(mt->height0, i);
+ mt->levels[level].depth = minify(mt->depth0, i);
for(face = 0; face < mt->faces; face++)
- compute_tex_image_offset(rmesa, mt, face, i, &curOffset);
+ compute_tex_image_offset(rmesa, mt, face, level, &curOffset);
}
/* Note the required size in memory */
@@ -189,27 +170,22 @@ static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_
/**
* Create a new mipmap tree, calculate its layout and allocate memory.
*/
-radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t,
- GLenum target, GLenum internal_format, GLuint firstLevel, GLuint lastLevel,
- GLuint width0, GLuint height0, GLuint depth0,
- GLuint bpp, GLuint tilebits, GLuint compressed)
+static radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa,
+ GLenum target, gl_format mesaFormat, GLuint baseLevel, GLuint numLevels,
+ GLuint width0, GLuint height0, GLuint depth0, GLuint tilebits)
{
radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree);
- mt->radeon = rmesa;
- mt->internal_format = internal_format;
+ mt->mesaFormat = mesaFormat;
mt->refcount = 1;
- mt->t = t;
mt->target = target;
mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
- mt->firstLevel = firstLevel;
- mt->lastLevel = lastLevel;
+ mt->baseLevel = baseLevel;
+ mt->numLevels = numLevels;
mt->width0 = width0;
mt->height0 = height0;
mt->depth0 = depth0;
- mt->bpp = compressed ? radeon_compressed_num_bytes(compressed) : bpp;
mt->tilebits = tilebits;
- mt->compressed = compressed;
if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R300)
calculate_miptree_layout_r300(rmesa, mt);
@@ -224,53 +200,43 @@ radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *
return mt;
}
-void radeon_miptree_reference(radeon_mipmap_tree *mt)
+void radeon_miptree_reference(radeon_mipmap_tree *mt, radeon_mipmap_tree **ptr)
{
+ assert(!*ptr);
+
mt->refcount++;
assert(mt->refcount > 0);
+
+ *ptr = mt;
}
-void radeon_miptree_unreference(radeon_mipmap_tree *mt)
+void radeon_miptree_unreference(radeon_mipmap_tree **ptr)
{
+ radeon_mipmap_tree *mt = *ptr;
if (!mt)
return;
assert(mt->refcount > 0);
+
mt->refcount--;
if (!mt->refcount) {
radeon_bo_unref(mt->bo);
free(mt);
}
-}
+ *ptr = 0;
+}
/**
- * Calculate first and last mip levels for the given texture object,
- * where the dimensions are taken from the given texture image at
- * the given level.
- *
- * Note: level is the OpenGL level number, which is not necessarily the same
- * as the first level that is actually present.
- *
- * The base level image of the given texture face must be non-null,
- * or this will fail.
+ * Calculate min and max LOD for the given texture object.
+ * @param[in] tObj texture object whose LOD values to calculate
+ * @param[out] pminLod minimal LOD
+ * @param[out] pmaxLod maximal LOD
*/
-static void calculate_first_last_level(struct gl_texture_object *tObj,
- GLuint *pfirstLevel, GLuint *plastLevel,
- GLuint face, GLuint level)
+static void calculate_min_max_lod(struct gl_texture_object *tObj,
+ unsigned *pminLod, unsigned *pmaxLod)
{
- const struct gl_texture_image * const baseImage =
- tObj->Image[face][level];
-
- assert(baseImage);
-
- /* These must be signed values. MinLod and MaxLod can be negative numbers,
- * and having firstLevel and lastLevel as signed prevents the need for
- * extra sign checks.
- */
- int firstLevel;
- int lastLevel;
-
+ int minLod, maxLod;
/* Yes, this looks overly complicated, but it's all needed.
*/
switch (tObj->Target) {
@@ -281,32 +247,30 @@ static void calculate_first_last_level(struct gl_texture_object *tObj,
if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
/* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
*/
- firstLevel = lastLevel = tObj->BaseLevel;
+ minLod = maxLod = tObj->BaseLevel;
} else {
- firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
- firstLevel = MAX2(firstLevel, tObj->BaseLevel);
- firstLevel = MIN2(firstLevel, level + baseImage->MaxLog2);
- lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
- lastLevel = MAX2(lastLevel, tObj->BaseLevel);
- lastLevel = MIN2(lastLevel, level + baseImage->MaxLog2);
- lastLevel = MIN2(lastLevel, tObj->MaxLevel);
- lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+ minLod = tObj->BaseLevel + (GLint)(tObj->MinLod);
+ minLod = MAX2(minLod, tObj->BaseLevel);
+ minLod = MIN2(minLod, tObj->MaxLevel);
+ maxLod = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
+ maxLod = MIN2(maxLod, tObj->MaxLevel);
+ maxLod = MIN2(maxLod, tObj->Image[0][minLod]->MaxLog2 + minLod);
+ maxLod = MAX2(maxLod, minLod); /* need at least one level */
}
break;
case GL_TEXTURE_RECTANGLE_NV:
case GL_TEXTURE_4D_SGIS:
- firstLevel = lastLevel = 0;
+ minLod = maxLod = 0;
break;
default:
return;
}
/* save these values */
- *pfirstLevel = firstLevel;
- *plastLevel = lastLevel;
+ *pminLod = minLod;
+ *pmaxLod = maxLod;
}
-
/**
* Checks whether the given miptree can hold the given texture image at the
* given face and level.
@@ -316,20 +280,15 @@ GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
{
radeon_mipmap_level *lvl;
- if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel)
- return GL_FALSE;
-
- if (texImage->InternalFormat != mt->internal_format ||
- texImage->IsCompressed != mt->compressed)
+ if (face >= mt->faces)
return GL_FALSE;
- if (!texImage->IsCompressed &&
- !mt->compressed &&
- texImage->TexFormat->TexelBytes != mt->bpp)
+ if (texImage->TexFormat != mt->mesaFormat)
return GL_FALSE;
- lvl = &mt->levels[level - mt->firstLevel];
- if (lvl->width != texImage->Width ||
+ lvl = &mt->levels[level];
+ if (!lvl->valid ||
+ lvl->width != texImage->Width ||
lvl->height != texImage->Height ||
lvl->depth != texImage->Depth)
return GL_FALSE;
@@ -337,59 +296,72 @@ GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
return GL_TRUE;
}
-
/**
* Checks whether the given miptree has the right format to store the given texture object.
*/
-GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj)
+static GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj)
{
struct gl_texture_image *firstImage;
- GLuint compressed;
- GLuint numfaces = 1;
- GLuint firstLevel, lastLevel;
-
- calculate_first_last_level(texObj, &firstLevel, &lastLevel, 0, texObj->BaseLevel);
- if (texObj->Target == GL_TEXTURE_CUBE_MAP)
- numfaces = 6;
-
- firstImage = texObj->Image[0][firstLevel];
- compressed = firstImage->IsCompressed ? firstImage->TexFormat->MesaFormat : 0;
-
- return (mt->firstLevel == firstLevel &&
- mt->lastLevel == lastLevel &&
- mt->width0 == firstImage->Width &&
- mt->height0 == firstImage->Height &&
- mt->depth0 == firstImage->Depth &&
- mt->compressed == compressed &&
- (!mt->compressed ? (mt->bpp == firstImage->TexFormat->TexelBytes) : 1));
-}
+ unsigned numLevels;
+ radeon_mipmap_level *mtBaseLevel;
+ if (texObj->BaseLevel < mt->baseLevel)
+ return GL_FALSE;
+
+ mtBaseLevel = &mt->levels[texObj->BaseLevel - mt->baseLevel];
+ firstImage = texObj->Image[0][texObj->BaseLevel];
+ numLevels = MIN2(texObj->MaxLevel - texObj->BaseLevel + 1, firstImage->MaxLog2 + 1);
+
+ if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "Checking if miptree %p matches texObj %p\n", mt, texObj);
+ fprintf(stderr, "target %d vs %d\n", mt->target, texObj->Target);
+ fprintf(stderr, "format %d vs %d\n", mt->mesaFormat, firstImage->TexFormat);
+ fprintf(stderr, "numLevels %d vs %d\n", mt->numLevels, numLevels);
+ fprintf(stderr, "width0 %d vs %d\n", mtBaseLevel->width, firstImage->Width);
+ fprintf(stderr, "height0 %d vs %d\n", mtBaseLevel->height, firstImage->Height);
+ fprintf(stderr, "depth0 %d vs %d\n", mtBaseLevel->depth, firstImage->Depth);
+ if (mt->target == texObj->Target &&
+ mt->mesaFormat == firstImage->TexFormat &&
+ mt->numLevels >= numLevels &&
+ mtBaseLevel->width == firstImage->Width &&
+ mtBaseLevel->height == firstImage->Height &&
+ mtBaseLevel->depth == firstImage->Depth) {
+ fprintf(stderr, "MATCHED\n");
+ } else {
+ fprintf(stderr, "NOT MATCHED\n");
+ }
+ }
+
+ return (mt->target == texObj->Target &&
+ mt->mesaFormat == firstImage->TexFormat &&
+ mt->numLevels >= numLevels &&
+ mtBaseLevel->width == firstImage->Width &&
+ mtBaseLevel->height == firstImage->Height &&
+ mtBaseLevel->depth == firstImage->Depth);
+}
/**
- * Try to allocate a mipmap tree for the given texture that will fit the
- * given image in the given position.
+ * Try to allocate a mipmap tree for the given texture object.
+ * @param[in] rmesa radeon context
+ * @param[in] t radeon texture object
*/
-void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
- radeon_texture_image *image, GLuint face, GLuint level)
+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t)
{
- GLuint compressed = image->base.IsCompressed ? image->base.TexFormat->MesaFormat : 0;
- GLuint numfaces = 1;
- GLuint firstLevel, lastLevel;
+ struct gl_texture_object *texObj = &t->base;
+ struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
+ GLuint numLevels;
assert(!t->mt);
- calculate_first_last_level(&t->base, &firstLevel, &lastLevel, face, level);
- if (t->base.Target == GL_TEXTURE_CUBE_MAP)
- numfaces = 6;
-
- if (level != firstLevel || face >= numfaces)
+ if (!texImg)
return;
- t->mt = radeon_miptree_create(rmesa, t, t->base.Target,
- image->base.InternalFormat,
- firstLevel, lastLevel,
- image->base.Width, image->base.Height, image->base.Depth,
- image->base.TexFormat->TexelBytes, t->tile_bits, compressed);
+ numLevels = MIN2(texObj->MaxLevel - texObj->BaseLevel + 1, texImg->MaxLog2 + 1);
+
+ t->mt = radeon_miptree_create(rmesa, t->base.Target,
+ texImg->TexFormat, texObj->BaseLevel,
+ numLevels, texImg->Width, texImg->Height,
+ texImg->Depth, t->tile_bits);
}
/* Although we use the image_offset[] array to store relative offsets
@@ -401,21 +373,236 @@ void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
void
radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets)
{
- if (mt->target != GL_TEXTURE_3D || mt->faces == 1)
- offsets[0] = 0;
- else {
- int i;
- for (i = 0; i < 6; i++)
- offsets[i] = mt->levels[level].faces[i].offset;
- }
+ if (mt->target != GL_TEXTURE_3D || mt->faces == 1) {
+ offsets[0] = 0;
+ } else {
+ int i;
+ for (i = 0; i < 6; i++) {
+ offsets[i] = mt->levels[level].faces[i].offset;
+ }
+ }
}
GLuint
radeon_miptree_image_offset(radeon_mipmap_tree *mt,
GLuint face, GLuint level)
{
- if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
- return (mt->levels[level].faces[face].offset);
- else
- return mt->levels[level].faces[0].offset;
+ if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
+ return (mt->levels[level].faces[face].offset);
+ else
+ return mt->levels[level].faces[0].offset;
+}
+
+/**
+ * Ensure that the given image is stored in the given miptree from now on.
+ */
+static void migrate_image_to_miptree(radeon_mipmap_tree *mt,
+ radeon_texture_image *image,
+ int face, int level)
+{
+ radeon_mipmap_level *dstlvl = &mt->levels[level];
+ unsigned char *dest;
+
+ assert(image->mt != mt);
+ assert(dstlvl->valid);
+ assert(dstlvl->width == image->base.Width);
+ assert(dstlvl->height == image->base.Height);
+ assert(dstlvl->depth == image->base.Depth);
+
+ radeon_bo_map(mt->bo, GL_TRUE);
+ dest = mt->bo->ptr + dstlvl->faces[face].offset;
+
+ if (image->mt) {
+ /* Format etc. should match, so we really just need a memcpy().
+ * In fact, that memcpy() could be done by the hardware in many
+ * cases, provided that we have a proper memory manager.
+ */
+ assert(mt->mesaFormat == image->base.TexFormat);
+
+ radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel];
+
+ /* TODO: bring back these assertions once the FBOs are fixed */
+#if 0
+ assert(image->mtlevel == level);
+ assert(srclvl->size == dstlvl->size);
+ assert(srclvl->rowstride == dstlvl->rowstride);
+#endif
+
+ radeon_bo_map(image->mt->bo, GL_FALSE);
+
+ memcpy(dest,
+ image->mt->bo->ptr + srclvl->faces[face].offset,
+ dstlvl->size);
+ radeon_bo_unmap(image->mt->bo);
+
+ radeon_miptree_unreference(&image->mt);
+ } else if (image->base.Data) {
+ /* This condition should be removed, it's here to workaround
+ * a segfault when mapping textures during software fallbacks.
+ */
+ const uint32_t srcrowstride = _mesa_format_row_stride(image->base.TexFormat, image->base.Width);
+ uint32_t rows = image->base.Height * image->base.Depth;
+
+ if (_mesa_is_format_compressed(image->base.TexFormat)) {
+ uint32_t blockWidth, blockHeight;
+ _mesa_get_format_block_size(image->base.TexFormat, &blockWidth, &blockHeight);
+ rows = (rows + blockHeight - 1) / blockHeight;
+ }
+
+ copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride,
+ rows, srcrowstride);
+
+ _mesa_free_texmemory(image->base.Data);
+ image->base.Data = 0;
+ }
+
+ radeon_bo_unmap(mt->bo);
+
+ radeon_miptree_reference(mt, &image->mt);
+ image->mtface = face;
+ image->mtlevel = level;
+}
+
+/**
+ * Filter matching miptrees, and select one with the most of data.
+ * @param[in] texObj radeon texture object
+ * @param[in] firstLevel first texture level to check
+ * @param[in] lastLevel last texture level to check
+ */
+static radeon_mipmap_tree * get_biggest_matching_miptree(radeonTexObj *texObj,
+ unsigned firstLevel,
+ unsigned lastLevel)
+{
+ const unsigned numLevels = lastLevel - firstLevel + 1;
+ unsigned *mtSizes = calloc(numLevels, sizeof(unsigned));
+ radeon_mipmap_tree **mts = calloc(numLevels, sizeof(radeon_mipmap_tree *));
+ unsigned mtCount = 0;
+ unsigned maxMtIndex = 0;
+ radeon_mipmap_tree *tmp;
+ unsigned level;
+ int i;
+
+ for (level = firstLevel; level <= lastLevel; ++level) {
+ radeon_texture_image *img = get_radeon_texture_image(texObj->base.Image[0][level]);
+ unsigned found = 0;
+ // TODO: why this hack??
+ if (!img)
+ break;
+
+ if (!img->mt)
+ continue;
+
+ for (i = 0; i < mtCount; ++i) {
+ if (mts[i] == img->mt) {
+ found = 1;
+ mtSizes[i] += img->mt->levels[img->mtlevel].size;
+ break;
+ }
+ }
+
+ if (!found && radeon_miptree_matches_texture(img->mt, &texObj->base)) {
+ mtSizes[mtCount] = img->mt->levels[img->mtlevel].size;
+ mts[mtCount] = img->mt;
+ mtCount++;
+ }
+ }
+
+ if (mtCount == 0) {
+ return NULL;
+ }
+
+ for (i = 1; i < mtCount; ++i) {
+ if (mtSizes[i] > mtSizes[maxMtIndex]) {
+ maxMtIndex = i;
+ }
+ }
+
+ tmp = mts[maxMtIndex];
+ free(mtSizes);
+ free(mts);
+
+ return tmp;
+}
+
+/**
+ * Validate texture mipmap tree.
+ * If individual images are stored in different mipmap trees
+ * use the mipmap tree that has the most of the correct data.
+ */
+int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ radeonTexObj *t = radeon_tex_obj(texObj);
+
+ if (t->validated || t->image_override) {
+ return GL_TRUE;
+ }
+
+ if (texObj->Image[0][texObj->BaseLevel]->Border > 0)
+ return GL_FALSE;
+
+ _mesa_test_texobj_completeness(rmesa->glCtx, texObj);
+ if (!texObj->_Complete) {
+ return GL_FALSE;
+ }
+
+ calculate_min_max_lod(&t->base, &t->minLod, &t->maxLod);
+
+ if (RADEON_DEBUG & RADEON_TEXTURE)
+ fprintf(stderr, "%s: Validating texture %p now, minLod = %d, maxLod = %d\n",
+ __FUNCTION__, texObj ,t->minLod, t->maxLod);
+
+ radeon_mipmap_tree *dst_miptree;
+ dst_miptree = get_biggest_matching_miptree(t, t->minLod, t->maxLod);
+
+ if (!dst_miptree) {
+ radeon_miptree_unreference(&t->mt);
+ radeon_try_alloc_miptree(rmesa, t);
+ dst_miptree = t->mt;
+ if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "%s: No matching miptree found, allocated new one %p\n", __FUNCTION__, t->mt);
+ }
+ } else if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "%s: Using miptree %p\n", __FUNCTION__, t->mt);
+ }
+
+ const unsigned faces = texObj->Target == GL_TEXTURE_CUBE_MAP ? 6 : 1;
+ unsigned face, level;
+ radeon_texture_image *img;
+ /* Validate only the levels that will actually be used during rendering */
+ for (face = 0; face < faces; ++face) {
+ for (level = t->minLod; level <= t->maxLod; ++level) {
+ img = get_radeon_texture_image(texObj->Image[face][level]);
+
+ if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "Checking image level %d, face %d, mt %p ... ", level, face, img->mt);
+ }
+
+ if (img->mt != dst_miptree) {
+ if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "MIGRATING\n");
+ }
+ struct radeon_bo *src_bo = (img->mt) ? img->mt->bo : img->bo;
+ if (src_bo && radeon_bo_is_referenced_by_cs(src_bo, rmesa->cmdbuf.cs)) {
+ radeon_firevertices(rmesa);
+ }
+ migrate_image_to_miptree(dst_miptree, img, face, level);
+ } else if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "OK\n");
+ }
+ }
+ }
+
+ t->validated = GL_TRUE;
+
+ return GL_TRUE;
+}
+
+uint32_t get_base_teximage_offset(radeonTexObj *texObj)
+{
+ if (!texObj->mt) {
+ return 0;
+ } else {
+ return radeon_miptree_image_offset(texObj->mt, 0, texObj->minLod);
+ }
}
diff --git a/radeon/radeon_mipmap_tree.h b/radeon/radeon_mipmap_tree.h
index db28252..a10649b 100644
--- a/radeon/radeon_mipmap_tree.h
+++ b/radeon/radeon_mipmap_tree.h
@@ -44,6 +44,7 @@ struct _radeon_mipmap_level {
GLuint depth;
GLuint size; /** Size of each image, in bytes */
GLuint rowstride; /** in bytes */
+ GLuint valid;
radeon_mipmap_image faces[6];
};
@@ -59,43 +60,35 @@ struct _radeon_mipmap_level {
* changed.
*/
struct _radeon_mipmap_tree {
- radeonContextPtr radeon;
- radeonTexObj *t;
struct radeon_bo *bo;
GLuint refcount;
GLuint totalsize; /** total size of the miptree, in bytes */
GLenum target; /** GL_TEXTURE_xxx */
- GLenum internal_format;
+ GLenum mesaFormat; /** MESA_FORMAT_xxx */
GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */
- GLuint firstLevel; /** First mip level stored in this mipmap tree */
- GLuint lastLevel; /** Last mip level stored in this mipmap tree */
+ GLuint baseLevel; /** gl_texture_object->baseLevel it was created for */
+ GLuint numLevels; /** Number of mip levels stored in this mipmap tree */
- GLuint width0; /** Width of firstLevel image */
- GLuint height0; /** Height of firstLevel image */
- GLuint depth0; /** Depth of firstLevel image */
+ GLuint width0; /** Width of baseLevel image */
+ GLuint height0; /** Height of baseLevel image */
+ GLuint depth0; /** Depth of baseLevel image */
- GLuint bpp; /** Bytes per texel */
GLuint tilebits; /** RADEON_TXO_xxx_TILE */
- GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */
radeon_mipmap_level levels[RADEON_MIPTREE_MAX_TEXTURE_LEVELS];
};
-radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t,
- GLenum target, GLenum internal_format, GLuint firstLevel, GLuint lastLevel,
- GLuint width0, GLuint height0, GLuint depth0,
- GLuint bpp, GLuint tilebits, GLuint compressed);
-void radeon_miptree_reference(radeon_mipmap_tree *mt);
-void radeon_miptree_unreference(radeon_mipmap_tree *mt);
+void radeon_miptree_reference(radeon_mipmap_tree *mt, radeon_mipmap_tree **ptr);
+void radeon_miptree_unreference(radeon_mipmap_tree **ptr);
GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
struct gl_texture_image *texImage, GLuint face, GLuint level);
-GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj);
-void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
- radeon_texture_image *texImage, GLuint face, GLuint level);
+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t);
GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt,
GLuint face, GLuint level);
void radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets);
+
+uint32_t get_base_teximage_offset(radeonTexObj *texObj);
#endif /* __RADEON_MIPMAP_TREE_H_ */
diff --git a/radeon/radeon_queryobj.c b/radeon/radeon_queryobj.c
index b79d864..98117cd 100644
--- a/radeon/radeon_queryobj.c
+++ b/radeon/radeon_queryobj.c
@@ -31,24 +31,11 @@
#include "main/imports.h"
#include "main/simple_list.h"
-static int radeonQueryIsFlushed(GLcontext *ctx, struct gl_query_object *q)
-{
- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
- struct radeon_query_object *tmp, *query = (struct radeon_query_object *)q;
-
- foreach(tmp, &radeon->query.not_flushed_head) {
- if (tmp == query) {
- return 0;
- }
- }
-
- return 1;
-}
-
static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q)
{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_query_object *query = (struct radeon_query_object *)q;
- uint32_t *result;
+ uint32_t *result;
int i;
radeon_print(RADEON_STATE, RADEON_VERBOSE,
@@ -56,13 +43,35 @@ static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q)
__FUNCTION__, query->Base.Id, (int) query->Base.Result);
radeon_bo_map(query->bo, GL_FALSE);
-
- result = query->bo->ptr;
+ result = query->bo->ptr;
query->Base.Result = 0;
- for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
- query->Base.Result += result[i];
- radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, result[i]);
+ if (IS_R600_CLASS(radeon->radeonScreen)) {
+ /* ZPASS EVENT writes alternating qwords
+ * At query start we set the start offset to 0 and
+ * hw writes zpass start counts to qwords 0, 2, 4, 6.
+ * At query end we set the start offset to 8 and
+ * hw writes zpass end counts to qwords 1, 3, 5, 7.
+ * then we substract. MSB is the valid bit.
+ */
+ for (i = 0; i < 16; i += 4) {
+ uint64_t start = (uint64_t)LE32_TO_CPU(result[i]) |
+ (uint64_t)LE32_TO_CPU(result[i + 1]) << 32;
+ uint64_t end = (uint64_t)LE32_TO_CPU(result[i + 2]) |
+ (uint64_t)LE32_TO_CPU(result[i + 3]) << 32;
+ if ((start & 0x8000000000000000) && (end & 0x8000000000000000)) {
+ uint64_t query_count = end - start;
+ query->Base.Result += query_count;
+
+ }
+ radeon_print(RADEON_STATE, RADEON_TRACE,
+ "%d start: %lx, end: %lx %ld\n", i, start, end, end - start);
+ }
+ } else {
+ for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
+ query->Base.Result += LE32_TO_CPU(result[i]);
+ radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, LE32_TO_CPU(result[i]));
+ }
}
radeon_bo_unmap(query->bo);
@@ -99,10 +108,11 @@ static void radeonDeleteQuery(GLcontext *ctx, struct gl_query_object *q)
static void radeonWaitQuery(GLcontext *ctx, struct gl_query_object *q)
{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_query_object *query = (struct radeon_query_object *)q;
/* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */
- if (!radeonQueryIsFlushed(ctx, q))
+ if (radeon_bo_is_referenced_by_cs(query->bo, radeon->cmdbuf.cs))
ctx->Driver.Flush(ctx);
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset);
@@ -134,8 +144,6 @@ static void radeonBeginQuery(GLcontext *ctx, struct gl_query_object *q)
radeon->query.queryobj.dirty = GL_TRUE;
radeon->hw.is_dirty = GL_TRUE;
- insert_at_tail(&radeon->query.not_flushed_head, query);
-
}
void radeonEmitQueryEnd(GLcontext *ctx)
@@ -183,7 +191,7 @@ static void radeonCheckQuery(GLcontext *ctx, struct gl_query_object *q)
uint32_t domain;
/* Need to perform a flush, as per ARB_occlusion_query spec */
- if (!radeonQueryIsFlushed(ctx, q)) {
+ if (radeon_bo_is_referenced_by_cs(query->bo, radeon->cmdbuf.cs)) {
ctx->Driver.Flush(ctx);
}
diff --git a/radeon/radeon_screen.c b/radeon/radeon_screen.c
index 5ffb55d..be2d836 100644
--- a/radeon/radeon_screen.c
+++ b/radeon/radeon_screen.c
@@ -48,17 +48,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_screen.h"
#include "radeon_common.h"
#include "radeon_span.h"
-#if !RADEON_COMMON
+#if defined(RADEON_R100)
#include "radeon_context.h"
#include "radeon_tex.h"
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_tex.h"
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#elif defined(RADEON_R300)
#include "r300_context.h"
#include "r300_tex.h"
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#include "r600_context.h"
#include "r700_driconf.h" /* +r6/r7 */
#include "r600_tex.h" /* +r6/r7 */
@@ -82,7 +82,7 @@ DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
DRI_CONF_OPT_END
-#if !RADEON_COMMON /* R100 */
+#if defined(RADEON_R100) /* R100 */
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
@@ -109,7 +109,7 @@ DRI_CONF_BEGIN
DRI_CONF_END;
static const GLuint __driNConfigOptions = 15;
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
@@ -141,13 +141,7 @@ DRI_CONF_BEGIN
DRI_CONF_END;
static const GLuint __driNConfigOptions = 17;
-extern const struct dri_extension blend_extensions[];
-extern const struct dri_extension ARB_vp_extension[];
-extern const struct dri_extension NV_vp_extension[];
-extern const struct dri_extension ATI_fs_extension[];
-extern const struct dri_extension point_extensions[];
-
-#elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
+#elif defined(RADEON_R300) || defined(RADEON_R600)
#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
@@ -218,12 +212,7 @@ DRI_CONF_BEGIN
DRI_CONF_END;
static const GLuint __driNConfigOptions = 17;
-extern const struct dri_extension gl_20_extension[];
-
-#endif /* RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) */
-
-extern const struct dri_extension card_extensions[];
-extern const struct dri_extension mm_extensions[];
+#endif
static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo );
@@ -337,7 +326,7 @@ radeonFillInModes( __DRIscreenPrivate *psp,
return (const __DRIconfig **) configs;
}
-#if !RADEON_COMMON
+#if defined(RADEON_R100)
static const __DRItexOffsetExtension radeonTexOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
radeonSetTexOffset,
@@ -350,7 +339,7 @@ static const __DRItexBufferExtension radeonTexBufferExtension = {
};
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
static const __DRIallocateExtension r200AllocateExtension = {
{ __DRI_ALLOCATE, __DRI_ALLOCATE_VERSION },
r200AllocateMemoryMESA,
@@ -370,7 +359,7 @@ static const __DRItexBufferExtension r200TexBufferExtension = {
};
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#if defined(RADEON_R300)
static const __DRItexOffsetExtension r300texOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
r300SetTexOffset,
@@ -383,7 +372,7 @@ static const __DRItexBufferExtension r300TexBufferExtension = {
};
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
static const __DRItexOffsetExtension r600texOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
r600SetTexOffset, /* +r6/r7 */
@@ -401,12 +390,14 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
screen->device_id = device_id;
screen->chip_flags = 0;
switch ( device_id ) {
+ case PCI_CHIP_RN50_515E:
+ case PCI_CHIP_RN50_5969:
+ return -1;
+
case PCI_CHIP_RADEON_LY:
case PCI_CHIP_RADEON_LZ:
case PCI_CHIP_RADEON_QY:
case PCI_CHIP_RADEON_QZ:
- case PCI_CHIP_RN50_515E:
- case PCI_CHIP_RN50_5969:
screen->chip_family = CHIP_FAMILY_RV100;
break;
@@ -1222,22 +1213,22 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->extensions[i++] = &driMediaStreamCounterExtension.base;
}
-#if !RADEON_COMMON
+#if defined(RADEON_R100)
screen->extensions[i++] = &radeonTexOffsetExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
if (IS_R200_CLASS(screen))
screen->extensions[i++] = &r200AllocateExtension.base;
screen->extensions[i++] = &r200texOffsetExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#if defined(RADEON_R300)
screen->extensions[i++] = &r300texOffsetExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
screen->extensions[i++] = &r600texOffsetExtension.base;
#endif
@@ -1376,22 +1367,22 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv)
screen->extensions[i++] = &driMediaStreamCounterExtension.base;
}
-#if !RADEON_COMMON
+#if defined(RADEON_R100)
screen->extensions[i++] = &radeonTexBufferExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
if (IS_R200_CLASS(screen))
screen->extensions[i++] = &r200AllocateExtension.base;
screen->extensions[i++] = &r200TexBufferExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#if defined(RADEON_R300)
screen->extensions[i++] = &r300TexBufferExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
screen->extensions[i++] = &r600TexBufferExtension.base;
#endif
@@ -1480,7 +1471,7 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
const GLboolean swAccum = mesaVis->accumRedBits > 0;
const GLboolean swStencil = mesaVis->stencilBits > 0 &&
mesaVis->depthBits != 24;
- GLenum rgbFormat;
+ gl_format rgbFormat;
struct radeon_framebuffer *rfb;
if (isPixmap)
@@ -1493,11 +1484,11 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
_mesa_initialize_framebuffer(&rfb->base, mesaVis);
if (mesaVis->redBits == 5)
- rgbFormat = GL_RGB5;
+ rgbFormat = _mesa_little_endian() ? MESA_FORMAT_RGB565 : MESA_FORMAT_RGB565_REV;
else if (mesaVis->alphaBits == 0)
- rgbFormat = GL_RGB8;
+ rgbFormat = _mesa_little_endian() ? MESA_FORMAT_XRGB8888 : MESA_FORMAT_XRGB8888_REV;
else
- rgbFormat = GL_RGBA8;
+ rgbFormat = _mesa_little_endian() ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB8888_REV;
/* front color renderbuffer */
rfb->color_rb[0] = radeon_create_renderbuffer(rgbFormat, driDrawPriv);
@@ -1513,19 +1504,22 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
if (mesaVis->depthBits == 24) {
if (mesaVis->stencilBits == 8) {
- struct radeon_renderbuffer *depthStencilRb = radeon_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT, driDrawPriv);
+ struct radeon_renderbuffer *depthStencilRb =
+ radeon_create_renderbuffer(MESA_FORMAT_S8_Z24, driDrawPriv);
_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depthStencilRb->base);
_mesa_add_renderbuffer(&rfb->base, BUFFER_STENCIL, &depthStencilRb->base);
depthStencilRb->has_surface = screen->depthHasSurface;
} else {
/* depth renderbuffer */
- struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT24, driDrawPriv);
+ struct radeon_renderbuffer *depth =
+ radeon_create_renderbuffer(MESA_FORMAT_X8_Z24, driDrawPriv);
_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base);
depth->has_surface = screen->depthHasSurface;
}
} else if (mesaVis->depthBits == 16) {
- /* just 16-bit depth buffer, no hw stencil */
- struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT16, driDrawPriv);
+ /* just 16-bit depth buffer, no hw stencil */
+ struct radeon_renderbuffer *depth =
+ radeon_create_renderbuffer(MESA_FORMAT_Z16, driDrawPriv);
_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base);
depth->has_surface = screen->depthHasSurface;
}
@@ -1589,22 +1583,22 @@ radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
static const __DRIconfig **
radeonInitScreen(__DRIscreenPrivate *psp)
{
-#if !RADEON_COMMON
+#if defined(RADEON_R100)
static const char *driver_name = "Radeon";
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
static const __DRIversion drm_expected = { 1, 6, 0 };
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
static const char *driver_name = "R200";
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
static const __DRIversion drm_expected = { 1, 6, 0 };
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#elif defined(RADEON_R300)
static const char *driver_name = "R300";
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
static const __DRIversion drm_expected = { 1, 24, 0 };
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
static const char *driver_name = "R600";
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
@@ -1619,27 +1613,6 @@ radeonInitScreen(__DRIscreenPrivate *psp)
return NULL;
}
- /* Calling driInitExtensions here, with a NULL context pointer,
- * does not actually enable the extensions. It just makes sure
- * that all the dispatch offsets for all the extensions that
- * *might* be enables are known. This is needed because the
- * dispatch offsets need to be known when _mesa_context_create
- * is called, but we can't enable the extensions until we have a
- * context pointer.
- *
- * Hello chicken. Hello egg. How are you two today?
- */
- driInitExtensions( NULL, card_extensions, GL_FALSE );
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
- driInitExtensions( NULL, blend_extensions, GL_FALSE );
- driInitSingleExtension( NULL, ARB_vp_extension );
- driInitSingleExtension( NULL, NV_vp_extension );
- driInitSingleExtension( NULL, ATI_fs_extension );
- driInitExtensions( NULL, point_extensions, GL_FALSE );
-#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
- driInitSingleExtension( NULL, gl_20_extension );
-#endif
-
if (!radeonInitDriver(psp))
return NULL;
@@ -1672,28 +1645,6 @@ __DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp)
int color;
__DRIconfig **configs = NULL;
- /* Calling driInitExtensions here, with a NULL context pointer,
- * does not actually enable the extensions. It just makes sure
- * that all the dispatch offsets for all the extensions that
- * *might* be enables are known. This is needed because the
- * dispatch offsets need to be known when _mesa_context_create
- * is called, but we can't enable the extensions until we have a
- * context pointer.
- *
- * Hello chicken. Hello egg. How are you two today?
- */
- driInitExtensions( NULL, card_extensions, GL_FALSE );
- driInitExtensions( NULL, mm_extensions, GL_FALSE );
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
- driInitExtensions( NULL, blend_extensions, GL_FALSE );
- driInitSingleExtension( NULL, ARB_vp_extension );
- driInitSingleExtension( NULL, NV_vp_extension );
- driInitSingleExtension( NULL, ATI_fs_extension );
- driInitExtensions( NULL, point_extensions, GL_FALSE );
-#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
- driInitSingleExtension( NULL, gl_20_extension );
-#endif
-
if (!radeonInitDriver(psp)) {
return NULL;
}
@@ -1772,13 +1723,13 @@ getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
const struct __DriverAPIRec driDriverAPI = {
.InitScreen = radeonInitScreen,
.DestroyScreen = radeonDestroyScreen,
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
.CreateContext = r200CreateContext,
.DestroyContext = r200DestroyContext,
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
.CreateContext = r600CreateContext,
.DestroyContext = radeonDestroyContext,
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#elif defined(RADEON_R300)
.CreateContext = r300CreateContext,
.DestroyContext = radeonDestroyContext,
#else
diff --git a/radeon/radeon_span.c b/radeon/radeon_span.c
index d603f52..665f2b6 100644
--- a/radeon/radeon_span.c
+++ b/radeon/radeon_span.c
@@ -41,6 +41,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/glheader.h"
+#include "main/texformat.h"
#include "swrast/swrast.h"
#include "radeon_common.h"
@@ -55,7 +56,7 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
/* r200 depth buffer is always tiled - this is the formula
according to the docs unless I typo'ed in it
*/
-#if defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
@@ -112,7 +113,7 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
* - 2D (akin to macro-tiled/micro-tiled on older asics)
* only 1D tiling is implemented below
*/
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
GLint x, GLint y, GLint is_depth, GLint is_stencil)
{
@@ -334,22 +335,6 @@ static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
#endif
-#ifndef COMPILE_R300
-#ifndef COMPILE_R600
-static uint32_t
-z24s8_to_s8z24(uint32_t val)
-{
- return (val << 24) | (val >> 8);
-}
-
-static uint32_t
-s8z24_to_z24s8(uint32_t val)
-{
- return (val >> 24) | (val << 8);
-}
-#endif
-#endif
-
/*
* Note that all information needed to access pixels in a renderbuffer
* should be obtained through the gl_renderbuffer parameter, not per-context
@@ -409,7 +394,19 @@ s8z24_to_z24s8(uint32_t val)
#define TAG(x) radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5_REV
+
+#define TAG(x) radeon##x##_RGB565_REV
+#define TAG2(x,y) radeon##x##_RGB565_REV##y
+#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
@@ -423,7 +420,19 @@ s8z24_to_z24s8(uint32_t val)
#define TAG(x) radeon##x##_ARGB1555
#define TAG2(x,y) radeon##x##_ARGB1555##y
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5
+
+#define TAG(x) radeon##x##_ARGB1555_REV
+#define TAG2(x,y) radeon##x##_ARGB1555_REV##y
+#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
@@ -437,7 +446,19 @@ s8z24_to_z24s8(uint32_t val)
#define TAG(x) radeon##x##_ARGB4444
#define TAG2(x,y) radeon##x##_ARGB4444##y
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4
+
+#define TAG(x) radeon##x##_ARGB4444_REV
+#define TAG2(x,y) radeon##x##_ARGB4444_REV##y
+#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
@@ -451,7 +472,7 @@ s8z24_to_z24s8(uint32_t val)
#define TAG(x) radeon##x##_xRGB8888
#define TAG2(x,y) radeon##x##_xRGB8888##y
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
#define PUT_VALUE(_x, _y, d) { \
GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
@@ -473,7 +494,7 @@ s8z24_to_z24s8(uint32_t val)
#define TAG(x) radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
#define PUT_VALUE(_x, _y, d) { \
GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
@@ -488,6 +509,42 @@ s8z24_to_z24s8(uint32_t val)
#endif
#include "spantmp2.h"
+/* 32 bit, BGRx8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8
+
+#define TAG(x) radeon##x##_BGRx8888
+#define TAG2(x,y) radeon##x##_BGRx8888##y
+#if defined(RADEON_R600)
+#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0x000000ff))
+#define PUT_VALUE(_x, _y, d) { \
+ GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
+ *_ptr = d; \
+} while (0)
+#else
+#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0x000000ff))
+#define PUT_VALUE(_x, _y, d) { \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ *_ptr = d; \
+} while (0)
+#endif
+#include "spantmp2.h"
+
+/* 32 bit, BGRA8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8
+
+#define TAG(x) radeon##x##_BGRA8888
+#define TAG2(x,y) radeon##x##_BGRA8888##y
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_4byte(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
/* ================================================================
* Depth buffer
*/
@@ -506,10 +563,10 @@ s8z24_to_z24s8(uint32_t val)
*/
#define VALUE_TYPE GLushort
-#if defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
#define WRITE_DEPTH( _x, _y, d ) \
*(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define WRITE_DEPTH( _x, _y, d ) \
*(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
#else
@@ -517,10 +574,10 @@ s8z24_to_z24s8(uint32_t val)
*(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
#endif
-#if defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
#define READ_DEPTH( d, _x, _y ) \
d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define READ_DEPTH( d, _x, _y ) \
d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
#else
@@ -538,16 +595,16 @@ s8z24_to_z24s8(uint32_t val)
*/
#define VALUE_TYPE GLuint
-#if defined(COMPILE_R300)
+#if defined(RADEON_R300)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
- GLuint tmp = *_ptr; \
+ GLuint tmp = LE32_TO_CPU(*_ptr); \
tmp &= 0x000000ff; \
tmp |= ((d << 8) & 0xffffff00); \
- *_ptr = tmp; \
+ *_ptr = CPU_TO_LE32(tmp); \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
@@ -556,44 +613,44 @@ do { \
tmp |= ((d) & 0x00ffffff); \
*_ptr = tmp; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
- GLuint tmp = *_ptr; \
+ GLuint tmp = LE32_TO_CPU(*_ptr); \
tmp &= 0xff000000; \
tmp |= ((d) & 0x00ffffff); \
- *_ptr = tmp; \
+ *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
- GLuint tmp = *_ptr; \
+ GLuint tmp = LE32_TO_CPU(*_ptr); \
tmp &= 0xff000000; \
tmp |= ((d) & 0x00ffffff); \
- *_ptr = tmp; \
+ *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#endif
-#if defined(COMPILE_R300)
+#if defined(RADEON_R300)
#define READ_DEPTH( d, _x, _y ) \
do { \
- d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
+ d = (LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) & 0xffffff00) >> 8; \
}while(0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define READ_DEPTH( d, _x, _y ) \
do { \
d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
}while(0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define READ_DEPTH( d, _x, _y ) \
do { \
- d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
+ d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \
}while(0)
#else
#define READ_DEPTH( d, _x, _y ) \
- d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff;
+ d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) & 0x00ffffff;
#endif
#define TAG(x) radeon##x##_z24
@@ -607,65 +664,64 @@ do { \
*/
#define VALUE_TYPE GLuint
-#if defined(COMPILE_R300)
+#if defined(RADEON_R300)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
- *_ptr = d; \
+ *_ptr = CPU_TO_LE32((((d) & 0xff000000) >> 24) | (((d) & 0x00ffffff) << 8)); \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
GLuint tmp = *_ptr; \
tmp &= 0xff000000; \
- tmp |= (((d) >> 8) & 0x00ffffff); \
+ tmp |= ((d) & 0x00ffffff); \
*_ptr = tmp; \
_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
tmp = *_ptr; \
tmp &= 0xffffff00; \
- tmp |= (d) & 0xff; \
+ tmp |= ((d) >> 24) & 0xff; \
*_ptr = tmp; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
- GLuint tmp = z24s8_to_s8z24(d); \
- *_ptr = tmp; \
+ *_ptr = CPU_TO_LE32(d); \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
- GLuint tmp = z24s8_to_s8z24(d); \
- *_ptr = tmp; \
+ *_ptr = CPU_TO_LE32(d); \
} while (0)
#endif
-#if defined(COMPILE_R300)
+#if defined(RADEON_R300)
#define READ_DEPTH( d, _x, _y ) \
do { \
- d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
+ GLuint tmp = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
+ d = LE32_TO_CPU(((tmp & 0x000000ff) << 24) | ((tmp & 0xffffff00) >> 8)); \
}while(0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define READ_DEPTH( d, _x, _y ) \
do { \
- d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
- d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \
+ d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \
+ d |= ((*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) << 24) & 0xff000000; \
}while(0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define READ_DEPTH( d, _x, _y ) \
do { \
- d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \
+ d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \
}while(0)
#else
#define READ_DEPTH( d, _x, _y ) do { \
- d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \
+ d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
} while (0)
#endif
-#define TAG(x) radeon##x##_z24_s8
+#define TAG(x) radeon##x##_s8_z24
#include "depthtmp.h"
/* ================================================================
@@ -674,16 +730,16 @@ do { \
/* 24 bit depth, 8 bit stencil depthbuffer functions
*/
-#ifdef COMPILE_R300
+#ifdef RADEON_R300
#define WRITE_STENCIL( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
- GLuint tmp = *_ptr; \
+ GLuint tmp = LE32_TO_CPU(*_ptr); \
tmp &= 0xffffff00; \
tmp |= (d) & 0xff; \
- *_ptr = tmp; \
+ *_ptr = CPU_TO_LE32(tmp); \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define WRITE_STENCIL( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
@@ -692,57 +748,57 @@ do { \
tmp |= (d) & 0xff; \
*_ptr = tmp; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define WRITE_STENCIL( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \
- GLuint tmp = *_ptr; \
+ GLuint tmp = LE32_TO_CPU(*_ptr); \
tmp &= 0x00ffffff; \
tmp |= (((d) & 0xff) << 24); \
- *_ptr = tmp; \
+ *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#else
#define WRITE_STENCIL( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
- GLuint tmp = *_ptr; \
+ GLuint tmp = LE32_TO_CPU(*_ptr); \
tmp &= 0x00ffffff; \
tmp |= (((d) & 0xff) << 24); \
- *_ptr = tmp; \
+ *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#endif
-#ifdef COMPILE_R300
+#ifdef RADEON_R300
#define READ_STENCIL( d, _x, _y ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
- GLuint tmp = *_ptr; \
+ GLuint tmp = LE32_TO_CPU(*_ptr); \
d = tmp & 0x000000ff; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define READ_STENCIL( d, _x, _y ) \
do { \
GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \
GLuint tmp = *_ptr; \
d = tmp & 0x000000ff; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define READ_STENCIL( d, _x, _y ) \
do { \
GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
- GLuint tmp = *_ptr; \
+ GLuint tmp = LE32_TO_CPU(*_ptr); \
d = (tmp & 0xff000000) >> 24; \
} while (0)
#else
#define READ_STENCIL( d, _x, _y ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
- GLuint tmp = *_ptr; \
+ GLuint tmp = LE32_TO_CPU(*_ptr); \
d = (tmp & 0xff000000) >> 24; \
} while (0)
#endif
-#define TAG(x) radeon##x##_z24_s8
+#define TAG(x) radeon##x##_s8_z24
#include "stenciltmp.h"
@@ -755,8 +811,7 @@ static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
return;
if (flag) {
- if (rrb->bo->bom->funcs->bo_wait)
- radeon_bo_wait(rrb->bo);
+ radeon_bo_wait(rrb->bo);
r = radeon_bo_map(rrb->bo, 1);
if (r) {
fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
@@ -864,25 +919,35 @@ void radeonInitSpanFuncs(GLcontext * ctx)
*/
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
{
- if (rrb->base._ActualFormat == GL_RGB5) {
+ if (rrb->base.Format == MESA_FORMAT_RGB565) {
radeonInitPointers_RGB565(&rrb->base);
- } else if (rrb->base._ActualFormat == GL_RGB8) {
+ } else if (rrb->base.Format == MESA_FORMAT_RGB565_REV) {
+ radeonInitPointers_RGB565_REV(&rrb->base);
+ } else if (rrb->base.Format == MESA_FORMAT_XRGB8888) {
radeonInitPointers_xRGB8888(&rrb->base);
- } else if (rrb->base._ActualFormat == GL_RGBA8) {
+ } else if (rrb->base.Format == MESA_FORMAT_XRGB8888_REV) {
+ radeonInitPointers_BGRx8888(&rrb->base);
+ } else if (rrb->base.Format == MESA_FORMAT_ARGB8888) {
radeonInitPointers_ARGB8888(&rrb->base);
- } else if (rrb->base._ActualFormat == GL_RGBA4) {
+ } else if (rrb->base.Format == MESA_FORMAT_ARGB8888_REV) {
+ radeonInitPointers_BGRA8888(&rrb->base);
+ } else if (rrb->base.Format == MESA_FORMAT_ARGB4444) {
radeonInitPointers_ARGB4444(&rrb->base);
- } else if (rrb->base._ActualFormat == GL_RGB5_A1) {
+ } else if (rrb->base.Format == MESA_FORMAT_ARGB4444_REV) {
+ radeonInitPointers_ARGB4444_REV(&rrb->base);
+ } else if (rrb->base.Format == MESA_FORMAT_ARGB1555) {
radeonInitPointers_ARGB1555(&rrb->base);
- } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
+ } else if (rrb->base.Format == MESA_FORMAT_ARGB1555_REV) {
+ radeonInitPointers_ARGB1555_REV(&rrb->base);
+ } else if (rrb->base.Format == MESA_FORMAT_Z16) {
radeonInitDepthPointers_z16(&rrb->base);
- } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
+ } else if (rrb->base.Format == MESA_FORMAT_X8_Z24) {
radeonInitDepthPointers_z24(&rrb->base);
- } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
- radeonInitDepthPointers_z24_s8(&rrb->base);
- } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
- radeonInitStencilPointers_z24_s8(&rrb->base);
+ } else if (rrb->base.Format == MESA_FORMAT_S8_Z24) {
+ radeonInitDepthPointers_s8_z24(&rrb->base);
+ } else if (rrb->base.Format == MESA_FORMAT_S8) {
+ radeonInitStencilPointers_s8_z24(&rrb->base);
} else {
- fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat);
+ fprintf(stderr, "radeonSetSpanFunctions: bad format: 0x%04X\n", rrb->base.Format);
}
}
diff --git a/radeon/radeon_state.c b/radeon/radeon_state.c
index 4d0d35e..f6c733a 100644
--- a/radeon/radeon_state.c
+++ b/radeon/radeon_state.c
@@ -550,6 +550,31 @@ static void radeonPolygonOffset( GLcontext *ctx,
rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32;
}
+static void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask )
+{
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint i;
+ drm_radeon_stipple_t stipple;
+
+ /* Must flip pattern upside down.
+ */
+ for ( i = 0 ; i < 32 ; i++ ) {
+ rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i];
+ }
+
+ /* TODO: push this into cmd mechanism
+ */
+ radeon_firevertices(&rmesa->radeon);
+ LOCK_HARDWARE( &rmesa->radeon );
+
+ /* FIXME: Use window x,y offsets into stipple RAM.
+ */
+ stipple.mask = rmesa->state.stipple.mask;
+ drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE,
+ &stipple, sizeof(drm_radeon_stipple_t) );
+ UNLOCK_HARDWARE( &rmesa->radeon );
+}
+
static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
diff --git a/radeon/radeon_state_init.c b/radeon/radeon_state_init.c
index f3ad0dd..dd82888 100644
--- a/radeon/radeon_state_init.c
+++ b/radeon/radeon_state_init.c
@@ -440,16 +440,18 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
if (rrb->cpp == 4)
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
- else switch (rrb->base._ActualFormat) {
- case GL_RGB5:
+ else switch (rrb->base.Format) {
+ case MESA_FORMAT_RGB565:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
break;
- case GL_RGBA4:
+ case MESA_FORMAT_ARGB4444:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
break;
- case GL_RGB5_A1:
+ case MESA_FORMAT_ARGB1555:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
break;
+ default:
+ _mesa_problem(ctx, "unexpected format in ctx_emit_cs()");
}
cbpitch = (rrb->pitch / rrb->cpp);
@@ -643,11 +645,11 @@ static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
OUT_BATCH(CP_PACKET0(RADEON_PP_TXOFFSET_0 + (24 * i), 0));
if (t->mt && !t->image_override) {
if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
- lvl = &t->mt->levels[0];
+ lvl = &t->mt->levels[t->minLod];
OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
} else {
- OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t),
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
}
} else {
diff --git a/radeon/radeon_tex.c b/radeon/radeon_tex.c
index 99865ff..749ab75 100644
--- a/radeon/radeon_tex.c
+++ b/radeon/radeon_tex.c
@@ -38,7 +38,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/enums.h"
#include "main/image.h"
#include "main/simple_list.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "main/teximage.h"
#include "main/texobj.h"
@@ -349,17 +348,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
case GL_TEXTURE_MAX_LEVEL:
case GL_TEXTURE_MIN_LOD:
case GL_TEXTURE_MAX_LOD:
-
- /* This isn't the most efficient solution but there doesn't appear to
- * be a nice alternative. Since there's no LOD clamping,
- * we just have to rely on loading the right subset of mipmap levels
- * to simulate a clamped LOD.
- */
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- t->validated = GL_FALSE;
- }
+ t->validated = GL_FALSE;
break;
default:
@@ -389,10 +378,8 @@ static void radeonDeleteTexture( GLcontext *ctx,
}
}
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- }
+ radeon_miptree_unreference(&t->mt);
+
/* Free mipmap images and the texture object itself */
_mesa_delete_texture_object(ctx, texObj);
}
diff --git a/radeon/radeon_texstate.c b/radeon/radeon_texstate.c
index 9d252aa..3cbe3b4 100644
--- a/radeon/radeon_texstate.c
+++ b/radeon/radeon_texstate.c
@@ -38,8 +38,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/colormac.h"
#include "main/context.h"
#include "main/macros.h"
-#include "main/texformat.h"
#include "main/teximage.h"
+#include "main/texstate.h"
#include "main/texobj.h"
#include "main/enums.h"
@@ -81,8 +81,10 @@ struct tx_table {
GLuint format, filter;
};
+/* XXX verify this table against MESA_FORMAT_x values */
static const struct tx_table tx_table[] =
{
+ _INVALID(NONE), /* MESA_FORMAT_NONE */
_ALPHA(RGBA8888),
_ALPHA_REV(RGBA8888),
_ALPHA(ARGB8888),
@@ -660,7 +662,7 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_
rmesa = pDRICtx->driverPrivate;
rfb = dPriv->driverPrivate;
- texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+ texUnit = _mesa_get_current_tex_unit(radeon->glCtx);
texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
@@ -697,20 +699,14 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_
radeon_bo_unref(rImage->bo);
rImage->bo = NULL;
}
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = NULL;
- }
- if (rImage->mt) {
- radeon_miptree_unreference(rImage->mt);
- rImage->mt = NULL;
- }
+
+ radeon_miptree_unreference(&t->mt);
+ radeon_miptree_unreference(&rImage->mt);
+
_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
texImage->RowStride = rb->pitch / rb->cpp;
- texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
- internalFormat,
- type, format, 0);
+
rImage->bo = rb->bo;
radeon_bo_ref(rImage->bo);
t->bo = rb->bo;
@@ -718,8 +714,6 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_
t->tile_bits = 0;
t->image_override = GL_TRUE;
t->override_offset = 0;
- t->pp_txpitch &= (1 << 13) -1;
- pitch_val = rb->pitch;
switch (rb->cpp) {
case 4:
if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT)
@@ -738,12 +732,17 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_
t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter;
break;
}
- t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
- | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
- t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
- t->pp_txpitch = pitch_val;
- t->pp_txpitch -= 32;
+ t->pp_txpitch &= (1 << 13) -1;
+ pitch_val = rb->pitch;
+
+ t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
+ | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
+ if (target == GL_TEXTURE_RECTANGLE_NV) {
+ t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
+ t->pp_txpitch = pitch_val;
+ t->pp_txpitch -= 32;
+ }
t->validated = GL_TRUE;
_mesa_unlock_texture(radeon->glCtx, texObj);
return;
@@ -833,11 +832,14 @@ static void import_tex_obj_state( r100ContextPtr rmesa,
cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
- if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
- GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
+ if (texobj->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
+ uint32_t *txr_cmd = &rmesa->hw.txr[unit].cmd[TXR_CMD_0];
txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
- RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.txr[unit] );
+ RADEON_STATECHANGE( rmesa, txr[unit] );
+ }
+
+ if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
se_coord_fmt |= RADEON_VTX_ST0_NONPARAMETRIC << unit;
}
else {
@@ -1018,7 +1020,7 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int
return GL_TRUE;
}
- firstImage = t->base.Image[0][t->mt->firstLevel];
+ firstImage = t->base.Image[0][t->minLod];
if (firstImage->Border > 0) {
fprintf(stderr, "%s: border\n", __FUNCTION__);
@@ -1028,27 +1030,27 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int
log2Width = firstImage->WidthLog2;
log2Height = firstImage->HeightLog2;
log2Depth = firstImage->DepthLog2;
- texelBytes = firstImage->TexFormat->TexelBytes;
+ texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
if (!t->image_override) {
- if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
+ if (VALID_FORMAT(firstImage->TexFormat)) {
const struct tx_table *table = tx_table;
t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
RADEON_TXFORMAT_ALPHA_IN_MAP);
t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
- t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
- t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
+ t->pp_txformat |= table[ firstImage->TexFormat ].format;
+ t->pp_txfilter |= table[ firstImage->TexFormat ].filter;
} else {
_mesa_problem(NULL, "unexpected texture format in %s",
__FUNCTION__);
return GL_FALSE;
}
}
-
+
t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
- t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << RADEON_MAX_MIP_LEVEL_SHIFT;
+ t->pp_txfilter |= (t->maxLod - t->minLod) << RADEON_MAX_MIP_LEVEL_SHIFT;
t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
RADEON_TXFORMAT_HEIGHT_MASK |
@@ -1057,9 +1059,9 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int
RADEON_TXFORMAT_F5_HEIGHT_MASK);
t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
(log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
-
+
t->tile_bits = 0;
-
+
if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
ASSERT(log2Width == log2Height);
t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
@@ -1080,7 +1082,7 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int
| ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
if ( !t->image_override ) {
- if (firstImage->IsCompressed)
+ if (_mesa_is_format_compressed(firstImage->TexFormat))
t->pp_txpitch = (firstImage->Width + 63) & ~(63);
else
t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
@@ -1114,7 +1116,6 @@ static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_objec
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=
(RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
-
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
diff --git a/radeon/radeon_texture.c b/radeon/radeon_texture.c
index fad3d1c..0317811 100644
--- a/radeon/radeon_texture.c
+++ b/radeon/radeon_texture.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (C) 2009 Maciej Cencora.
* Copyright (C) 2008 Nicolai Haehnle.
* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
*
@@ -34,7 +35,6 @@
#include "main/convolve.h"
#include "main/mipmap.h"
#include "main/texcompress.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "main/teximage.h"
#include "main/texobj.h"
@@ -47,7 +47,7 @@
#include "radeon_mipmap_tree.h"
-static void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
+void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
GLuint numrows, GLuint rowsize)
{
assert(rowsize <= dststride);
@@ -82,8 +82,7 @@ void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage)
radeon_texture_image* image = get_radeon_texture_image(timage);
if (image->mt) {
- radeon_miptree_unreference(image->mt);
- image->mt = 0;
+ radeon_miptree_unreference(&image->mt);
assert(!image->base.Data);
} else {
_mesa_free_texture_image_data(ctx, timage);
@@ -101,10 +100,15 @@ void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage)
/* Set Data pointer and additional data for mapped texture image */
static void teximage_set_map_data(radeon_texture_image *image)
{
- radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
+ radeon_mipmap_level *lvl;
+
+ if (!image->mt)
+ return;
+
+ lvl = &image->mt->levels[image->mtlevel];
image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset;
- image->base.RowStride = lvl->rowstride / image->mt->bpp;
+ image->base.RowStride = lvl->rowstride / _mesa_get_format_bytes(image->base.TexFormat);
}
@@ -139,7 +143,6 @@ static void map_override(GLcontext *ctx, radeonTexObj *t)
radeon_bo_map(t->bo, GL_FALSE);
img->base.Data = t->bo->ptr;
- _mesa_set_fetch_functions(&img->base, 2);
}
static void unmap_override(GLcontext *ctx, radeonTexObj *t)
@@ -171,7 +174,7 @@ void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
radeon_bo_map(t->mt->bo, GL_FALSE);
for(face = 0; face < t->mt->faces; ++face) {
- for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
+ for(level = t->minLod; level <= t->maxLod; ++level)
teximage_set_map_data(get_radeon_texture_image(texObj->Image[face][level]));
}
}
@@ -188,7 +191,7 @@ void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
return;
for(face = 0; face < t->mt->faces; ++face) {
- for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
+ for(level = t->minLod; level <= t->maxLod; ++level)
texObj->Image[face][level]->Data = 0;
}
radeon_bo_unmap(t->mt->bo);
@@ -237,8 +240,7 @@ static void radeon_generate_mipmap(GLcontext *ctx, GLenum target,
image->mtlevel = i;
image->mtface = face;
- radeon_miptree_unreference(image->mt);
- image->mt = NULL;
+ radeon_miptree_unreference(&image->mt);
}
}
@@ -256,9 +258,9 @@ void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_objec
/* try to find a format which will only need a memcopy */
-static const struct gl_texture_format *radeonChoose8888TexFormat(radeonContextPtr rmesa,
- GLenum srcFormat,
- GLenum srcType, GLboolean fbo)
+static gl_format radeonChoose8888TexFormat(radeonContextPtr rmesa,
+ GLenum srcFormat,
+ GLenum srcType, GLboolean fbo)
{
const GLuint ui = 1;
const GLubyte littleEndian = *((const GLubyte *)&ui);
@@ -271,37 +273,37 @@ static const struct gl_texture_format *radeonChoose8888TexFormat(radeonContextPt
(srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
(srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
(srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
- return &_mesa_texformat_rgba8888;
+ return MESA_FORMAT_RGBA8888;
} else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
(srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
(srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
(srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
- return &_mesa_texformat_rgba8888_rev;
+ return MESA_FORMAT_RGBA8888_REV;
} else if (IS_R200_CLASS(rmesa->radeonScreen)) {
return _dri_texformat_argb8888;
} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
srcType == GL_UNSIGNED_INT_8_8_8_8)) {
- return &_mesa_texformat_argb8888_rev;
+ return MESA_FORMAT_ARGB8888_REV;
} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
- return &_mesa_texformat_argb8888;
+ return MESA_FORMAT_ARGB8888;
} else
return _dri_texformat_argb8888;
}
-const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx,
- GLint internalFormat,
- GLenum format,
- GLenum type)
+gl_format radeonChooseTextureFormat_mesa(GLcontext * ctx,
+ GLint internalFormat,
+ GLenum format,
+ GLenum type)
{
return radeonChooseTextureFormat(ctx, internalFormat, format,
type, 0);
}
-const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
- GLint internalFormat,
- GLenum format,
- GLenum type, GLboolean fbo)
+gl_format radeonChooseTextureFormat(GLcontext * ctx,
+ GLint internalFormat,
+ GLenum format,
+ GLenum type, GLboolean fbo)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
const GLboolean do32bpt =
@@ -425,58 +427,72 @@ const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
case GL_YCBCR_MESA:
if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
type == GL_UNSIGNED_BYTE)
- return &_mesa_texformat_ycbcr;
+ return MESA_FORMAT_YCBCR;
else
- return &_mesa_texformat_ycbcr_rev;
+ return MESA_FORMAT_YCBCR_REV;
case GL_RGB_S3TC:
case GL_RGB4_S3TC:
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
- return &_mesa_texformat_rgb_dxt1;
+ return MESA_FORMAT_RGB_DXT1;
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
- return &_mesa_texformat_rgba_dxt1;
+ return MESA_FORMAT_RGBA_DXT1;
case GL_RGBA_S3TC:
case GL_RGBA4_S3TC:
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
- return &_mesa_texformat_rgba_dxt3;
+ return MESA_FORMAT_RGBA_DXT3;
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
- return &_mesa_texformat_rgba_dxt5;
+ return MESA_FORMAT_RGBA_DXT5;
case GL_ALPHA16F_ARB:
- return &_mesa_texformat_alpha_float16;
+ return MESA_FORMAT_ALPHA_FLOAT16;
case GL_ALPHA32F_ARB:
- return &_mesa_texformat_alpha_float32;
+ return MESA_FORMAT_ALPHA_FLOAT32;
case GL_LUMINANCE16F_ARB:
- return &_mesa_texformat_luminance_float16;
+ return MESA_FORMAT_LUMINANCE_FLOAT16;
case GL_LUMINANCE32F_ARB:
- return &_mesa_texformat_luminance_float32;
+ return MESA_FORMAT_LUMINANCE_FLOAT32;
case GL_LUMINANCE_ALPHA16F_ARB:
- return &_mesa_texformat_luminance_alpha_float16;
+ return MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16;
case GL_LUMINANCE_ALPHA32F_ARB:
- return &_mesa_texformat_luminance_alpha_float32;
+ return MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32;
case GL_INTENSITY16F_ARB:
- return &_mesa_texformat_intensity_float16;
+ return MESA_FORMAT_INTENSITY_FLOAT16;
case GL_INTENSITY32F_ARB:
- return &_mesa_texformat_intensity_float32;
+ return MESA_FORMAT_INTENSITY_FLOAT32;
case GL_RGB16F_ARB:
- return &_mesa_texformat_rgba_float16;
+ return MESA_FORMAT_RGBA_FLOAT16;
case GL_RGB32F_ARB:
- return &_mesa_texformat_rgba_float32;
+ return MESA_FORMAT_RGBA_FLOAT32;
case GL_RGBA16F_ARB:
- return &_mesa_texformat_rgba_float16;
+ return MESA_FORMAT_RGBA_FLOAT16;
case GL_RGBA32F_ARB:
- return &_mesa_texformat_rgba_float32;
+ return MESA_FORMAT_RGBA_FLOAT32;
+#ifdef RADEON_R300
+ case GL_DEPTH_COMPONENT:
+ case GL_DEPTH_COMPONENT16:
+ return MESA_FORMAT_Z16;
+ case GL_DEPTH_COMPONENT24:
+ case GL_DEPTH_COMPONENT32:
+ case GL_DEPTH_STENCIL_EXT:
+ case GL_DEPTH24_STENCIL8_EXT:
+ if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ return MESA_FORMAT_S8_Z24;
+ else
+ return MESA_FORMAT_Z16;
+#else
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT16:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
case GL_DEPTH_STENCIL_EXT:
case GL_DEPTH24_STENCIL8_EXT:
- return &_mesa_texformat_s8_z24;
+ return MESA_FORMAT_S8_Z24;
+#endif
/* EXT_texture_sRGB */
case GL_SRGB:
@@ -485,26 +501,193 @@ const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
case GL_SRGB8_ALPHA8:
case GL_COMPRESSED_SRGB:
case GL_COMPRESSED_SRGB_ALPHA:
- return &_mesa_texformat_srgba8;
+ return MESA_FORMAT_SRGBA8;
case GL_SLUMINANCE:
case GL_SLUMINANCE8:
case GL_COMPRESSED_SLUMINANCE:
- return &_mesa_texformat_sl8;
+ return MESA_FORMAT_SL8;
case GL_SLUMINANCE_ALPHA:
case GL_SLUMINANCE8_ALPHA8:
case GL_COMPRESSED_SLUMINANCE_ALPHA:
- return &_mesa_texformat_sla8;
+ return MESA_FORMAT_SLA8;
default:
_mesa_problem(ctx,
"unexpected internalFormat 0x%x in %s",
(int)internalFormat, __func__);
+ return MESA_FORMAT_NONE;
+ }
+
+ return MESA_FORMAT_NONE; /* never get here */
+}
+
+/** Check if given image is valid within current texture object.
+ */
+static int image_matches_texture_obj(struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage,
+ unsigned level)
+{
+ const struct gl_texture_image *baseImage = texObj->Image[0][texObj->BaseLevel];
+
+ if (!baseImage)
+ return 0;
+
+ if (level < texObj->BaseLevel || level > texObj->MaxLevel)
+ return 0;
+
+ const unsigned levelDiff = level - texObj->BaseLevel;
+ const unsigned refWidth = MAX2(baseImage->Width >> levelDiff, 1);
+ const unsigned refHeight = MAX2(baseImage->Height >> levelDiff, 1);
+ const unsigned refDepth = MAX2(baseImage->Depth >> levelDiff, 1);
+
+ return (texImage->Width == refWidth &&
+ texImage->Height == refHeight &&
+ texImage->Depth == refDepth);
+}
+
+static void teximage_assign_miptree(radeonContextPtr rmesa,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage,
+ unsigned face,
+ unsigned level)
+{
+ radeonTexObj *t = radeon_tex_obj(texObj);
+ radeon_texture_image* image = get_radeon_texture_image(texImage);
+
+ /* Since miptree holds only images for levels <BaseLevel..MaxLevel>
+ * don't allocate the miptree if the teximage won't fit.
+ */
+ if (!image_matches_texture_obj(texObj, texImage, level))
+ return;
+
+ /* Try using current miptree, or create new if there isn't any */
+ if (!t->mt || !radeon_miptree_matches_image(t->mt, texImage, face, level)) {
+ radeon_miptree_unreference(&t->mt);
+ radeon_try_alloc_miptree(rmesa, t);
+ if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "%s: texObj %p, texImage %p, face %d, level %d, "
+ "texObj miptree doesn't match, allocated new miptree %p\n",
+ __FUNCTION__, texObj, texImage, face, level, t->mt);
+ }
+ }
+
+ /* Miptree alocation may have failed,
+ * when there was no image for baselevel specified */
+ if (t->mt) {
+ image->mtface = face;
+ image->mtlevel = level;
+ radeon_miptree_reference(t->mt, &image->mt);
+ }
+}
+
+static GLuint * allocate_image_offsets(GLcontext *ctx,
+ unsigned alignedWidth,
+ unsigned height,
+ unsigned depth)
+{
+ int i;
+ GLuint *offsets;
+
+ offsets = _mesa_malloc(depth * sizeof(GLuint)) ;
+ if (!offsets) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTex[Sub]Image");
return NULL;
}
- return NULL; /* never get here */
+ for (i = 0; i < depth; ++i) {
+ offsets[i] = alignedWidth * height * i;
+ }
+
+ return offsets;
+}
+
+/**
+ * Update a subregion of the given texture image.
+ */
+static void radeon_store_teximage(GLcontext* ctx, int dims,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLsizei imageSize,
+ GLenum format, GLenum type,
+ const GLvoid * pixels,
+ const struct gl_pixelstore_attrib *packing,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage,
+ int compressed)
+{
+ radeonTexObj *t = radeon_tex_obj(texObj);
+ radeon_texture_image* image = get_radeon_texture_image(texImage);
+
+ GLuint dstRowStride;
+ GLuint *dstImageOffsets;
+
+ if (image->mt) {
+ dstRowStride = image->mt->levels[image->mtlevel].rowstride;
+ } else if (t->bo) {
+ /* TFP case */
+ /* TODO */
+ assert(0);
+ } else {
+ dstRowStride = _mesa_format_row_stride(texImage->TexFormat, texImage->Width);
+ }
+
+ assert(dstRowStride);
+
+ if (dims == 3) {
+ unsigned alignedWidth = dstRowStride/_mesa_get_format_bytes(texImage->TexFormat);
+ dstImageOffsets = allocate_image_offsets(ctx, alignedWidth, texImage->Height, texImage->Depth);
+ if (!dstImageOffsets) {
+ return;
+ }
+ } else {
+ dstImageOffsets = texImage->ImageOffsets;
+ }
+
+ radeon_teximage_map(image, GL_TRUE);
+
+ if (compressed) {
+ uint32_t srcRowStride, bytesPerRow, rows, block_width, block_height;
+ GLubyte *img_start;
+
+ _mesa_get_format_block_size(texImage->TexFormat, &block_width, &block_height);
+
+ if (!image->mt) {
+ dstRowStride = _mesa_format_row_stride(texImage->TexFormat, texImage->Width);
+ img_start = _mesa_compressed_image_address(xoffset, yoffset, 0,
+ texImage->TexFormat,
+ texImage->Width, texImage->Data);
+ }
+ else {
+ uint32_t offset;
+ offset = dstRowStride / _mesa_get_format_bytes(texImage->TexFormat) * yoffset / block_height + xoffset / block_width;
+ offset *= _mesa_get_format_bytes(texImage->TexFormat);
+ img_start = texImage->Data + offset;
+ }
+ srcRowStride = _mesa_format_row_stride(texImage->TexFormat, width);
+ bytesPerRow = srcRowStride;
+ rows = (height + block_height - 1) / block_height;
+
+ copy_rows(img_start, dstRowStride, pixels, srcRowStride, rows, bytesPerRow);
+ }
+ else {
+ if (!_mesa_texstore(ctx, dims, texImage->_BaseFormat,
+ texImage->TexFormat, texImage->Data,
+ xoffset, yoffset, zoffset,
+ dstRowStride,
+ dstImageOffsets,
+ width, height, depth,
+ format, type, pixels, packing)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
+ }
+ }
+
+ if (dims == 3) {
+ _mesa_free(dstImageOffsets);
+ }
+
+ radeon_teximage_unmap(image);
}
/**
@@ -525,13 +708,22 @@ static void radeon_teximage(
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
radeonTexObj* t = radeon_tex_obj(texObj);
radeon_texture_image* image = get_radeon_texture_image(texImage);
- GLuint dstRowStride;
GLint postConvWidth = width;
GLint postConvHeight = height;
- GLuint texelBytes;
GLuint face = radeon_face_for_target(target);
- radeon_firevertices(rmesa);
+ {
+ struct radeon_bo *bo;
+ bo = !image->mt ? image->bo : image->mt->bo;
+ if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
+ radeon_firevertices(rmesa);
+ }
+ }
+
+ if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "radeon_teximage%dd: texObj %p, texImage %p, face %d, level %d\n",
+ dims, texObj, texImage, face, level);
+ }
t->validated = GL_FALSE;
@@ -540,62 +732,35 @@ static void radeon_teximage(
&postConvHeight);
}
- /* Choose and fill in the texture format for this image */
- texImage->TexFormat = radeonChooseTextureFormat(ctx, internalFormat, format, type, 0);
- _mesa_set_fetch_functions(texImage, dims);
-
- if (texImage->TexFormat->TexelBytes == 0) {
- texelBytes = 0;
- texImage->IsCompressed = GL_TRUE;
- texImage->CompressedSize =
- ctx->Driver.CompressedTextureSize(ctx, texImage->Width,
- texImage->Height, texImage->Depth,
- texImage->TexFormat->MesaFormat);
- } else {
- texImage->IsCompressed = GL_FALSE;
- texImage->CompressedSize = 0;
-
- texelBytes = texImage->TexFormat->TexelBytes;
+ if (!_mesa_is_format_compressed(texImage->TexFormat)) {
+ GLuint texelBytes = _mesa_get_format_bytes(texImage->TexFormat);
/* Minimum pitch of 32 bytes */
if (postConvWidth * texelBytes < 32) {
- postConvWidth = 32 / texelBytes;
- texImage->RowStride = postConvWidth;
+ postConvWidth = 32 / texelBytes;
+ texImage->RowStride = postConvWidth;
}
- if (!image->mt) {
+ if (!image->mt) {
assert(texImage->RowStride == postConvWidth);
}
}
- /* Allocate memory for image */
- radeonFreeTexImageData(ctx, texImage); /* Mesa core only clears texImage->Data but not image->mt */
-
- if (t->mt &&
- t->mt->firstLevel == level &&
- t->mt->lastLevel == level &&
- t->mt->target != GL_TEXTURE_CUBE_MAP_ARB &&
- !radeon_miptree_matches_image(t->mt, texImage, face, level)) {
- radeon_miptree_unreference(t->mt);
- t->mt = NULL;
- }
-
- if (!t->mt)
- radeon_try_alloc_miptree(rmesa, t, image, face, level);
- if (t->mt && radeon_miptree_matches_image(t->mt, texImage, face, level)) {
- radeon_mipmap_level *lvl;
- image->mt = t->mt;
- image->mtlevel = level - t->mt->firstLevel;
- image->mtface = face;
- radeon_miptree_reference(t->mt);
- lvl = &image->mt->levels[image->mtlevel];
- dstRowStride = lvl->rowstride;
- } else {
- int size;
- if (texImage->IsCompressed) {
- size = texImage->CompressedSize;
- } else {
- size = texImage->Width * texImage->Height * texImage->Depth * texImage->TexFormat->TexelBytes;
+ /* Mesa core only clears texImage->Data but not image->mt */
+ radeonFreeTexImageData(ctx, texImage);
+
+ if (!t->bo) {
+ teximage_assign_miptree(rmesa, texObj, texImage, face, level);
+ if (!image->mt) {
+ int size = _mesa_format_image_size(texImage->TexFormat,
+ texImage->Width,
+ texImage->Height,
+ texImage->Depth);
+ texImage->Data = _mesa_alloc_texmemory(size);
+ if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "radeon_teximage%dd: texObj %p, texImage %p, "
+ " no miptree assigned, using local memory %p\n",
+ dims, texObj, texImage, texImage->Data);
+ }
}
- texImage->Data = _mesa_alloc_texmemory(size);
}
/* Upload texture image; note that the spec allows pixels to be NULL */
@@ -609,69 +774,16 @@ static void radeon_teximage(
}
if (pixels) {
- radeon_teximage_map(image, GL_TRUE);
- if (compressed) {
- if (image->mt) {
- uint32_t srcRowStride, bytesPerRow, rows;
- srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
- bytesPerRow = srcRowStride;
- rows = (height + 3) / 4;
- copy_rows(texImage->Data, image->mt->levels[level].rowstride,
- pixels, srcRowStride, rows, bytesPerRow);
- } else {
- memcpy(texImage->Data, pixels, imageSize);
- }
- } else {
- GLuint dstRowStride;
- GLuint *dstImageOffsets;
-
- if (image->mt) {
- radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
- dstRowStride = lvl->rowstride;
- } else {
- dstRowStride = texImage->Width * texImage->TexFormat->TexelBytes;
- }
-
- if (dims == 3) {
- int i;
-
- dstImageOffsets = _mesa_malloc(depth * sizeof(GLuint)) ;
- if (!dstImageOffsets)
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
-
- for (i = 0; i < depth; ++i) {
- dstImageOffsets[i] = dstRowStride/texImage->TexFormat->TexelBytes * height * i;
- }
- } else {
- dstImageOffsets = texImage->ImageOffsets;
- }
-
- if (!texImage->TexFormat->StoreImage(ctx, dims,
- texImage->_BaseFormat,
- texImage->TexFormat,
- texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */
- dstRowStride,
- dstImageOffsets,
- width, height, depth,
- format, type, pixels, packing))
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
-
- if (dims == 3)
- _mesa_free(dstImageOffsets);
- }
-
- /* SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- radeon_generate_mipmap(ctx, target, texObj);
- }
+ radeon_store_teximage(ctx, dims,
+ 0, 0, 0,
+ width, height, depth,
+ imageSize, format, type,
+ pixels, packing,
+ texObj, texImage,
+ compressed);
}
_mesa_unmap_teximage_pbo(ctx, packing);
-
- if (pixels)
- radeon_teximage_unmap(image);
-
-
}
void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
@@ -724,7 +836,7 @@ void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
}
/**
- * Update a subregion of the given texture image.
+ * All glTexSubImage calls go through this function.
*/
static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int level,
GLint xoffset, GLint yoffset, GLint zoffset,
@@ -741,69 +853,39 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve
radeonTexObj* t = radeon_tex_obj(texObj);
radeon_texture_image* image = get_radeon_texture_image(texImage);
- radeon_firevertices(rmesa);
+ {
+ struct radeon_bo *bo;
+ bo = !image->mt ? image->bo : image->mt->bo;
+ if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
+ radeon_firevertices(rmesa);
+ }
+ }
+
+ if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "radeon_texsubimage%dd: texObj %p, texImage %p, face %d, level %d\n",
+ dims, texObj, texImage, radeon_face_for_target(target), level);
+ }
t->validated = GL_FALSE;
if (compressed) {
pixels = _mesa_validate_pbo_compressed_teximage(
- ctx, imageSize, pixels, packing, "glCompressedTexImage");
+ ctx, imageSize, pixels, packing, "glCompressedTexSubImage");
} else {
pixels = _mesa_validate_pbo_teximage(ctx, dims,
- width, height, depth, format, type, pixels, packing, "glTexSubImage1D");
+ width, height, depth, format, type, pixels, packing, "glTexSubImage");
}
if (pixels) {
- GLint dstRowStride;
- radeon_teximage_map(image, GL_TRUE);
-
- if (image->mt) {
- radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
- dstRowStride = lvl->rowstride;
- } else {
- dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes;
- }
-
- if (compressed) {
- uint32_t srcRowStride, bytesPerRow, rows;
- GLubyte *img_start;
- if (!image->mt) {
- dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width);
- img_start = _mesa_compressed_image_address(xoffset, yoffset, 0,
- texImage->TexFormat->MesaFormat,
- texImage->Width, texImage->Data);
- }
- else {
- uint32_t blocks_x = dstRowStride / (image->mt->bpp * 4);
- img_start = texImage->Data + image->mt->bpp * 4 * (blocks_x * (yoffset / 4) + xoffset / 4);
- }
- srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
- bytesPerRow = srcRowStride;
- rows = (height + 3) / 4;
-
- copy_rows(img_start, dstRowStride, pixels, srcRowStride, rows, bytesPerRow);
-
- } else {
- if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
- texImage->TexFormat, texImage->Data,
- xoffset, yoffset, zoffset,
- dstRowStride,
- texImage->ImageOffsets,
- width, height, depth,
- format, type, pixels, packing))
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
- }
-
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- radeon_generate_mipmap(ctx, target, texObj);
- }
+ radeon_store_teximage(ctx, dims,
+ xoffset, yoffset, zoffset,
+ width, height, depth,
+ imageSize, format, type,
+ pixels, packing,
+ texObj, texImage,
+ compressed);
}
- radeon_teximage_unmap(image);
-
_mesa_unmap_teximage_pbo(ctx, packing);
-
-
}
void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
@@ -859,143 +941,6 @@ void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
format, type, pixels, packing, texObj, texImage, 0);
}
-
-
-/**
- * Ensure that the given image is stored in the given miptree from now on.
- */
-static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_image *image, int face, int level)
-{
- radeon_mipmap_level *dstlvl = &mt->levels[level - mt->firstLevel];
- unsigned char *dest;
-
- assert(image->mt != mt);
- assert(dstlvl->width == image->base.Width);
- assert(dstlvl->height == image->base.Height);
- assert(dstlvl->depth == image->base.Depth);
-
-
- radeon_bo_map(mt->bo, GL_TRUE);
- dest = mt->bo->ptr + dstlvl->faces[face].offset;
-
- if (image->mt) {
- /* Format etc. should match, so we really just need a memcpy().
- * In fact, that memcpy() could be done by the hardware in many
- * cases, provided that we have a proper memory manager.
- */
- radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel-image->mt->firstLevel];
-
- assert(srclvl->size == dstlvl->size);
- assert(srclvl->rowstride == dstlvl->rowstride);
-
- radeon_bo_map(image->mt->bo, GL_FALSE);
-
- memcpy(dest,
- image->mt->bo->ptr + srclvl->faces[face].offset,
- dstlvl->size);
- radeon_bo_unmap(image->mt->bo);
-
- radeon_miptree_unreference(image->mt);
- } else {
- uint32_t srcrowstride;
- uint32_t height;
- /* need to confirm this value is correct */
- if (mt->compressed) {
- height = (image->base.Height + 3) / 4;
- srcrowstride = _mesa_compressed_row_stride(image->base.TexFormat->MesaFormat, image->base.Width);
- } else {
- height = image->base.Height * image->base.Depth;
- srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes;
- }
-
-// if (mt->tilebits)
-// WARN_ONCE("%s: tiling not supported yet", __FUNCTION__);
-
- copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride,
- height, srcrowstride);
-
- _mesa_free_texmemory(image->base.Data);
- image->base.Data = 0;
- }
-
- radeon_bo_unmap(mt->bo);
-
- image->mt = mt;
- image->mtface = face;
- image->mtlevel = level;
- radeon_miptree_reference(image->mt);
-}
-
-int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj)
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- radeonTexObj *t = radeon_tex_obj(texObj);
- radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[0][texObj->BaseLevel]);
- int face, level;
-
- if (t->validated || t->image_override)
- return GL_TRUE;
-
- if (RADEON_DEBUG & RADEON_TEXTURE)
- fprintf(stderr, "%s: Validating texture %p now\n", __FUNCTION__, texObj);
-
- if (baseimage->base.Border > 0)
- return GL_FALSE;
-
- /* Ensure a matching miptree exists.
- *
- * Differing mipmap trees can result when the app uses TexImage to
- * change texture dimensions.
- *
- * Prefer to use base image's miptree if it
- * exists, since that most likely contains more valid data (remember
- * that the base level is usually significantly larger than the rest
- * of the miptree, so cubemaps are the only possible exception).
- */
- if (baseimage->mt &&
- baseimage->mt != t->mt &&
- radeon_miptree_matches_texture(baseimage->mt, &t->base)) {
- radeon_miptree_unreference(t->mt);
- t->mt = baseimage->mt;
- radeon_miptree_reference(t->mt);
- } else if (t->mt && !radeon_miptree_matches_texture(t->mt, &t->base)) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- }
-
- if (!t->mt) {
- if (RADEON_DEBUG & RADEON_TEXTURE)
- fprintf(stderr, " Allocate new miptree\n");
- radeon_try_alloc_miptree(rmesa, t, baseimage, 0, texObj->BaseLevel);
- if (!t->mt) {
- _mesa_problem(ctx, "radeon_validate_texture failed to alloc miptree");
- return GL_FALSE;
- }
- }
-
- /* Ensure all images are stored in the single main miptree */
- for(face = 0; face < t->mt->faces; ++face) {
- for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) {
- radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]);
- if (RADEON_DEBUG & RADEON_TEXTURE)
- fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt);
- if (t->mt == image->mt) {
- if (RADEON_DEBUG & RADEON_TEXTURE)
- fprintf(stderr, "OK\n");
-
- continue;
- }
-
- if (RADEON_DEBUG & RADEON_TEXTURE)
- fprintf(stderr, "migrating\n");
- migrate_image_to_miptree(t->mt, image, face, level);
- }
- }
-
- return GL_TRUE;
-}
-
-
/**
* Need to map texture image into memory before copying image data,
* then unmap it.
diff --git a/radeon/radeon_texture.h b/radeon/radeon_texture.h
index 888a55b..906daf1 100644
--- a/radeon/radeon_texture.h
+++ b/radeon/radeon_texture.h
@@ -30,6 +30,11 @@
#ifndef RADEON_TEXTURE_H
#define RADEON_TEXTURE_H
+
+#include "main/formats.h"
+
+void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
+ GLuint numrows, GLuint rowsize);
struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx);
void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage);
@@ -40,14 +45,16 @@ void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj);
int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj);
GLuint radeon_face_for_target(GLenum target);
-const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx,
- GLint internalFormat,
- GLenum format,
- GLenum type);
-const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
- GLint internalFormat,
- GLenum format,
- GLenum type, GLboolean fbo);
+
+gl_format radeonChooseTextureFormat_mesa(GLcontext * ctx,
+ GLint internalFormat,
+ GLenum format,
+ GLenum type);
+
+gl_format radeonChooseTextureFormat(GLcontext * ctx,
+ GLint internalFormat,
+ GLenum format,
+ GLenum type, GLboolean fbo);
void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
GLint internalFormat,