diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-03-14 22:15:41 +0000 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-03-14 22:15:41 +0000 |
commit | 534eb0f6eea95ff5851d3cb74663679fcd375572 (patch) | |
tree | fe6b3c1c482725e17e0fed3ad9d9004e8870c988 | |
parent | 0c8469d1892b441c38d1cb09d6bbf85692c89e92 (diff) |
Import radeon, r200, r300 and r600 dri drivers from mesa 7.7.0.7.7.0
115 files changed, 9048 insertions, 4906 deletions
diff --git a/configure.ac b/configure.ac index d9be032..5cd936f 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-radeon], 7.6.0, [], mesa-dri-radeon) +AC_INIT([mesa-dri-radeon], 7.7.0, [], mesa-dri-radeon) AM_INIT_AUTOMAKE([dist-bzip2]) @@ -16,12 +16,11 @@ AC_PROG_CC AC_HEADER_STDC PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0]) -# we now need dri_metaops.h -PKG_CHECK_MODULES([DRI], [libmesadri >= 7.6.0 libmesadri < 7.7.0 - libmesadricommon >= 7.6.0 libmesadricommon < 7.7.0]) +PKG_CHECK_MODULES([DRI], [libmesadri >= 7.7.0 libmesadri < 7.8.0 + libmesadricommon >= 7.7.0 libmesadricommon < 7.8.0]) # libdrm 2.4.17 changed the api significantly. -PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon libdrm <= 2.4.16], +PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon libdrm >= 2.4.17], HAVE_LIBDRM_RADEON=yes, HAVE_LIBDRM_RADEON=no) AM_CONDITIONAL(HAVE_LIBDRM_RADEON, test "x$HAVE_LIBDRM_RADEON" = xyes) diff --git a/r200/Makefile.am b/r200/Makefile.am index 804b285..7cdc634 100644 --- a/r200/Makefile.am +++ b/r200/Makefile.am @@ -1,7 +1,6 @@ AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING -R200_CFLAGS = -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R200 -R200_CFLAGS += -I../radeon -I../radeon/server +R200_CFLAGS = -DRADEON_R200 -I../radeon -I../radeon/server r200_dri_la_LTLIBRARIES = r200_dri.la r200_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R200_CFLAGS) @@ -41,5 +40,7 @@ if HAVE_LIBDRM_RADEON r200_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS) r200_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS) r200_dri_la_SOURCES += \ - ../radeon/radeon_cs_space_drm.c + ../radeon/radeon_cs_space_drm.c \ + ../radeon/radeon_bo.c \ + ../radeon/radeon_cs.c endif diff --git a/r200/r200_context.c b/r200/r200_context.c index 3ddb5bf..5f985d6 100644 --- a/r200/r200_context.c +++ b/r200/r200_context.c @@ -75,7 +75,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_NV_vertex_program #define need_GL_ARB_point_parameters #define need_GL_EXT_framebuffer_object -#include "extension_helper.h" +#include "main/remap_helper.h" #define DRIVER_DATE "20060602" @@ -115,7 +115,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name ) /* Extension strings exported by the R200 driver. */ -const struct dri_extension card_extensions[] = +static const struct dri_extension card_extensions[] = { { "GL_ARB_multitexture", NULL }, { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, @@ -146,31 +146,31 @@ const struct dri_extension card_extensions[] = { NULL, NULL } }; -const struct dri_extension blend_extensions[] = { +static const struct dri_extension blend_extensions[] = { { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, { "GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions }, { NULL, NULL } }; -const struct dri_extension ARB_vp_extension[] = { +static const struct dri_extension ARB_vp_extension[] = { { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions } }; -const struct dri_extension NV_vp_extension[] = { +static const struct dri_extension NV_vp_extension[] = { { "GL_NV_vertex_program", GL_NV_vertex_program_functions } }; -const struct dri_extension ATI_fs_extension[] = { +static const struct dri_extension ATI_fs_extension[] = { { "GL_ATI_fragment_shader", GL_ATI_fragment_shader_functions } }; -const struct dri_extension point_extensions[] = { +static const struct dri_extension point_extensions[] = { { "GL_ARB_point_sprite", NULL }, { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, { NULL, NULL } }; -const struct dri_extension mm_extensions[] = { +static const struct dri_extension mm_extensions[] = { { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, { NULL, NULL } }; @@ -325,9 +325,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, _mesa_init_driver_functions(&functions); r200InitDriverFuncs(&functions); r200InitIoctlFuncs(&functions); - r200InitStateFuncs(&functions, screen->kernel_mm); + r200InitStateFuncs(&functions); r200InitTextureFuncs(&functions); - r200InitShaderFuncs(&functions); + r200InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); if (!radeonInitContext(&rmesa->radeon, &functions, diff --git a/r200/r200_state.c b/r200/r200_state.c index 76852e3..6d99c03 100644 --- a/r200/r200_state.c +++ b/r200/r200_state.c @@ -1578,13 +1578,6 @@ static void r200ClearStencil( GLcontext *ctx, GLint s ) * Window position and viewport transformation */ -/* - * To correctly position primitives: - */ -#define SUBPIXEL_X 0.125 -#define SUBPIXEL_Y 0.125 - - /** * Called when window size or position changes or viewport or depth range * state is changed. We update the hardware viewport state here. @@ -1609,9 +1602,9 @@ void r200UpdateWindow( GLcontext *ctx ) } float_ui32_type sx = { v[MAT_SX] }; - float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X }; + float_ui32_type tx = { v[MAT_TX] + xoffset }; float_ui32_type sy = { v[MAT_SY] * y_scale }; - float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y }; + float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias }; float_ui32_type sz = { v[MAT_SZ] * depthScale }; float_ui32_type tz = { v[MAT_TZ] * depthScale }; @@ -1680,8 +1673,8 @@ void r200UpdateViewportOffset( GLcontext *ctx ) float_ui32_type tx; float_ui32_type ty; - tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X; - ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y; + tx.f = v[MAT_TX] + xoffset; + ty.f = (- v[MAT_TY]) + yoffset; if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 || rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 ) @@ -2483,7 +2476,7 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask ) } /* Initialize the driver's state functions. */ -void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 ) +void r200InitStateFuncs( struct dd_function_table *functions ) { functions->UpdateState = r200InvalidateState; functions->LightingSpaceChange = r200LightingSpaceChange; @@ -2517,10 +2510,7 @@ void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 ) functions->LogicOpcode = r200LogicOpCode; functions->PolygonMode = r200PolygonMode; functions->PolygonOffset = r200PolygonOffset; - if (dri2) - functions->PolygonStipple = r200PolygonStipple; - else - functions->PolygonStipple = radeonPolygonStipplePreKMS; + functions->PolygonStipple = r200PolygonStipple; functions->PointParameterfv = r200PointParameter; functions->PointSize = r200PointSize; functions->RenderMode = r200RenderMode; diff --git a/r200/r200_state.h b/r200/r200_state.h index 9c62f0a..7b9b0c1 100644 --- a/r200/r200_state.h +++ b/r200/r200_state.h @@ -38,7 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_context.h" extern void r200InitState( r200ContextPtr rmesa ); -extern void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 ); +extern void r200InitStateFuncs( struct dd_function_table *functions ); extern void r200InitTnlFuncs( GLcontext *ctx ); extern void r200UpdateMaterial( GLcontext *ctx ); diff --git a/r200/r200_state_init.c b/r200/r200_state_init.c index 7697306..6c5a0b7 100644 --- a/r200/r200_state_init.c +++ b/r200/r200_state_init.c @@ -529,16 +529,18 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); if (rrb->cpp == 4) atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; - else switch (rrb->base._ActualFormat) { - case GL_RGB5: + else switch (rrb->base.Format) { + case MESA_FORMAT_RGB565: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; break; - case GL_RGBA4: + case MESA_FORMAT_ARGB4444: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444; break; - case GL_RGB5_A1: + case MESA_FORMAT_ARGB1555: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555; break; + default: + _mesa_problem(ctx, "Unexpected format in ctx_emit_cs"); } cbpitch = (rrb->pitch / rrb->cpp); @@ -638,7 +640,7 @@ static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) OUT_BATCH_TABLE(atom->cmd, 10); if (t && t->mt && !t->image_override) { - OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t), RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); } else if (!t) { /* workaround for old CS mechanism */ @@ -885,10 +887,8 @@ void r200InitState( r200ContextPtr rmesa ) } } } - /* polygon stipple is done with irq for non-kms */ - if (rmesa->radeon.radeonScreen->kernel_mm) { - ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 ); - } + + ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 ); for (i = 0; i < 6; i++) if (rmesa->radeon.radeonScreen->kernel_mm) @@ -1120,12 +1120,11 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.sci.cmd[SCI_CMD_1] = CP_PACKET0(R200_RE_TOP_LEFT, 0); rmesa->hw.sci.cmd[SCI_CMD_2] = CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0); - if (rmesa->radeon.radeonScreen->kernel_mm) { - - rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0); - rmesa->hw.stp.cmd[STP_DATA_0] = 0; - rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31); + rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0); + rmesa->hw.stp.cmd[STP_DATA_0] = 0; + rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31); + if (rmesa->radeon.radeonScreen->kernel_mm) { rmesa->hw.mtl[0].emit = mtl_emit; rmesa->hw.mtl[1].emit = mtl_emit; diff --git a/r200/r200_tcl.c b/r200/r200_tcl.c index c702910..e7d48a7 100644 --- a/r200/r200_tcl.c +++ b/r200/r200_tcl.c @@ -509,25 +509,26 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, prog to a not enabled output however, so just don't mess with it. We only need to change compsel. */ GLuint out_compsel = 0; - GLuint vp_out = rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten; + const GLbitfield64 vp_out = + rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten; vimap_rev = &rmesa->curr_vp_hw->inputmap_rev[0]; - assert(vp_out & (1 << VERT_RESULT_HPOS)); + assert(vp_out & BITFIELD64_BIT(VERT_RESULT_HPOS)); out_compsel = R200_OUTPUT_XYZW; - if (vp_out & (1 << VERT_RESULT_COL0)) { + if (vp_out & BITFIELD64_BIT(VERT_RESULT_COL0)) { out_compsel |= R200_OUTPUT_COLOR_0; } - if (vp_out & (1 << VERT_RESULT_COL1)) { + if (vp_out & BITFIELD64_BIT(VERT_RESULT_COL1)) { out_compsel |= R200_OUTPUT_COLOR_1; } - if (vp_out & (1 << VERT_RESULT_FOGC)) { + if (vp_out & BITFIELD64_BIT(VERT_RESULT_FOGC)) { out_compsel |= R200_OUTPUT_DISCRETE_FOG; } - if (vp_out & (1 << VERT_RESULT_PSIZ)) { + if (vp_out & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { out_compsel |= R200_OUTPUT_PT_SIZE; } for (i = VERT_RESULT_TEX0; i < VERT_RESULT_TEX6; i++) { - if (vp_out & (1 << i)) { + if (vp_out & BITFIELD64_BIT(i)) { out_compsel |= R200_OUTPUT_TEX_0 << (i - VERT_RESULT_TEX0); } } diff --git a/r200/r200_tex.c b/r200/r200_tex.c index 36d9e37..a417721 100644 --- a/r200/r200_tex.c +++ b/r200/r200_tex.c @@ -38,7 +38,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/enums.h" #include "main/image.h" #include "main/simple_list.h" -#include "main/texformat.h" #include "main/texstore.h" #include "main/teximage.h" #include "main/texobj.h" @@ -386,16 +385,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, case GL_TEXTURE_MAX_LEVEL: case GL_TEXTURE_MIN_LOD: case GL_TEXTURE_MAX_LOD: - /* This isn't the most efficient solution but there doesn't appear to - * be a nice alternative. Since there's no LOD clamping, - * we just have to rely on loading the right subset of mipmap levels - * to simulate a clamped LOD. - */ - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - t->validated = GL_FALSE; - } + t->validated = GL_FALSE; break; default: @@ -414,7 +404,7 @@ static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) (void *)texObj, _mesa_lookup_enum_by_nr(texObj->Target)); } - + if (rmesa) { int i; radeon_firevertices(&rmesa->radeon); @@ -426,11 +416,9 @@ static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) } } } - - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - } + + radeon_miptree_unreference(&t->mt); + _mesa_delete_texture_object(ctx, texObj); } diff --git a/r200/r200_texstate.c b/r200/r200_texstate.c index c948347..7782404 100644 --- a/r200/r200_texstate.c +++ b/r200/r200_texstate.c @@ -36,7 +36,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "main/context.h" #include "main/macros.h" -#include "main/texformat.h" #include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" @@ -825,20 +824,14 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo radeon_bo_unref(rImage->bo); rImage->bo = NULL; } - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = NULL; - } - if (rImage->mt) { - radeon_miptree_unreference(rImage->mt); - rImage->mt = NULL; - } + + radeon_miptree_unreference(&t->mt); + radeon_miptree_unreference(&rImage->mt); + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, rb->base.Width, rb->base.Height, 1, 0, rb->cpp); texImage->RowStride = rb->pitch / rb->cpp; - texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx, - internalFormat, - type, format, 0); + rImage->bo = rb->bo; radeon_bo_ref(rImage->bo); t->bo = rb->bo; @@ -1426,10 +1419,9 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ) */ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) { - int firstlevel = t->mt ? t->mt->firstLevel : 0; - const struct gl_texture_image *firstImage = t->base.Image[0][firstlevel]; + const struct gl_texture_image *firstImage = t->base.Image[0][t->minLod]; GLint log2Width, log2Height, log2Depth, texelBytes; - + if ( t->bo ) { return; } @@ -1437,11 +1429,11 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) log2Width = firstImage->WidthLog2; log2Height = firstImage->HeightLog2; log2Depth = firstImage->DepthLog2; - texelBytes = firstImage->TexFormat->TexelBytes; + texelBytes = _mesa_get_format_bytes(firstImage->TexFormat); if (!t->image_override) { - if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) { + if (VALID_FORMAT(firstImage->TexFormat)) { const struct tx_table *table = _mesa_little_endian() ? tx_table_le : tx_table_be; @@ -1449,17 +1441,17 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) R200_TXFORMAT_ALPHA_IN_MAP); t->pp_txfilter &= ~R200_YUV_TO_RGB; - t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format; - t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter; + t->pp_txformat |= table[ firstImage->TexFormat ].format; + t->pp_txfilter |= table[ firstImage->TexFormat ].filter; } else { _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); return; } } - + t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK; - t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << R200_MAX_MIP_LEVEL_SHIFT; + t->pp_txfilter |= (t->maxLod - t->minLod) << R200_MAX_MIP_LEVEL_SHIFT; t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK | R200_TXFORMAT_HEIGHT_MASK | @@ -1504,7 +1496,7 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT)); if ( !t->image_override ) { - if (firstImage->IsCompressed) + if (_mesa_is_format_compressed(firstImage->TexFormat)) t->pp_txpitch = (firstImage->Width + 63) & ~(63); else t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63); diff --git a/r300/Makefile.am b/r300/Makefile.am index a678ddb..236710a 100644 --- a/r300/Makefile.am +++ b/r300/Makefile.am @@ -2,8 +2,7 @@ SUBDIRS = compiler AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING -R300_CFLAGS = -DCOMPILE_R300 -DR200_MERGED=0 -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 -R300_CFLAGS += -I../radeon -I../radeon/server +R300_CFLAGS = -DRADEON_R300 -I../radeon -I../radeon/server r300_dri_la_LTLIBRARIES = r300_dri.la r300_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R300_CFLAGS) @@ -37,6 +36,7 @@ r300_dri_la_SOURCES = \ r300_vertprog.c \ r300_fragprog_common.c \ r300_shader.c \ + radeon_mesa_to_rc.c \ r300_emit.c \ r300_swtcl.c @@ -44,5 +44,7 @@ if HAVE_LIBDRM_RADEON r300_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS) r300_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS) r300_dri_la_SOURCES += \ - ../radeon/radeon_cs_space_drm.c + ../radeon/radeon_cs_space_drm.c \ + ../radeon/radeon_bo.c \ + ../radeon/radeon_cs.c endif diff --git a/r300/compiler/Makefile.am b/r300/compiler/Makefile.am index e24a719..c3dc39f 100644 --- a/r300/compiler/Makefile.am +++ b/r300/compiler/Makefile.am @@ -5,10 +5,17 @@ libr300compiler_la_CFLAGS = $(AM_CFLAGS) $(DRI_CFLAGS) libr300compiler_la_SOURCES = \ radeon_code.c \ radeon_compiler.c \ - radeon_nqssadce.c \ radeon_program.c \ + radeon_program_print.c \ + radeon_opcodes.c \ radeon_program_alu.c \ radeon_program_pair.c \ + radeon_pair_translate.c \ + radeon_pair_schedule.c \ + radeon_pair_regalloc.c \ + radeon_dataflow.c \ + radeon_dataflow_deadcode.c \ + radeon_dataflow_swizzles.c \ r3xx_fragprog.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ diff --git a/r300/compiler/SConscript b/r300/compiler/SConscript new file mode 100644 index 0000000..46075a8 --- /dev/null +++ b/r300/compiler/SConscript @@ -0,0 +1,37 @@ +Import('*') + +env = env.Clone() +env.Append(CPPPATH = '#/include') +env.Append(CPPPATH = '#/src/mesa') + +# temporary fix +env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '') + +r300compiler = env.ConvenienceLibrary( + target = 'r300compiler', + source = [ + 'radeon_code.c', + 'radeon_compiler.c', + 'radeon_program.c', + 'radeon_program_print.c', + 'radeon_opcodes.c', + 'radeon_program_alu.c', + 'radeon_program_pair.c', + 'radeon_pair_translate.c', + 'radeon_pair_schedule.c', + 'radeon_pair_regalloc.c', + 'radeon_dataflow.c', + 'radeon_dataflow_deadcode.c', + 'radeon_dataflow_swizzles.c', + 'r3xx_fragprog.c', + 'r300_fragprog.c', + 'r300_fragprog_swizzle.c', + 'r300_fragprog_emit.c', + 'r500_fragprog.c', + 'r500_fragprog_emit.c', + 'r3xx_vertprog.c', + 'r3xx_vertprog_dump.c', + 'memory_pool.c', + ]) + +Return('r300compiler') diff --git a/r300/compiler/r300_fragprog.c b/r300/compiler/r300_fragprog.c index 6c9fba4..aa69b0f 100644 --- a/r300/compiler/r300_fragprog.c +++ b/r300/compiler/r300_fragprog.c @@ -27,17 +27,17 @@ #include "r300_fragprog.h" -#include "shader/prog_parameter.h" +#include <stdio.h> #include "../r300_reg.h" -static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu) { - struct prog_src_register reg = { 0, }; + struct rc_src_register reg = { 0, }; - reg.File = PROGRAM_STATE_VAR; + reg.File = RC_FILE_CONSTANT; reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = SWIZZLE_WWWW; + reg.Swizzle = RC_SWIZZLE_WWWW; return reg; } @@ -47,7 +47,7 @@ static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int t * - extract operand swizzles * - introduce a temporary register when write masks are needed */ -GLboolean r300_transform_TEX( +int r300_transform_TEX( struct radeon_compiler * c, struct rc_instruction* inst, void* data) @@ -55,77 +55,77 @@ GLboolean r300_transform_TEX( struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - if (inst->I.Opcode != OPCODE_TEX && - inst->I.Opcode != OPCODE_TXB && - inst->I.Opcode != OPCODE_TXP && - inst->I.Opcode != OPCODE_KIL) - return GL_FALSE; + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; /* ARB_shadow & EXT_shadow_funcs */ - if (inst->I.Opcode != OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + if (inst->U.I.Opcode != RC_OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - inst->I.Opcode = OPCODE_MOV; + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; - if (comparefunc == GL_ALWAYS) { - inst->I.SrcReg[0].File = PROGRAM_BUILTIN; - inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; } else { - inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit); } - return GL_TRUE; + return 1; } else { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); int pass, fail; - inst_rcp->I.Opcode = OPCODE_RCP; - inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; - inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; - inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; - - inst_cmp->I.DstReg = inst->I.DstReg; - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = rc_find_free_temporary(c); - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; - - inst_mad->I.Opcode = OPCODE_MAD; - inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mad->I.DstReg.Index = rc_find_free_temporary(c); - inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; - inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; - inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; - inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; - inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_cmp->U.I.DstReg = inst->U.I.DstReg; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = rc_find_free_temporary(c); + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) + inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; else - inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - inst_cmp->I.Opcode = OPCODE_CMP; + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; /* DstReg has been filled out above */ - inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { pass = 1; fail = 2; } else { @@ -133,9 +133,9 @@ GLboolean r300_transform_TEX( fail = 1; } - inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; - inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; - inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; + inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit); } } @@ -143,52 +143,52 @@ GLboolean r300_transform_TEX( * instead of [0..Width]x[0..Height]. * Add a scaling instruction. */ - if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) { + if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.TexSrcTarget == RC_TEXTURE_RECT) { struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev); - inst_mul->I.Opcode = OPCODE_MUL; - inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mul->I.DstReg.Index = rc_find_free_temporary(c); - inst_mul->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR; - inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mul->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit); - reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mul->U.I.DstReg.Index; } /* Cannot write texture to output registers or with masks */ - if (inst->I.Opcode != OPCODE_KIL && - (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) { + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.DstReg.WriteMask != RC_MASK_XYZW)) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg = inst->I.DstReg; - inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ - if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mov->I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; } - return GL_TRUE; + return 1; } /* just some random things... */ diff --git a/r300/compiler/r300_fragprog.h b/r300/compiler/r300_fragprog.h index 0ac46db..418df36 100644 --- a/r300/compiler/r300_fragprog.h +++ b/r300/compiler/r300_fragprog.h @@ -33,9 +33,6 @@ #ifndef __R300_FRAGPROG_H_ #define __R300_FRAGPROG_H_ -#include "shader/program.h" -#include "shader/prog_instruction.h" - #include "radeon_compiler.h" #include "radeon_program.h" @@ -44,6 +41,6 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); -extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); +extern int r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); #endif diff --git a/r300/compiler/r300_fragprog_emit.c b/r300/compiler/r300_fragprog_emit.c index c7227bb..bbc0003 100644 --- a/r300/compiler/r300_fragprog_emit.c +++ b/r300/compiler/r300_fragprog_emit.c @@ -56,7 +56,6 @@ struct r300_emit_state { }; #define PROG_CODE \ - struct r300_emit_state * emit = (struct r300_emit_state*)data; \ struct r300_fragment_program_compiler *c = emit->compiler; \ struct r300_fragment_program_code *code = &c->code->code.r300 @@ -69,64 +68,76 @@ struct r300_emit_state { /** * Mark a temporary register as used. */ -static void use_temporary(struct r300_fragment_program_code *code, GLuint index) +static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) { if (index > code->pixsize) code->pixsize = index; } +static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src) +{ + if (src.File == RC_FILE_CONSTANT) { + return src.Index | (1 << 5); + } else if (src.File == RC_FILE_TEMPORARY) { + use_temporary(code, src.Index); + return src.Index; + } + + return 0; +} + -static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) +static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R300_ALU_OUTC_CMP; - case OPCODE_DP3: return R300_ALU_OUTC_DP3; - case OPCODE_DP4: return R300_ALU_OUTC_DP4; - case OPCODE_FRC: return R300_ALU_OUTC_FRC; + case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; + case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; + case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; + case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; default: error("translate_rgb_opcode(%i): Unknown opcode", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R300_ALU_OUTC_MAD; - case OPCODE_MAX: return R300_ALU_OUTC_MAX; - case OPCODE_MIN: return R300_ALU_OUTC_MIN; - case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; + case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; } } -static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) +static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R300_ALU_OUTA_CMP; - case OPCODE_DP3: return R300_ALU_OUTA_DP4; - case OPCODE_DP4: return R300_ALU_OUTA_DP4; - case OPCODE_EX2: return R300_ALU_OUTA_EX2; - case OPCODE_FRC: return R300_ALU_OUTA_FRC; - case OPCODE_LG2: return R300_ALU_OUTA_LG2; + case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; + case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; + case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; + case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; + case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; + case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; default: error("translate_rgb_opcode(%i): Unknown opcode", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R300_ALU_OUTA_MAD; - case OPCODE_MAX: return R300_ALU_OUTA_MAX; - case OPCODE_MIN: return R300_ALU_OUTA_MIN; - case OPCODE_RCP: return R300_ALU_OUTA_RCP; - case OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; + case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; + case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; } } /** * Emit one paired ALU instruction. */ -static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) +static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) { PROG_CODE; if (code->alu.length >= R300_PFS_MAX_ALU_INST) { error("Too many ALU instructions"); - return GL_FALSE; + return 0; } int ip = code->alu.length++; @@ -136,17 +147,13 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { - GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); - if (!inst->RGB.Src[j].Constant) - use_temporary(code, inst->RGB.Src[j].Index); + unsigned int src = use_source(code, inst->RGB.Src[j]); code->alu.inst[ip].rgb_addr |= src << (6*j); - src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); - if (!inst->Alpha.Src[j].Constant) - use_temporary(code, inst->Alpha.Src[j].Index); + src = use_source(code, inst->Alpha.Src[j]); code->alu.inst[ip].alpha_addr |= src << (6*j); - GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); arg |= inst->RGB.Arg[j].Abs << 6; arg |= inst->RGB.Arg[j].Negate << 5; code->alu.inst[ip].rgb_inst |= arg << (7*j); @@ -186,27 +193,27 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) if (inst->Alpha.DepthWriteMask) { code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; emit->node_flags |= R300_W_OUT; - c->code->writes_depth = GL_TRUE; + c->code->writes_depth = 1; } - return GL_TRUE; + return 1; } /** * Finish the current node without advancing to the next one. */ -static GLboolean finish_node(struct r300_emit_state * emit) +static int finish_node(struct r300_emit_state * emit) { struct r300_fragment_program_compiler * c = emit->compiler; struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ - struct radeon_pair_instruction inst; - _mesa_bzero(&inst, sizeof(inst)); + struct rc_pair_instruction inst; + memset(&inst, 0, sizeof(inst)); if (!emit_alu(emit, &inst)) - return GL_FALSE; + return 0; } unsigned alu_offset = emit->node_first_alu; @@ -217,7 +224,7 @@ static GLboolean finish_node(struct r300_emit_state * emit) if (code->tex.length == emit->node_first_tex) { if (emit->current_node > 0) { error("Node %i has no TEX instructions", emit->current_node); - return GL_FALSE; + return 0; } tex_end = 0; @@ -240,7 +247,7 @@ static GLboolean finish_node(struct r300_emit_state * emit) (tex_end << R300_TEX_SIZE_SHIFT) | emit->node_flags; - return GL_TRUE; + return 1; } @@ -248,79 +255,72 @@ static GLboolean finish_node(struct r300_emit_state * emit) * Begin a block of texture instructions. * Create the necessary indirection. */ -static GLboolean begin_tex(void* data) +static int begin_tex(struct r300_emit_state * emit) { PROG_CODE; if (code->alu.length == emit->node_first_alu && code->tex.length == emit->node_first_tex) { - return GL_TRUE; + return 1; } if (emit->current_node == 3) { error("Too many texture indirections"); - return GL_FALSE; + return 0; } if (!finish_node(emit)) - return GL_FALSE; + return 0; emit->current_node++; emit->node_first_tex = code->tex.length; emit->node_first_alu = code->alu.length; emit->node_flags = 0; - return GL_TRUE; + return 1; } -static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst) +static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) { PROG_CODE; if (code->tex.length >= R300_PFS_MAX_TEX_INST) { error("Too many TEX instructions"); - return GL_FALSE; + return 0; } - GLuint unit = inst->TexSrcUnit; - GLuint dest = inst->DestIndex; - GLuint opcode; + unsigned int unit = inst->U.I.TexSrcUnit; + unsigned int dest = inst->U.I.DstReg.Index; + unsigned int opcode; - switch(inst->Opcode) { - case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; - case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; - case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; - case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + switch(inst->U.I.Opcode) { + case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; default: - error("Unknown texture opcode %i", inst->Opcode); - return GL_FALSE; + error("Unknown texture opcode %i", inst->U.I.Opcode); + return 0; } - if (inst->Opcode == RADEON_OPCODE_KIL) { + if (inst->U.I.Opcode == RC_OPCODE_KIL) { unit = 0; dest = 0; } else { use_temporary(code, dest); } - use_temporary(code, inst->SrcIndex); + use_temporary(code, inst->U.I.SrcReg[0].Index); code->tex.inst[code->tex.length++] = - (inst->SrcIndex << R300_SRC_ADDR_SHIFT) | + (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | (dest << R300_DST_ADDR_SHIFT) | (unit << R300_TEX_ID_SHIFT) | (opcode << R300_TEX_INST_SHIFT); - return GL_TRUE; + return 1; } -static const struct radeon_pair_handler pair_handler = { - .EmitPaired = &emit_alu, - .EmitTex = &emit_tex, - .BeginTexBlock = &begin_tex, - .MaxHwTemps = R300_PFS_NUM_TEMP_REGS -}; - /** * Final compilation step: Turn the intermediate radeon_program into * machine-readable instructions. @@ -329,13 +329,31 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi { struct r300_emit_state emit; struct r300_fragment_program_code *code = &compiler->code->code.r300; + struct rc_instruction * inst; memset(&emit, 0, sizeof(emit)); emit.compiler = compiler; - _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); + memset(code, 0, sizeof(struct r300_fragment_program_code)); + + for(inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + begin_tex(&emit); + continue; + } + + emit_tex(&emit, inst); + } else { + emit_alu(&emit, &inst->U.P); + } + } + + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS) + rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); - radeonPairProgram(compiler, &pair_handler, &emit); if (compiler->Base.Error) return; diff --git a/r300/compiler/r300_fragprog_swizzle.c b/r300/compiler/r300_fragprog_swizzle.c index 1b14cc3..cfa48a5 100644 --- a/r300/compiler/r300_fragprog_swizzle.c +++ b/r300/compiler/r300_fragprog_swizzle.c @@ -33,16 +33,17 @@ #include "r300_fragprog_swizzle.h" +#include <stdio.h> + #include "../r300_reg.h" -#include "radeon_nqssadce.h" #include "radeon_compiler.h" -#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) +#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) struct swizzle_data { - GLuint hash; /**< swizzle value this matches */ - GLuint base; /**< base value for hw swizzle */ - GLuint stride; /**< difference in base between arg0/1/2 */ + unsigned int hash; /**< swizzle value this matches */ + unsigned int base; /**< base value for hw swizzle */ + unsigned int stride; /**< difference in base between arg0/1/2 */ }; static const struct swizzle_data native_swizzles[] = { @@ -65,15 +66,15 @@ static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swi * Find a native RGB swizzle that matches the given swizzle. * Returns 0 if none found. */ -static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) +static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) { int i, comp; for(i = 0; i < num_native_swizzles; ++i) { const struct swizzle_data* sd = &native_swizzles[i]; for(comp = 0; comp < 3; ++comp) { - GLuint swz = GET_SWZ(swizzle, comp); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) continue; if (swz != GET_SWZ(sd->hash, comp)) break; @@ -90,71 +91,72 @@ static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) * Check whether the given instruction supports the swizzle and negate * combinations in the given source register. */ -GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { if (reg.Abs) - reg.Negate = NEGATE_NONE; + reg.Negate = RC_MASK_NONE; - if (opcode == OPCODE_KIL || - opcode == OPCODE_TEX || - opcode == OPCODE_TXB || - opcode == OPCODE_TXP) { + if (opcode == RC_OPCODE_KIL || + opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP) { int j; if (reg.Abs || reg.Negate) - return GL_FALSE; + return 0; for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(reg.Swizzle, j); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(reg.Swizzle, j); + if (swz == RC_SWIZZLE_UNUSED) continue; if (swz != j) - return GL_FALSE; + return 0; } - return GL_TRUE; + return 1; } - GLuint relevant = 0; + unsigned int relevant = 0; int j; for(j = 0; j < 3; ++j) - if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) + if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) relevant |= 1 << j; if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return GL_FALSE; + return 0; if (!lookup_native_swizzle(reg.Swizzle)) - return GL_FALSE; + return 0; - return GL_TRUE; + return 1; } -/** - * Generate MOV dst, src using only native swizzles. - */ -void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +static void r300_swizzle_split( + struct rc_src_register src, unsigned int mask, + struct rc_swizzle_split * split) { if (src.Abs) - src.Negate = NEGATE_NONE; + src.Negate = RC_MASK_NONE; + + split->NumPhases = 0; - while(dst.WriteMask) { + while(mask) { const struct swizzle_data *best_swizzle = 0; - GLuint best_matchcount = 0; - GLuint best_matchmask = 0; + unsigned int best_matchcount = 0; + unsigned int best_matchmask = 0; int i, comp; for(i = 0; i < num_native_swizzles; ++i) { const struct swizzle_data *sd = &native_swizzles[i]; - GLuint matchcount = 0; - GLuint matchmask = 0; + unsigned int matchcount = 0; + unsigned int matchmask = 0; for(comp = 0; comp < 3; ++comp) { - if (!GET_BIT(dst.WriteMask, comp)) + if (!GET_BIT(mask, comp)) continue; - GLuint swz = GET_SWZ(src.Swizzle, comp); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(src.Swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) continue; if (swz == GET_SWZ(sd->hash, comp)) { /* check if the negate bit of current component @@ -170,34 +172,35 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, best_swizzle = sd; best_matchcount = matchcount; best_matchmask = matchmask; - if (matchmask == (dst.WriteMask & WRITEMASK_XYZ)) + if (matchmask == (mask & RC_MASK_XYZ)) break; } } - struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg = dst; - inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); - inst->I.SrcReg[0] = src; - inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; - /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ + if (mask & RC_MASK_W) + best_matchmask |= RC_MASK_W; - dst.WriteMask &= ~inst->I.DstReg.WriteMask; + split->Phase[split->NumPhases++] = best_matchmask; + mask &= ~best_matchmask; } } +struct rc_swizzle_caps r300_swizzle_caps = { + .IsNative = r300_swizzle_is_native, + .Split = r300_swizzle_split +}; + /** * Translate an RGB (XYZ) swizzle into the hardware code for the given * instruction source. */ -GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) { const struct swizzle_data* sd = lookup_native_swizzle(swizzle); if (!sd) { - _mesa_printf("Not a native swizzle: %08x\n", swizzle); + fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); return 0; } @@ -209,15 +212,15 @@ GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) * Translate an Alpha (W) swizzle into the hardware code for the given * instruction source. */ -GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle) +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) { if (swizzle < 3) return swizzle + 3*src; switch(swizzle) { - case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; - case SWIZZLE_ONE: return R300_ALU_ARGA_ONE; - case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; default: return R300_ALU_ARGA_ONE; } } diff --git a/r300/compiler/r300_fragprog_swizzle.h b/r300/compiler/r300_fragprog_swizzle.h index 231bf4e..118476a 100644 --- a/r300/compiler/r300_fragprog_swizzle.h +++ b/r300/compiler/r300_fragprog_swizzle.h @@ -28,15 +28,11 @@ #ifndef __R300_FRAGPROG_SWIZZLE_H_ #define __R300_FRAGPROG_SWIZZLE_H_ -#include "main/glheader.h" -#include "shader/prog_instruction.h" +#include "radeon_swizzle.h" -struct nqssadce_state; +extern struct rc_swizzle_caps r300_swizzle_caps; -GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); -void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - -GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle); -GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle); +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); #endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/r300/compiler/r3xx_fragprog.c b/r300/compiler/r3xx_fragprog.c index 76c3a7e..5581f25 100644 --- a/r300/compiler/r3xx_fragprog.c +++ b/r300/compiler/r3xx_fragprog.c @@ -22,22 +22,21 @@ #include "radeon_compiler.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_statevars.h" +#include <stdio.h> -#include "radeon_nqssadce.h" +#include "radeon_dataflow.h" #include "radeon_program_alu.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" -static void nqssadce_init(struct nqssadce_state* s) +static void dataflow_outputs_mark_use(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) { - struct r300_fragment_program_compiler * c = s->UserData; - s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW; - s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W; + struct r300_fragment_program_compiler * c = userdata; + callback(data, c->OutputColor, RC_MASK_XYZW); + callback(data, c->OutputDepth, RC_MASK_W); } static void rewrite_depth_out(struct r300_fragment_program_compiler * c) @@ -45,35 +44,35 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c) struct rc_instruction *rci; for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { - struct prog_instruction * inst = &rci->I; + struct rc_sub_instruction * inst = &rci->U.I; - if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth) + if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) continue; - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; + if (inst->DstReg.WriteMask & RC_MASK_Z) { + inst->DstReg.WriteMask = RC_MASK_W; } else { inst->DstReg.WriteMask = 0; continue; } switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + case RC_OPCODE_FRC: + case RC_OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + case RC_OPCODE_ADD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + case RC_OPCODE_CMP: + case RC_OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]); break; default: // Scalar instructions needn't be reswizzled @@ -89,11 +88,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) if (c->is_r500) { struct radeon_program_transformation transformations[] = { { &r500_transform_TEX, c }, + { &r500_transform_IF, 0 }, { &radeonTransformALU, 0 }, { &radeonTransformDeriv, 0 }, { &radeonTransformTrigScale, 0 } }; - radeonLocalTransform(&c->Base, 4, transformations); + radeonLocalTransform(&c->Base, 5, transformations); + + c->Base.SwizzleCaps = &r500_swizzle_caps; } else { struct radeon_program_transformation transformations[] = { { &r300_transform_TEX, c }, @@ -101,32 +103,66 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { &radeonTransformTrigSimple, 0 } }; radeonLocalTransform(&c->Base, 3, transformations); + + c->Base.SwizzleCaps = &r300_swizzle_caps; } if (c->Base.Debug) { - _mesa_printf("Fragment Program: After native rewrite:\n"); + fprintf(stderr, "Fragment Program: After native rewrite:\n"); rc_print_program(&c->Base.Program); fflush(stderr); } - if (c->is_r500) { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle - }; - radeonNqssaDce(&c->Base, &nqssadce, c); - } else { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle - }; - radeonNqssaDce(&c->Base, &nqssadce, c); + rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c); + if (c->Base.Error) + return; + + if (c->Base.Debug) { + fprintf(stderr, "Fragment Program: After deadcode:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + rc_dataflow_swizzles(&c->Base); + if (c->Base.Error) + return; + + if (c->Base.Debug) { + fprintf(stderr, "Compiler: after dataflow passes:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + rc_pair_translate(c); + if (c->Base.Error) + return; + + if (c->Base.Debug) { + fprintf(stderr, "Compiler: after pair translate:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); } + rc_pair_schedule(c); + if (c->Base.Error) + return; + + if (c->Base.Debug) { + fprintf(stderr, "Compiler: after pair scheduling:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + if (c->is_r500) + rc_pair_regalloc(c, 128); + else + rc_pair_regalloc(c, R300_PFS_NUM_TEMP_REGS); + + if (c->Base.Error) + return; + if (c->Base.Debug) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); + fprintf(stderr, "Compiler: after pair register allocation:\n"); rc_print_program(&c->Base.Program); fflush(stderr); } diff --git a/r300/compiler/r3xx_vertprog.c b/r300/compiler/r3xx_vertprog.c index dad27fc..1b2cb8d 100644 --- a/r300/compiler/r3xx_vertprog.c +++ b/r300/compiler/r3xx_vertprog.c @@ -22,13 +22,13 @@ #include "radeon_compiler.h" +#include <stdio.h> + #include "../r300_reg.h" -#include "radeon_nqssadce.h" -#include "radeon_program.h" +#include "radeon_dataflow.h" #include "radeon_program_alu.h" - -#include "shader/prog_print.h" +#include "radeon_swizzle.h" /* @@ -42,104 +42,83 @@ t_swizzle(y), \ t_swizzle(y), \ t_src_class(vpi->SrcReg[x].File), \ - NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) -static unsigned long t_dst_mask(GLuint mask) +static unsigned long t_dst_mask(unsigned int mask) { - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & WRITEMASK_XYZW; + /* RC_MASK_* is equivalent to VSF_FLAG_* */ + return mask & RC_MASK_XYZW; } -static unsigned long t_dst_class(gl_register_file file) +static unsigned long t_dst_class(rc_register_file file) { - switch (file) { - case PROGRAM_TEMPORARY: + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: + case RC_FILE_OUTPUT: return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: + case RC_FILE_ADDRESS: return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; } } static unsigned long t_dst_index(struct r300_vertex_program_code *vp, - struct prog_dst_register *dst) + struct rc_dst_register *dst) { - if (dst->File == PROGRAM_OUTPUT) + if (dst->File == RC_FILE_OUTPUT) return vp->outputs[dst->Index]; return dst->Index; } -static unsigned long t_src_class(gl_register_file file) +static unsigned long t_src_class(rc_register_file file) { switch (file) { - case PROGRAM_BUILTIN: - case PROGRAM_TEMPORARY: + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_NONE: + case RC_FILE_TEMPORARY: return PVS_SRC_REG_TEMPORARY; - case PROGRAM_INPUT: + case RC_FILE_INPUT: return PVS_SRC_REG_INPUT; - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: + case RC_FILE_CONSTANT: return PVS_SRC_REG_CONSTANT; - /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; } } -static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b) +static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) { unsigned long aclass = t_src_class(a.File); unsigned long bclass = t_src_class(b.File); if (aclass != bclass) - return GL_FALSE; + return 0; if (aclass == PVS_SRC_REG_TEMPORARY) - return GL_FALSE; + return 0; if (a.RelAddr || b.RelAddr) - return GL_TRUE; + return 1; if (a.Index != b.Index) - return GL_TRUE; + return 1; - return GL_FALSE; + return 0; } -static INLINE unsigned long t_swizzle(GLubyte swizzle) +static inline unsigned long t_swizzle(unsigned int swizzle) { - /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ return swizzle; } static unsigned long t_src_index(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - if (src->File == PROGRAM_INPUT) { + if (src->File == RC_FILE_INPUT) { assert(vp->inputs[src->Index] != -1); return vp->inputs[src->Index]; } else { @@ -155,9 +134,9 @@ static unsigned long t_src_index(struct r300_vertex_program_code *vp, /* these two functions should probably be merged... */ static unsigned long t_src(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - /* src->Negate uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -170,9 +149,9 @@ static unsigned long t_src(struct r300_vertex_program_code *vp, } static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - /* src->Negate uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -181,79 +160,79 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src->Negate ? NEGATE_XYZW : NEGATE_NONE) | + src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (src->RelAddr << 4); } -static GLboolean valid_dst(struct r300_vertex_program_code *vp, - struct prog_dst_register *dst) +static int valid_dst(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) { - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { + if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { + return 0; + } else if (dst->File == RC_FILE_ADDRESS) { assert(dst->Index == 0); } - return GL_TRUE; + return 1; } static void ei_vector1(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); } static void ei_vector2(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src(vp, &vpi->SrcReg[0]); inst[2] = t_src(vp, &vpi->SrcReg[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); + inst[3] = __CONST(1, RC_SWIZZLE_ZERO); } static void ei_math1(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); } static void ei_lit(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); @@ -263,27 +242,27 @@ static void ei_lit(struct r300_vertex_program_code *vp, PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); } static void ei_mad(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { /* Remarks about hardware limitations of MAD * (please preserve this comment, as this information is _NOT_ @@ -311,22 +290,22 @@ static void ei_mad(struct r300_vertex_program_code *vp, * according to AMD docs, this should improve performance by one clock * as a nice side bonus. */ - if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY && - vpi->SrcReg[1].File == PROGRAM_TEMPORARY && - vpi->SrcReg[2].File == PROGRAM_TEMPORARY && + if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && + vpi->SrcReg[1].File == RC_FILE_TEMPORARY && + vpi->SrcReg[2].File == RC_FILE_TEMPORARY && vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, + 0, + 1, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); } else { inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); @@ -337,17 +316,17 @@ static void ei_mad(struct r300_vertex_program_code *vp, } static void ei_pow(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); } @@ -362,8 +341,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi compiler->SetHwInputOutput(compiler); for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { - struct prog_instruction *vpi = &rci->I; - GLuint *inst = compiler->code->body.d + compiler->code->length; + struct rc_sub_instruction *vpi = &rci->U.I; + unsigned int *inst = compiler->code->body.d + compiler->code->length; /* Skip instructions writing to non-existing destination */ if (!valid_dst(compiler->code, &vpi->DstReg)) @@ -375,26 +354,26 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi } switch (vpi->Opcode) { - case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; - case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; - case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; - case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; - case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; - case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; - case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; - case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; - case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; - case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; - case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; - case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; - case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; - case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; - case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; - case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; - case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; - case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; - case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; - case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; default: rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode); return; @@ -408,38 +387,37 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi } struct temporary_allocation { - GLuint Allocated:1; - GLuint HwTemp:15; + unsigned int Allocated:1; + unsigned int HwTemp:15; struct rc_instruction * LastRead; }; static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *inst; - GLuint num_orig_temps = 0; - GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + unsigned int num_orig_temps = 0; + char hwtemps[VSF_MAX_FRAGMENT_TEMPS]; struct temporary_allocation * ta; - GLuint i, j; + unsigned int i, j; compiler->code->num_temporaries = 0; memset(hwtemps, 0, sizeof(hwtemps)); /* Pass 1: Count original temporaries and allocate structures */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); - GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { - if (inst->I.SrcReg[i].Index >= num_orig_temps) - num_orig_temps = inst->I.SrcReg[i].Index + 1; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + if (inst->U.I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->U.I.SrcReg[i].Index + 1; } } - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { - if (inst->I.DstReg.Index >= num_orig_temps) - num_orig_temps = inst->I.DstReg.Index + 1; + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + if (inst->U.I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->U.I.DstReg.Index + 1; } } } @@ -450,32 +428,31 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) - ta[inst->I.SrcReg[i].Index].LastRead = inst; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) + ta[inst->U.I.SrcReg[i].Index].LastRead = inst; } } /* Pass 3: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); - GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { - GLuint orig = inst->I.SrcReg[i].Index; - inst->I.SrcReg[i].Index = ta[orig].HwTemp; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; if (ta[orig].Allocated && inst == ta[orig].LastRead) - hwtemps[ta[orig].HwTemp] = GL_FALSE; + hwtemps[ta[orig].HwTemp] = 0; } } - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { - GLuint orig = inst->I.DstReg.Index; + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.DstReg.Index; if (!ta[orig].Allocated) { for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { @@ -485,16 +462,16 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c if (j >= VSF_MAX_FRAGMENT_TEMPS) { fprintf(stderr, "Out of hw temporaries\n"); } else { - ta[orig].Allocated = GL_TRUE; + ta[orig].Allocated = 1; ta[orig].HwTemp = j; - hwtemps[j] = GL_TRUE; + hwtemps[j] = 1; if (j >= compiler->code->num_temporaries) compiler->code->num_temporaries = j + 1; } } - inst->I.DstReg.Index = ta[orig].HwTemp; + inst->U.I.DstReg.Index = ta[orig].HwTemp; } } } @@ -505,45 +482,45 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c * Vertex engine cannot read two inputs or two constants at the same time. * Introduce intermediate MOVs to temporary registers to account for this. */ -static GLboolean transform_source_conflicts( +static int transform_source_conflicts( struct radeon_compiler *c, struct rc_instruction* inst, void* unused) { - GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (num_operands == 3) { - if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) - || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { + if (opcode->NumSrcRegs == 3) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) + || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mov->I.DstReg.Index = tmpreg; - inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; - - reset_srcreg(&inst->I.SrcReg[2]); - inst->I.SrcReg[2].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[2].Index = tmpreg; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + + reset_srcreg(&inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[2].Index = tmpreg; } } - if (num_operands >= 2) { - if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { + if (opcode->NumSrcRegs >= 2) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mov->I.DstReg.Index = tmpreg; - inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; - - reset_srcreg(&inst->I.SrcReg[1]); - inst->I.SrcReg[1].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[1].Index = tmpreg; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + + reset_srcreg(&inst->U.I.SrcReg[1]); + inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[1].Index = tmpreg; } } - return GL_TRUE; + return 1; } static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) @@ -554,44 +531,52 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) if ((compiler->RequiredOutputs & (1 << i)) && !(compiler->Base.Program.OutputsWritten & (1 << i))) { struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); - inst->I.Opcode = OPCODE_MOV; + inst->U.I.Opcode = RC_OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_OUTPUT; - inst->I.DstReg.Index = i; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = i; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - inst->I.SrcReg[0].File = PROGRAM_CONSTANT; - inst->I.SrcReg[0].Index = 0; - inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; + inst->U.I.SrcReg[0].Index = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; compiler->Base.Program.OutputsWritten |= 1 << i; } } } -static void nqssadceInit(struct nqssadce_state* s) +static void dataflow_outputs_mark_used(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) { - struct r300_vertex_program_compiler * compiler = s->UserData; + struct r300_vertex_program_compiler * c = userdata; int i; - for(i = 0; i < VERT_RESULT_MAX; ++i) { - if (compiler->RequiredOutputs & (1 << i)) - s->Outputs[i].Sourced = WRITEMASK_XYZW; + for(i = 0; i < 32; ++i) { + if (c->RequiredOutputs & (1 << i)) + callback(data, i, RC_MASK_XYZW); } } -static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { (void) opcode; (void) reg; - return GL_TRUE; + return 1; } +static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { + .IsNative = &swizzle_is_native, + .Split = 0 /* should never be called */ +}; + void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { + compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + addArtificialOutputs(compiler); { @@ -624,22 +609,22 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) fflush(stderr); } - { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadceInit, - .IsNativeSwizzle = &swizzleIsNative, - .BuildSwizzle = NULL - }; - radeonNqssaDce(&compiler->Base, &nqssadce, compiler); + rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after deadcode:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stderr); + } - /* We need this step for reusing temporary registers */ - allocate_temporary_registers(compiler); + rc_dataflow_swizzles(&compiler->Base); - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after NQSSADCE:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + allocate_temporary_registers(compiler); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after dataflow:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stderr); } translate_vertex_program(compiler); diff --git a/r300/compiler/r3xx_vertprog_dump.c b/r300/compiler/r3xx_vertprog_dump.c index 980ef3e..66f9b05 100644 --- a/r300/compiler/r3xx_vertprog_dump.c +++ b/r300/compiler/r3xx_vertprog_dump.c @@ -146,7 +146,7 @@ static void r300_vs_op_dump(uint32_t op) static void r300_vs_src_dump(uint32_t src) { fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", - (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3], + (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3], src & (1 << 25) ? "-" : " ", r300_vs_swiz_debug[(src >> 13) & 0x7], src & (1 << 26) ? "-" : " ", diff --git a/r300/compiler/r500_fragprog.c b/r300/compiler/r500_fragprog.c index 7e2faed..d87acec 100644 --- a/r300/compiler/r500_fragprog.c +++ b/r300/compiler/r500_fragprog.c @@ -27,15 +27,17 @@ #include "r500_fragprog.h" +#include <stdio.h> + #include "../r300_reg.h" -static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu) { - struct prog_src_register reg = { 0, }; + struct rc_src_register reg = { 0, }; - reg.File = PROGRAM_STATE_VAR; + reg.File = RC_FILE_CONSTANT; reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = SWIZZLE_WWWW; + reg.Swizzle = RC_SWIZZLE_WWWW; return reg; } @@ -44,7 +46,7 @@ static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int t * - implement texture compare (shadow extensions) * - extract non-native source / destination operands */ -GLboolean r500_transform_TEX( +int r500_transform_TEX( struct radeon_compiler * c, struct rc_instruction * inst, void* data) @@ -52,77 +54,77 @@ GLboolean r500_transform_TEX( struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - if (inst->I.Opcode != OPCODE_TEX && - inst->I.Opcode != OPCODE_TXB && - inst->I.Opcode != OPCODE_TXP && - inst->I.Opcode != OPCODE_KIL) - return GL_FALSE; + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; /* ARB_shadow & EXT_shadow_funcs */ - if (inst->I.Opcode != OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + if (inst->U.I.Opcode != RC_OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - inst->I.Opcode = OPCODE_MOV; + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; - if (comparefunc == GL_ALWAYS) { - inst->I.SrcReg[0].File = PROGRAM_BUILTIN; - inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; } else { - inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit); } - return GL_TRUE; + return 1; } else { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); int pass, fail; - inst_rcp->I.Opcode = OPCODE_RCP; - inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; - inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; - inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; - - inst_cmp->I.DstReg = inst->I.DstReg; - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = rc_find_free_temporary(c); - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; - - inst_mad->I.Opcode = OPCODE_MAD; - inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mad->I.DstReg.Index = rc_find_free_temporary(c); - inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; - inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; - inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; - inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; - inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_cmp->U.I.DstReg = inst->U.I.DstReg; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = rc_find_free_temporary(c); + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) + inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; else - inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - inst_cmp->I.Opcode = OPCODE_CMP; + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; /* DstReg has been filled out above */ - inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { pass = 1; fail = 2; } else { @@ -130,131 +132,161 @@ GLboolean r500_transform_TEX( fail = 1; } - inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; - inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; - inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; + inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit); } } /* Cannot write texture to output registers */ - if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) { + if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg = inst->I.DstReg; - inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ - if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mov->I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; } - return GL_TRUE; + return 1; +} + +/** + * Rewrite IF instructions to use the ALU result special register. + */ +int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + if (inst->U.I.Opcode != RC_OPCODE_IF) + return 0; + + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.WriteALUResult = RC_ALURESULT_W; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X); + + inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[0].Negate = 0; + + return 1; } -GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { - GLuint relevant; + unsigned int relevant; int i; - if (opcode == OPCODE_TEX || - opcode == OPCODE_TXB || - opcode == OPCODE_TXP || - opcode == OPCODE_KIL) { + if (opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP || + opcode == RC_OPCODE_KIL) { if (reg.Abs) - return GL_FALSE; + return 0; - if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE)) - return GL_FALSE; + if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) + return 0; if (reg.Negate) - reg.Negate ^= NEGATE_XYZW; + reg.Negate ^= RC_MASK_XYZW; for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz == SWIZZLE_NIL) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED) { reg.Negate &= ~(1 << i); continue; } if (swz >= 4) - return GL_FALSE; + return 0; } if (reg.Negate) - return GL_FALSE; + return 0; - return GL_TRUE; - } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { + return 1; + } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; * if it doesn't fit perfectly into a .xyzw case... */ - if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) - return GL_TRUE; + if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) + return 1; - return GL_FALSE; + return 0; } else { /* ALU instructions support almost everything */ if (reg.Abs) - return GL_TRUE; + return 1; relevant = 0; for(i = 0; i < 3; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) relevant |= 1 << i; } if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return GL_FALSE; + return 0; - return GL_TRUE; + return 1; } } /** - * Implement a MOV with a potentially non-native swizzle. + * Split source register access. * * The only thing we *cannot* do in an ALU instruction is per-component - * negation. Therefore, we split the MOV into two instructions when necessary. + * negation. */ -void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, + struct rc_swizzle_split * split) { - GLuint negatebase[2] = { 0, 0 }; + unsigned int negatebase[2] = { 0, 0 }; int i; for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(src.Swizzle, i); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(src.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) continue; negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } + split->NumPhases = 0; + for(i = 0; i <= 1; ++i) { if (!negatebase[i]) continue; - struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg = dst; - inst->I.DstReg.WriteMask = negatebase[i]; - inst->I.SrcReg[0] = src; - inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; + split->Phase[split->NumPhases++] = negatebase[i]; } } +struct rc_swizzle_caps r500_swizzle_caps = { + .IsNative = r500_swizzle_is_native, + .Split = r500_swizzle_split +}; static char *toswiz(int swiz_val) { switch(swiz_val) { diff --git a/r300/compiler/r500_fragprog.h b/r300/compiler/r500_fragprog.h index 9091f65..0918cdf 100644 --- a/r300/compiler/r500_fragprog.h +++ b/r300/compiler/r500_fragprog.h @@ -33,21 +33,21 @@ #ifndef __R500_FRAGPROG_H_ #define __R500_FRAGPROG_H_ -#include "shader/prog_parameter.h" -#include "shader/prog_instruction.h" - #include "radeon_compiler.h" -#include "radeon_nqssadce.h" +#include "radeon_swizzle.h" extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); -extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); +extern struct rc_swizzle_caps r500_swizzle_caps; -extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); +extern int r500_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); -extern GLboolean r500_transform_TEX( +extern int r500_transform_IF( struct radeon_compiler * c, struct rc_instruction * inst, void* data); diff --git a/r300/compiler/r500_fragprog_emit.c b/r300/compiler/r500_fragprog_emit.c index d694725..2942267 100644 --- a/r300/compiler/r500_fragprog_emit.c +++ b/r300/compiler/r500_fragprog_emit.c @@ -37,10 +37,6 @@ * * \author Corbin Simpson <MostAwesomeDude@gmail.com> * - * \todo Depth write, WPOS/FOGC inputs - * - * \todo FogOption - * */ #include "r500_fragprog.h" @@ -51,7 +47,6 @@ #define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ struct r500_fragment_program_code *code = &c->code->code.r500 #define error(fmt, args...) do { \ @@ -60,63 +55,80 @@ } while(0) -static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode) +struct branch_info { + int If; + int Else; + int Endif; +}; + +struct emit_state { + struct radeon_compiler * C; + struct r500_fragment_program_code * Code; + + struct branch_info * Branches; + unsigned int CurrentBranchDepth; + unsigned int BranchesReserved; + + unsigned int MaxBranchDepth; +}; + +static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; - case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; - case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; - case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; - case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; - case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; default: error("translate_rgb_op(%d): unknown opcode\n", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; - case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; - case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; - case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; } } -static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode) +static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R500_ALPHA_OP_CMP; - case OPCODE_COS: return R500_ALPHA_OP_COS; - case OPCODE_DDX: return R500_ALPHA_OP_MDH; - case OPCODE_DDY: return R500_ALPHA_OP_MDV; - case OPCODE_DP3: return R500_ALPHA_OP_DP; - case OPCODE_DP4: return R500_ALPHA_OP_DP; - case OPCODE_EX2: return R500_ALPHA_OP_EX2; - case OPCODE_FRC: return R500_ALPHA_OP_FRC; - case OPCODE_LG2: return R500_ALPHA_OP_LN2; + case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_COS: return R500_ALPHA_OP_COS; + case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; + case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; + case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; + case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; default: error("translate_alpha_op(%d): unknown opcode\n", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R500_ALPHA_OP_MAD; - case OPCODE_MAX: return R500_ALPHA_OP_MAX; - case OPCODE_MIN: return R500_ALPHA_OP_MIN; - case OPCODE_RCP: return R500_ALPHA_OP_RCP; - case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; - case OPCODE_SIN: return R500_ALPHA_OP_SIN; + case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; + case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; + case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; } } -static GLuint fix_hw_swizzle(GLuint swz) +static unsigned int fix_hw_swizzle(unsigned int swz) { if (swz == 5) swz = 6; - if (swz == SWIZZLE_NIL) swz = 4; + if (swz == RC_SWIZZLE_UNUSED) swz = 4; return swz; } -static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) +static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) { - GLuint t = inst->RGB.Arg[arg].Source; + unsigned int t = inst->RGB.Arg[arg].Source; int comp; t |= inst->RGB.Arg[arg].Negate << 11; t |= inst->RGB.Arg[arg].Abs << 12; @@ -127,39 +139,57 @@ static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) return t; } -static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) +static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) { - GLuint t = inst->Alpha.Arg[i].Source; + unsigned int t = inst->Alpha.Arg[i].Source; t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; t |= inst->Alpha.Arg[i].Negate << 5; t |= inst->Alpha.Arg[i].Abs << 6; return t; } -static void use_temporary(struct r500_fragment_program_code* code, GLuint index) +static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) +{ + switch(func) { + case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; + case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; + case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; + case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; + default: + rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); + return 0; + } +} + +static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) { if (index > code->max_temp_idx) code->max_temp_idx = index; } -static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) { - if (!src.Constant) + if (src.File == RC_FILE_CONSTANT) { + return src.Index | 0x100; + } else if (src.File == RC_FILE_TEMPORARY) { use_temporary(code, src.Index); - return src.Index | src.Constant << 8; + return src.Index; + } + + return 0; } /** * Emit a paired ALU instruction. */ -static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) +static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) { PROG_CODE; if (code->inst_end >= 511) { error("emit_alu: Too many instructions"); - return GL_FALSE; + return; } int ip = ++code->inst_end; @@ -167,17 +197,22 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); - if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { code->inst[ip].inst0 = R500_INST_TYPE_OUT; - else + if (inst->WriteALUResult) { + error("%s: cannot write output and ALU result at the same time"); + return; + } + } else { code->inst[ip].inst0 = R500_INST_TYPE_ALU; + } code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); if (inst->Alpha.DepthWriteMask) { code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->code->writes_depth = GL_TRUE; + c->code->writes_depth = 1; } code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); @@ -206,12 +241,21 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - return GL_TRUE; + if (inst->WriteALUResult) { + code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; + + if (inst->WriteALUResult == RC_ALURESULT_X) + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; + else + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; + + code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); + } } -static GLuint translate_strq_swizzle(GLuint swizzle) +static unsigned int translate_strq_swizzle(unsigned int swizzle) { - GLuint swiz = 0; + unsigned int swiz = 0; int i; for (i = 0; i < 4; i++) swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; @@ -221,67 +265,194 @@ static GLuint translate_strq_swizzle(GLuint swizzle) /** * Emit a single TEX instruction */ -static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst) +static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) { PROG_CODE; if (code->inst_end >= 511) { error("emit_tex: Too many instructions"); - return GL_FALSE; + return 0; } int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_TEX - | (inst->WriteMask << 11) + | (inst->DstReg.WriteMask << 11) | R500_INST_TEX_SEM_WAIT; code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) - code->inst[ip].inst1 |= R500_TEX_UNSCALED; + if (inst->TexSrcTarget == RC_TEXTURE_RECT) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; switch (inst->Opcode) { - case RADEON_OPCODE_KIL: + case RC_OPCODE_KIL: code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; break; - case RADEON_OPCODE_TEX: + case RC_OPCODE_TEX: code->inst[ip].inst1 |= R500_TEX_INST_LD; break; - case RADEON_OPCODE_TXB: + case RC_OPCODE_TXB: code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; break; - case RADEON_OPCODE_TXP: + case RC_OPCODE_TXP: code->inst[ip].inst1 |= R500_TEX_INST_PROJ; break; default: error("emit_tex can't handle opcode %x\n", inst->Opcode); } - code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex) - | (translate_strq_swizzle(inst->SrcSwizzle) << 8) - | R500_TEX_DST_ADDR(inst->DestIndex) + use_temporary(code, inst->SrcReg[0].Index); + if (inst->Opcode != RC_OPCODE_KIL) + use_temporary(code, inst->DstReg.Index); + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - return GL_TRUE; + return 1; } -static const struct radeon_pair_handler pair_handler = { - .EmitPaired = emit_paired, - .EmitTex = emit_tex, - .MaxHwTemps = 128 -}; +static void grow_branches(struct emit_state * s) +{ + unsigned int newreserved = s->BranchesReserved * 2; + struct branch_info * newbranches; + + if (!newreserved) + newreserved = 4; + + newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info)); + memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info)); + + s->Branches = newbranches; + s->BranchesReserved = newreserved; +} + +static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) +{ + if (s->Code->inst_end >= 511) { + rc_error(s->C, "emit_tex: Too many instructions"); + return; + } + + unsigned int newip = ++s->Code->inst_end; + + s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; + + if (inst->U.I.Opcode == RC_OPCODE_IF) { + if (s->CurrentBranchDepth >= 32) { + rc_error(s->C, "Branch depth exceeds hardware limit"); + return; + } + + if (s->CurrentBranchDepth >= s->BranchesReserved) + grow_branches(s); + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++]; + branch->If = newip; + branch->Else = -1; + branch->Endif = -1; + + if (s->CurrentBranchDepth > s->MaxBranchDepth) + s->MaxBranchDepth = s->CurrentBranchDepth; + + /* actual instruction is filled in at ENDIF time */ + } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) { + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Else = newip; + + /* actual instruction is filled in at ENDIF time */ + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Endif = newip; + + s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ + | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + ; + + if (branch->Else >= 0) { + /* increment branch counter also if jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); + + s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_B_ELSE /* all active pixels want to jump */ + | R500_FC_B_OP0_NONE /* no counter op if stay */ + | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } else { + /* don't touch branch counter on jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + + s->CurrentBranchDepth--; + } else { + rc_error(s->C, "%s: unknown opcode %i\n", __FUNCTION__, inst->U.I.Opcode); + } +} void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { + struct emit_state s; struct r500_fragment_program_code *code = &compiler->code->code.r500; + struct rc_instruction * inst; + + memset(&s, 0, sizeof(s)); + s.C = &compiler->Base; + s.Code = code; - _mesa_bzero(code, sizeof(*code)); + memset(code, 0, sizeof(*code)); code->max_temp_idx = 1; code->inst_end = -1; - radeonPairProgram(compiler, &pair_handler, compiler); + for (inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->IsFlowControl) { + emit_flowcontrol(&s, inst); + } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + continue; + } else { + emit_tex(compiler, &inst->U.I); + } + } else { + emit_paired(compiler, &inst->U.P); + } + } + + if (code->max_temp_idx >= 128) + rc_error(&compiler->Base, "Too many hardware temporaries used"); + if (compiler->Base.Error) return; @@ -296,4 +467,11 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } + + if (s.MaxBranchDepth >= 4) { + if (code->max_temp_idx < 1) + code->max_temp_idx = 1; + + code->us_fc_ctrl |= R500_FC_FULL_FC_EN; + } } diff --git a/r300/compiler/radeon_code.c b/r300/compiler/radeon_code.c index a9dedf7..853b2be 100644 --- a/r300/compiler/radeon_code.c +++ b/r300/compiler/radeon_code.c @@ -25,11 +25,13 @@ * */ -#include "main/mtypes.h" -#include "shader/prog_instruction.h" - #include "radeon_code.h" +#include <stdlib.h> +#include <string.h> + +#include "radeon_program.h" + void rc_constants_init(struct rc_constant_list * c) { memset(c, 0, sizeof(*c)); @@ -138,13 +140,13 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da unsigned index; int free_index = -1; struct rc_constant constant; - unsigned comp; for(index = 0; index < c->Count; ++index) { if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + unsigned comp; for(comp = 0; comp < c->Constants[index].Size; ++comp) { if (c->Constants[index].u.Immediate[comp] == data) { - *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp); return index; } } @@ -155,9 +157,9 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da } if (free_index >= 0) { - comp = c->Constants[free_index].Size++; + unsigned comp = c->Constants[free_index].Size++; c->Constants[free_index].u.Immediate[comp] = data; - *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp); return free_index; } @@ -165,7 +167,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da constant.Type = RC_CONSTANT_IMMEDIATE; constant.Size = 1; constant.u.Immediate[0] = data; - *swizzle = SWIZZLE_XXXX; + *swizzle = RC_SWIZZLE_XXXX; return rc_constants_add(c, &constant); } diff --git a/r300/compiler/radeon_code.h b/r300/compiler/radeon_code.h index 3e88554..902b7cf 100644 --- a/r300/compiler/radeon_code.h +++ b/r300/compiler/radeon_code.h @@ -89,6 +89,23 @@ unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const floa unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); /** + * Compare functions. + * + * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you + * the correct GL compare function. + */ +typedef enum { + RC_COMPARE_FUNC_NEVER = 0, + RC_COMPARE_FUNC_LESS, + RC_COMPARE_FUNC_EQUAL, + RC_COMPARE_FUNC_LEQUAL, + RC_COMPARE_FUNC_GREATER, + RC_COMPARE_FUNC_NOTEQUAL, + RC_COMPARE_FUNC_GEQUAL, + RC_COMPARE_FUNC_ALWAYS +} rc_compare_func; + +/** * Stores state that influences the compilation of a fragment program. */ struct r300_fragment_program_external_state { @@ -105,10 +122,12 @@ struct r300_fragment_program_external_state { /** * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. if compare function is GL_LEQUAL, this field is 3] + * this field specifies the compare function. + * + * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). * * Otherwise, this field is 0. + * \sa rc_compare_func */ unsigned texture_compare_func : 3; } unit[16]; @@ -163,6 +182,8 @@ struct r500_fragment_program_code { int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ int max_temp_idx; + + uint32_t us_fc_ctrl; }; struct rX00_fragment_program_code { diff --git a/r300/compiler/radeon_compiler.c b/r300/compiler/radeon_compiler.c index da950d5..c0e7a7f 100644 --- a/r300/compiler/radeon_compiler.c +++ b/r300/compiler/radeon_compiler.c @@ -23,6 +23,8 @@ #include "radeon_compiler.h" #include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> #include "radeon_program.h" @@ -34,7 +36,7 @@ void rc_init(struct radeon_compiler * c) memory_pool_init(&c->Pool); c->Program.Instructions.Prev = &c->Program.Instructions; c->Program.Instructions.Next = &c->Program.Instructions; - c->Program.Instructions.I.Opcode = OPCODE_END; + c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; } void rc_destroy(struct radeon_compiler * c) @@ -60,7 +62,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...) { va_list ap; - c->Error = GL_TRUE; + c->Error = 1; if (!c->ErrorMsg) { /* Only remember the first error */ @@ -91,28 +93,63 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...) } } +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) +{ + rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); + return 1; +} + +/** + * Recompute c->Program.InputsRead and c->Program.OutputsWritten + * based on which inputs and outputs are actually referenced + * in program instructions. + */ +void rc_calculate_inputs_outputs(struct radeon_compiler * c) +{ + struct rc_instruction *inst; + + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) + { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int i; + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) + c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; + } + } +} + /** * Rewrite the program such that everything that source the given input * register will source new_input instead. */ -void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input) +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) { struct rc_instruction * inst; c->Program.InputsRead &= ~(1 << input); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; - for(i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) { - inst->I.SrcReg[i].File = new_input.File; - inst->I.SrcReg[i].Index = new_input.Index; - inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle); - if (!inst->I.SrcReg[i].Abs) { - inst->I.SrcReg[i].Negate ^= new_input.Negate; - inst->I.SrcReg[i].Abs = new_input.Abs; + for(i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { + inst->U.I.SrcReg[i].File = new_input.File; + inst->U.I.SrcReg[i].Index = new_input.Index; + inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); + if (!inst->U.I.SrcReg[i].Abs) { + inst->U.I.SrcReg[i].Negate ^= new_input.Negate; + inst->U.I.SrcReg[i].Abs = new_input.Abs; } c->Program.InputsRead |= 1 << new_input.Index; @@ -134,12 +171,12 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou c->Program.OutputsWritten &= ~(1 << output); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { - inst->I.DstReg.Index = new_output; - inst->I.DstReg.WriteMask &= writemask; + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.Index = new_output; + inst->U.I.DstReg.WriteMask &= writemask; c->Program.OutputsWritten |= 1 << new_output; } @@ -157,33 +194,33 @@ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_ou struct rc_instruction * inst; for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = tempreg; + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tempreg; } } } inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_OUTPUT; - inst->I.DstReg.Index = output; + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = output; - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[0].Index = tempreg; - inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_OUTPUT; - inst->I.DstReg.Index = dup_output; + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = dup_output; - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[0].Index = tempreg; - inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; c->Program.OutputsWritten |= 1 << dup_output; } @@ -201,60 +238,60 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig /* perspective divide */ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); - inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; - inst_rcp->I.DstReg.Index = tempregi; - inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tempregi; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT; - inst_rcp->I.SrcReg[0].Index = new_input; - inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_rcp->U.I.SrcReg[0].Index = new_input; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp); - inst_mul->I.Opcode = OPCODE_MUL; + inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mul->I.DstReg.Index = tempregi; - inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tempregi; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mul->I.SrcReg[0].File = PROGRAM_INPUT; - inst_mul->I.SrcReg[0].Index = new_input; + inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_mul->U.I.SrcReg[0].Index = new_input; - inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY; - inst_mul->I.SrcReg[1].Index = tempregi; - inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tempregi; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; /* viewport transformation */ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul); - inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mad->I.DstReg.Index = tempregi; - inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = tempregi; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst_mad->I.SrcReg[0].Index = tempregi; - inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = tempregi; + inst_mad->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); - inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR; - inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); - inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + inst_mad->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); - inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR; - inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index; - inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[2].Index = inst_mad->U.I.SrcReg[1].Index; + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); struct rc_instruction * inst; for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; - for(i = 0; i < numsrcs; i++) { - if (inst->I.SrcReg[i].File == PROGRAM_INPUT && - inst->I.SrcReg[i].Index == wpos) { - inst->I.SrcReg[i].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[i].Index = tempregi; + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == wpos) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; } } } diff --git a/r300/compiler/radeon_compiler.h b/r300/compiler/radeon_compiler.h index e63ab88..87a732c 100644 --- a/r300/compiler/radeon_compiler.h +++ b/r300/compiler/radeon_compiler.h @@ -23,35 +23,11 @@ #ifndef RADEON_COMPILER_H #define RADEON_COMPILER_H -#include "main/mtypes.h" -#include "shader/prog_instruction.h" - #include "memory_pool.h" #include "radeon_code.h" +#include "radeon_program.h" - -struct rc_instruction { - struct rc_instruction * Prev; - struct rc_instruction * Next; - struct prog_instruction I; -}; - -struct rc_program { - /** - * Instructions.Next points to the first instruction, - * Instructions.Prev points to the last instruction. - */ - struct rc_instruction Instructions; - - /* Long term, we should probably remove InputsRead & OutputsWritten, - * since updating dependent state can be fragile, and they aren't - * actually used very often. */ - uint32_t InputsRead; - uint32_t OutputsWritten; - uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ - - struct rc_constant_list Constants; -}; +struct rc_swizzle_caps; struct radeon_compiler { struct memory_pool Pool; @@ -59,6 +35,14 @@ struct radeon_compiler { unsigned Debug:1; unsigned Error:1; char * ErrorMsg; + + /** + * Variables used internally, not be touched by callers + * of the compiler + */ + /*@{*/ + struct rc_swizzle_caps * SwizzleCaps; + /*@}*/ }; void rc_init(struct radeon_compiler * c); @@ -67,11 +51,26 @@ void rc_destroy(struct radeon_compiler * c); void rc_debug(struct radeon_compiler * c, const char * fmt, ...); void rc_error(struct radeon_compiler * c, const char * fmt, ...); -void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program); +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion); + +/** + * This macro acts like an if-statement that can be used to implement + * non-aborting assertions in the compiler. + * + * It checks whether \p cond is true. If not, an internal compiler error is + * flagged and the if-clause is run. + * + * A typical use-case would be: + * + * if (rc_assert(c, condition-that-must-be-true)) + * return; + */ +#define rc_assert(c, cond) \ + (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) void rc_calculate_inputs_outputs(struct radeon_compiler * c); -void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input); +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input); @@ -97,7 +96,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); struct r300_vertex_program_compiler { struct radeon_compiler Base; struct r300_vertex_program_code *code; - GLbitfield RequiredOutputs; + uint32_t RequiredOutputs; void * UserData; void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); diff --git a/r300/compiler/radeon_dataflow.c b/r300/compiler/radeon_dataflow.c new file mode 100644 index 0000000..a003e77 --- /dev/null +++ b/r300/compiler/radeon_dataflow.c @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_program.h" + + +static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + unsigned int src; + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0, chan; + + if (inst->SrcReg[src].File == RC_FILE_NONE) + return; + + for(chan = 0; chan < 4; ++chan) + refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan); + + refmask &= RC_MASK_XYZW; + + for(chan = 0; chan < 4; ++chan) { + if (GET_BIT(refmask, chan)) { + cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan); + } + } + + if (refmask && inst->SrcReg[src].RelAddr) + cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + unsigned int refmasks[3] = { 0, 0, 0 }; + unsigned int arg, src; + + if (inst->RGB.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + + for(arg = 0; arg < opcode->NumSrcRegs; ++arg) { + unsigned int chan; + + for(chan = 0; chan < 3; ++chan) { + unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); + if (swz < 4) + refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz; + } + } + } + + if (inst->Alpha.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + + for(arg = 0; arg < opcode->NumSrcRegs; ++arg) { + if (inst->Alpha.Arg[arg].Swizzle < 4) + refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle; + } + } + + for(src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + unsigned int chan; + for(chan = 0; chan < 3; ++chan) { + if (GET_BIT(refmasks[src], chan)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan); + } + } + + if (inst->Alpha.Src[src].Used) { + if (GET_BIT(refmasks[src], 3)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3); + } + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + reads_normal(inst, cb, userdata); + } else { + reads_pair(inst, cb, userdata); + } +} + + + +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + unsigned int chan; + + for(chan = 0; chan < 4; ++chan) { + if (GET_BIT(inst->DstReg.WriteMask, chan)) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan); + } + } + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); +} + +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + unsigned int chan; + + for(chan = 0; chan < 3; ++chan) { + if (GET_BIT(inst->RGB.WriteMask, chan)) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan); + } + + if (inst->Alpha.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + writes_normal(inst, cb, userdata); + } else { + writes_pair(inst, cb, userdata); + } +} diff --git a/r300/compiler/radeon_dataflow.h b/r300/compiler/radeon_dataflow.h new file mode 100644 index 0000000..5aa4cb6 --- /dev/null +++ b/r300/compiler/radeon_dataflow.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_DATAFLOW_H +#define RADEON_DATAFLOW_H + +#include "radeon_program_constants.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_swizzle_caps; + + +/** + * Help analyze the register accesses of instructions. + */ +/*@{*/ +typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan); +void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +/*@}*/ + + +/** + * Compiler passes based on dataflow analysis. + */ +/*@{*/ +typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, + void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); +void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata); +void rc_dataflow_swizzles(struct radeon_compiler * c); +/*@}*/ + +#endif /* RADEON_DATAFLOW_H */ diff --git a/r300/compiler/radeon_dataflow_deadcode.c b/r300/compiler/radeon_dataflow_deadcode.c new file mode 100644 index 0000000..d78efa1 --- /dev/null +++ b/r300/compiler/radeon_dataflow_deadcode.c @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +struct updatemask_state { + unsigned char Output[RC_REGISTER_MAX_INDEX]; + unsigned char Temporary[RC_REGISTER_MAX_INDEX]; + unsigned char Address; + unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; +}; + +struct instruction_state { + unsigned char WriteMask:4; + unsigned char WriteALUResult:1; + unsigned char SrcReg[3]; +}; + +struct branchinfo { + unsigned int HaveElse:1; + + struct updatemask_state StoreEndif; + struct updatemask_state StoreElse; +}; + +struct deadcode_state { + struct radeon_compiler * C; + struct instruction_state * Instructions; + + struct updatemask_state R; + + struct branchinfo * BranchStack; + unsigned int BranchStackSize; + unsigned int BranchStackReserved; +}; + + +static void or_updatemasks( + struct updatemask_state * dst, + struct updatemask_state * a, + struct updatemask_state * b) +{ + unsigned int i; + + for(i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { + dst->Output[i] = a->Output[i] | b->Output[i]; + dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; + } + + for(i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) + dst->Special[i] = a->Special[i] | b->Special[i]; + + dst->Address = a->Address | b->Address; +} + +static void push_branch(struct deadcode_state * s) +{ + if (s->BranchStackSize >= s->BranchStackReserved) { + unsigned int new_reserve = 2 * s->BranchStackReserved; + struct branchinfo * new_stack; + + if (!new_reserve) + new_reserve = 4; + + new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct branchinfo)); + memcpy(new_stack, s->BranchStack, s->BranchStackSize * sizeof(struct branchinfo)); + + s->BranchStack = new_stack; + s->BranchStackReserved = new_reserve; + } + + struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++]; + branch->HaveElse = 0; + memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); +} + +static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); + return 0; + } + + if (file == RC_FILE_OUTPUT) + return &s->R.Output[index]; + else + return &s->R.Temporary[index]; + } else if (file == RC_FILE_ADDRESS) { + return &s->R.Address; + } else if (file == RC_FILE_SPECIAL) { + if (index >= RC_NUM_SPECIAL_REGISTERS) { + rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->R.Special[index]; + } + + return 0; +} + +static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned char * pused = get_used_ptr(s, file, index); + if (pused) + *pused |= mask; +} + +static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + struct instruction_state * insts = &s->Instructions[inst->IP]; + unsigned int usedmask = 0, src; + + if (opcode->HasDstReg) { + unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); + if (pused) { + usedmask = *pused & inst->U.I.DstReg.WriteMask; + *pused &= ~usedmask; + } + } + + insts->WriteMask |= usedmask; + + if (inst->U.I.WriteALUResult) { + unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); + if (pused && *pused) { + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usedmask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usedmask |= RC_MASK_W; + + *pused = 0; + insts->WriteALUResult = 1; + } + } + + unsigned int srcmasks[3]; + rc_compute_sources_for_writemask(opcode, usedmask, srcmasks); + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0, chan; + unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; + insts->SrcReg[src] |= newsrcmask; + + for(chan = 0; chan < 4; ++chan) { + if (GET_BIT(newsrcmask, chan)) + refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + } + + /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ + refmask &= RC_MASK_XYZW; + + if (!refmask) + continue; + + mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); + + if (inst->U.I.SrcReg[src].RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void mark_output_use(void * data, unsigned int index, unsigned int mask) +{ + struct deadcode_state * s = data; + + mark_used(s, RC_FILE_OUTPUT, index, mask); +} + +void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata) +{ + struct deadcode_state s; + unsigned int nr_instructions; + struct rc_instruction * inst; + + memset(&s, 0, sizeof(s)); + s.C = c; + + nr_instructions = rc_recompute_ips(c); + s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); + memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); + + dce(userdata, &s, &mark_output_use); + + for(inst = c->Program.Instructions.Prev; + inst != &c->Program.Instructions; + inst = inst->Prev) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->IsFlowControl) { + if (opcode->Opcode == RC_OPCODE_ENDIF) { + push_branch(&s); + } else { + if (s.BranchStackSize) { + struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; + + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, + &s.R, + branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); + + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); + } else { + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); + branch->HaveElse = 1; + } + } else { + rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); + } + } else { + rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__); + } + } + } + + update_instruction(&s, inst); + } + + unsigned int ip = 0; + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next, ++ip) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);\ + int dead = 1; + unsigned int src, chan; + + if (!opcode->HasDstReg) { + dead = 0; + } else { + inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; + if (s.Instructions[ip].WriteMask) + dead = 0; + + if (s.Instructions[ip].WriteALUResult) + dead = 0; + else + inst->U.I.WriteALUResult = RC_ALURESULT_NONE; + } + + if (dead) { + struct rc_instruction * todelete = inst; + inst = inst->Prev; + rc_remove_instruction(todelete); + continue; + } + + unsigned int srcmasks[3]; + unsigned int usemask = s.Instructions[ip].WriteMask; + + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usemask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usemask |= RC_MASK_W; + + rc_compute_sources_for_writemask(opcode, usemask, srcmasks); + + for(src = 0; src < 3; ++src) { + for(chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + } + + rc_calculate_inputs_outputs(c); +} diff --git a/r300/compiler/radeon_dataflow_swizzles.c b/r300/compiler/radeon_dataflow_swizzles.c new file mode 100644 index 0000000..d4ccd35 --- /dev/null +++ b/r300/compiler/radeon_dataflow_swizzles.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static void rewrite_source(struct radeon_compiler * c, + struct rc_instruction * inst, unsigned src) +{ + struct rc_swizzle_split split; + unsigned int tempreg = rc_find_free_temporary(c); + unsigned int usemask, chan, phase; + + usemask = 0; + for(chan = 0; chan < 4; ++chan) { + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) + usemask |= 1 << chan; + } + + c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); + + for(phase = 0; phase < split.NumPhases; ++phase) { + struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); + unsigned int phase_refmask; + unsigned int masked_negate; + + mov->U.I.Opcode = RC_OPCODE_MOV; + mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + mov->U.I.DstReg.Index = tempreg; + mov->U.I.DstReg.WriteMask = split.Phase[phase]; + mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + + phase_refmask = 0; + for(chan = 0; chan < 4; ++chan) { + if (!GET_BIT(split.Phase[phase], chan)) + SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); + else + phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); + } + + phase_refmask &= RC_MASK_XYZW; + + masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; + if (masked_negate == 0) + mov->U.I.SrcReg[0].Negate = 0; + else if (masked_negate == split.Phase[phase]) + mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + + } + + inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[src].Index = tempreg; + inst->U.I.SrcReg[src].Swizzle = 0; + inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; + inst->U.I.SrcReg[src].Abs = 0; + for(chan = 0; chan < 4; ++chan) { + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, + GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); + } +} + +void rc_dataflow_swizzles(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int src; + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + rewrite_source(c, inst, src); + } + } +} diff --git a/r300/compiler/radeon_nqssadce.c b/r300/compiler/radeon_nqssadce.c deleted file mode 100644 index aaaa50a..0000000 --- a/r300/compiler/radeon_nqssadce.c +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * "Not-quite SSA" and Dead-Code Elimination. - * - * @note This code uses SWIZZLE_NIL in a source register to indicate that - * the corresponding component is ignored by the corresponding instruction. - */ - -#include "radeon_nqssadce.h" - -#include "radeon_compiler.h" - - -/** - * Return the @ref register_state for the given register (or 0 for untracked - * registers, i.e. constants). - */ -static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) -{ - switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_OUTPUT: return &s->Outputs[index]; - case PROGRAM_ADDRESS: return &s->Address; - default: return 0; - } -} - - -/** - * Left multiplication of a register with a swizzle - * - * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. - */ -struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) -{ - struct prog_src_register tmp = srcreg; - int i; - tmp.Swizzle = 0; - tmp.Negate = NEGATE_NONE; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); - } - } - return tmp; -} - - -static void track_used_srcreg(struct nqssadce_state* s, - GLint src, GLuint sourced) -{ - struct prog_instruction * inst = &s->IP->I; - int i; - GLuint deswz_source = 0; - - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) { - GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); - deswz_source |= 1 << swz; - } else { - inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); - } - } - - if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { - struct prog_dst_register dstreg = inst->DstReg; - dstreg.File = PROGRAM_TEMPORARY; - dstreg.Index = rc_find_free_temporary(s->Compiler); - dstreg.WriteMask = sourced; - - s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); - - inst->SrcReg[src].File = PROGRAM_TEMPORARY; - inst->SrcReg[src].Index = dstreg.Index; - inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].Negate = NEGATE_NONE; - inst->SrcReg[src].Abs = 0; - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) - inst->SrcReg[src].Swizzle |= i << (3*i); - else - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); - } - deswz_source = sourced; - } - - struct register_state *regstate; - - if (inst->SrcReg[src].RelAddr) { - regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); - if (regstate) - regstate->Sourced |= WRITEMASK_X; - } else { - regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); - if (regstate) - regstate->Sourced |= deswz_source & 0xf; - } -} - -static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex) -{ - int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode); - int i; - for(i = 0; i < nsrc; ++i) - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex) - inst->I.SrcReg[i].Index = newindex; -} - -static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) -{ - GLuint newindex = rc_find_free_temporary(s->Compiler); - struct rc_instruction * inst; - for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex) - inst->I.DstReg.Index = newindex; - unalias_srcregs(inst, oldindex, newindex); - } - unalias_srcregs(s->IP, oldindex, newindex); -} - - -/** - * Handle one instruction. - */ -static void process_instruction(struct nqssadce_state* s) -{ - struct prog_instruction *inst = &s->IP->I; - GLuint WriteMask; - - if (inst->Opcode == OPCODE_END) - return; - - if (inst->Opcode != OPCODE_KIL) { - struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); - if (!regstate) { - rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n", - inst->DstReg.File, inst->DstReg.Index); - return; - } - - inst->DstReg.WriteMask &= regstate->Sourced; - regstate->Sourced &= ~inst->DstReg.WriteMask; - - if (inst->DstReg.WriteMask == 0) { - struct rc_instruction * inst_remove = s->IP; - s->IP = s->IP->Prev; - rc_remove_instruction(inst_remove); - return; - } - - if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) - unalias_temporary(s, inst->DstReg.Index); - } - - WriteMask = inst->DstReg.WriteMask; - - switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MOV: - track_used_srcreg(s, 0, WriteMask); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - case OPCODE_SGE: - case OPCODE_SLT: - track_used_srcreg(s, 0, WriteMask); - track_used_srcreg(s, 1, WriteMask); - break; - case OPCODE_CMP: - case OPCODE_MAD: - track_used_srcreg(s, 0, WriteMask); - track_used_srcreg(s, 1, WriteMask); - track_used_srcreg(s, 2, WriteMask); - break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - track_used_srcreg(s, 0, 0x1); - break; - case OPCODE_DP3: - track_used_srcreg(s, 0, 0x7); - track_used_srcreg(s, 1, 0x7); - break; - case OPCODE_DP4: - track_used_srcreg(s, 0, 0xf); - track_used_srcreg(s, 1, 0xf); - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - track_used_srcreg(s, 0, 0xf); - break; - case OPCODE_DST: - track_used_srcreg(s, 0, 0x6); - track_used_srcreg(s, 1, 0xa); - break; - case OPCODE_EXP: - case OPCODE_LOG: - case OPCODE_POW: - track_used_srcreg(s, 0, 0x3); - break; - case OPCODE_LIT: - track_used_srcreg(s, 0, 0xb); - break; - default: - rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode); - return; - } - - s->IP = s->IP->Prev; -} - -void rc_calculate_inputs_outputs(struct radeon_compiler * c) -{ - struct rc_instruction *inst; - - c->Program.InputsRead = 0; - c->Program.OutputsWritten = 0; - - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) - { - int i; - int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode); - - for (i = 0; i < num_src_regs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_INPUT) - c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index; - } - - if (_mesa_num_inst_dst_regs(inst->I.Opcode)) { - if (inst->I.DstReg.File == PROGRAM_OUTPUT) - c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index; - } - } -} - -void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data) -{ - struct nqssadce_state s; - - _mesa_bzero(&s, sizeof(s)); - s.Compiler = c; - s.Descr = descr; - s.UserData = data; - s.Descr->Init(&s); - s.IP = c->Program.Instructions.Prev; - - while(s.IP != &c->Program.Instructions && !c->Error) - process_instruction(&s); - - rc_calculate_inputs_outputs(c); -} diff --git a/r300/compiler/radeon_nqssadce.h b/r300/compiler/radeon_nqssadce.h deleted file mode 100644 index b3fc77a..0000000 --- a/r300/compiler/radeon_nqssadce.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_NQSSADCE_H_ -#define __RADEON_PROGRAM_NQSSADCE_H_ - -#include "radeon_program.h" - -struct register_state { - /** - * Bitmask indicating which components of the register are sourced - * by later instructions. - */ - GLuint Sourced : 4; -}; - -/** - * Maintain state such as which registers are used, which registers are - * read from, etc. - */ -struct nqssadce_state { - struct radeon_compiler *Compiler; - struct radeon_nqssadce_descr *Descr; - - /** - * All instructions after this instruction pointer have been dealt with. - */ - struct rc_instruction * IP; - - /** - * Which registers are read by subsequent instructions? - */ - struct register_state Temps[MAX_PROGRAM_TEMPS]; - struct register_state Outputs[VERT_RESULT_MAX]; - struct register_state Address; - - void * UserData; -}; - - -/** - * This structure contains a description of the hardware in-so-far as - * it is required for the NqSSA-DCE pass. - */ -struct radeon_nqssadce_descr { - /** - * Fill in which outputs - */ - void (*Init)(struct nqssadce_state *); - - /** - * Check whether the given swizzle, absolute and negate combination - * can be implemented natively by the hardware for this opcode. - */ - GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); - - /** - * Emit (at the current IP) the instruction MOV dst, src; - * The transformation will work recursively on the emitted instruction(s). - */ - void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); -}; - -void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data); -struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); - -#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/r300/compiler/radeon_opcodes.c b/r300/compiler/radeon_opcodes.c new file mode 100644 index 0000000..9285748 --- /dev/null +++ b/r300/compiler/radeon_opcodes.c @@ -0,0 +1,431 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_opcodes.h" + +#include "radeon_program_constants.h" + +struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { + { + .Opcode = RC_OPCODE_NOP, + .Name = "NOP" + }, + { + .Opcode = RC_OPCODE_ILLEGAL_OPCODE, + .Name = "ILLEGAL OPCODE" + }, + { + .Opcode = RC_OPCODE_ABS, + .Name = "ABS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ADD, + .Name = "ADD", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ARL, + .Name = "ARL", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_CMP, + .Name = "CMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_COS, + .Name = "COS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_DDX, + .Name = "DDX", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DDY, + .Name = "DDY", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DP3, + .Name = "DP3", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP4, + .Name = "DP4", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DPH, + .Name = "DPH", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DST, + .Name = "DST", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_EX2, + .Name = "EX2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_EXP, + .Name = "EXP", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_FLR, + .Name = "FLR", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_FRC, + .Name = "FRC", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_KIL, + .Name = "KIL", + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_LG2, + .Name = "LG2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_LIT, + .Name = "LIT", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LOG, + .Name = "LOG", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LRP, + .Name = "LRP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAD, + .Name = "MAD", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAX, + .Name = "MAX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MIN, + .Name = "MIN", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MOV, + .Name = "MOV", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MUL, + .Name = "MUL", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_POW, + .Name = "POW", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RCP, + .Name = "RCP", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RSQ, + .Name = "RSQ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SCS, + .Name = "SCS", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SEQ, + .Name = "SEQ", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SFL, + .Name = "SFL", + .NumSrcRegs = 0, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGE, + .Name = "SGE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGT, + .Name = "SGT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SIN, + .Name = "SIN", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SLE, + .Name = "SLE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SLT, + .Name = "SLT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SNE, + .Name = "SNE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SUB, + .Name = "SUB", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SWZ, + .Name = "SWZ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_XPD, + .Name = "XPD", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TEX, + .Name = "TEX", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXB, + .Name = "TXB", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXD, + .Name = "TXD", + .HasTexture = 1, + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXL, + .Name = "TXL", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXP, + .Name = "TXP", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_IF, + .Name = "IF", + .IsFlowControl = 1, + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_ELSE, + .Name = "ELSE", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDIF, + .Name = "ENDIF", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_REPL_ALPHA, + .Name = "REPL_ALPHA", + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_BEGIN_TEX, + .Name = "BEGIN_TEX" + } +}; + +void rc_compute_sources_for_writemask( + const struct rc_opcode_info * opcode, + unsigned int writemask, + unsigned int *srcmasks) +{ + unsigned int src; + + srcmasks[0] = 0; + srcmasks[1] = 0; + srcmasks[2] = 0; + + if (opcode->Opcode == RC_OPCODE_KIL) + srcmasks[0] |= RC_MASK_XYZW; + else if (opcode->Opcode == RC_OPCODE_IF) + srcmasks[0] |= RC_MASK_X; + + if (!writemask) + return; + + if (opcode->IsComponentwise) { + for(src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else if (opcode->IsStandardScalar) { + for(src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= RC_MASK_X; + } else { + switch(opcode->Opcode) { + case RC_OPCODE_ARL: + srcmasks[0] |= RC_MASK_X; + break; + case RC_OPCODE_DP3: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + break; + case RC_OPCODE_DP4: + srcmasks[0] |= RC_MASK_XYZW; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_TEX: + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: + srcmasks[0] |= RC_MASK_XYZW; + break; + case RC_OPCODE_DST: + srcmasks[0] |= 0x6; + srcmasks[1] |= 0xa; + break; + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_OPCODE_LIT: + srcmasks[0] |= 0xb; + break; + default: + break; + } + } +} diff --git a/r300/compiler/radeon_opcodes.h b/r300/compiler/radeon_opcodes.h new file mode 100644 index 0000000..a3c5b86 --- /dev/null +++ b/r300/compiler/radeon_opcodes.h @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_OPCODES_H +#define RADEON_OPCODES_H + +#include <assert.h> + +/** + * Opcodes understood by the Radeon compiler. + */ +typedef enum { + RC_OPCODE_NOP = 0, + RC_OPCODE_ILLEGAL_OPCODE, + + /** vec4 instruction: dst.c = abs(src0.c); */ + RC_OPCODE_ABS, + + /** vec4 instruction: dst.c = src0.c + src1.c; */ + RC_OPCODE_ADD, + + /** special instruction: load address register + * dst.x = floor(src.x), where dst must be an address register */ + RC_OPCODE_ARL, + + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ + RC_OPCODE_CMP, + + /** scalar instruction: dst = cos(src0.x) */ + RC_OPCODE_COS, + + /** special instruction: take vec4 partial derivative in X direction + * dst.c = d src0.c / dx */ + RC_OPCODE_DDX, + + /** special instruction: take vec4 partial derivative in Y direction + * dst.c = d src0.c / dy */ + RC_OPCODE_DDY, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ + RC_OPCODE_DP3, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ + RC_OPCODE_DP4, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */ + RC_OPCODE_DPH, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_DST, + + /** scalar instruction: dst = 2**src0.x */ + RC_OPCODE_EX2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_EXP, + + /** vec4 instruction: dst.c = floor(src0.c) */ + RC_OPCODE_FLR, + + /** vec4 instruction: dst.c = src0.c - floor(src0.c) */ + RC_OPCODE_FRC, + + /** special instruction: stop execution if any component of src0 is negative */ + RC_OPCODE_KIL, + + /** scalar instruction: dst = log_2(src0.x) */ + RC_OPCODE_LG2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LIT, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LOG, + + /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */ + RC_OPCODE_LRP, + + /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ + RC_OPCODE_MAD, + + /** vec4 instruction: dst.c = max(src0.c, src1.c) */ + RC_OPCODE_MAX, + + /** vec4 instruction: dst.c = min(src0.c, src1.c) */ + RC_OPCODE_MIN, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_MOV, + + /** vec4 instruction: dst.c = src0.c*src1.c */ + RC_OPCODE_MUL, + + /** scalar instruction: dst = src0.x ** src1.x */ + RC_OPCODE_POW, + + /** scalar instruction: dst = 1 / src0.x */ + RC_OPCODE_RCP, + + /** scalar instruction: dst = 1 / sqrt(src0.x) */ + RC_OPCODE_RSQ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_SCS, + + /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SEQ, + + /** vec4 instruction: dst.c = 0.0 */ + RC_OPCODE_SFL, + + /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGE, + + /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGT, + + /** scalar instruction: dst = sin(src0.x) */ + RC_OPCODE_SIN, + + /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLE, + + /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLT, + + /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SNE, + + /** vec4 instruction: dst.c = src0.c - src1.c */ + RC_OPCODE_SUB, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_SWZ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_XPD, + + RC_OPCODE_TEX, + RC_OPCODE_TXB, + RC_OPCODE_TXD, + RC_OPCODE_TXL, + RC_OPCODE_TXP, + + /** branch instruction: + * If src0.x != 0.0, continue with the next instruction; + * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. + */ + RC_OPCODE_IF, + + /** branch instruction: jump to matching RC_OPCODE_ENDIF */ + RC_OPCODE_ELSE, + + /** branch instruction: has no effect */ + RC_OPCODE_ENDIF, + + /** special instruction, used in R300-R500 fragment program pair instructions + * indicates that the result of the alpha operation shall be replicated + * across all other channels */ + RC_OPCODE_REPL_ALPHA, + + /** special instruction, used in R300-R500 fragment programs + * to indicate the start of a block of texture instructions that + * can run simultaneously. */ + RC_OPCODE_BEGIN_TEX, + + MAX_RC_OPCODE +} rc_opcode; + + +struct rc_opcode_info { + rc_opcode Opcode; + const char * Name; + + /** true if the instruction reads from a texture. + * + * \note This is false for the KIL instruction, even though KIL is + * a texture instruction from a hardware point of view. */ + unsigned int HasTexture:1; + + unsigned int NumSrcRegs:2; + unsigned int HasDstReg:1; + + /** true if this instruction affects control flow */ + unsigned int IsFlowControl:1; + + /** true if this is a vector instruction that operates on components in parallel + * without any cross-component interaction */ + unsigned int IsComponentwise:1; + + /** true if this instruction sources only its operands X components + * to compute one result which is smeared across all output channels */ + unsigned int IsStandardScalar:1; +}; + +extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; + +static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) +{ + assert((unsigned int)opcode < MAX_RC_OPCODE); + assert(rc_opcodes[opcode].Opcode == opcode); + + return &rc_opcodes[opcode]; +} + +void rc_compute_sources_for_writemask( + const struct rc_opcode_info * opcode, + unsigned int writemask, + unsigned int *srcmasks); + +#endif /* RADEON_OPCODES_H */ diff --git a/r300/compiler/radeon_pair_regalloc.c b/r300/compiler/radeon_pair_regalloc.c new file mode 100644 index 0000000..23c2a5e --- /dev/null +++ b/r300/compiler/radeon_pair_regalloc.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct live_intervals { + int Start; + int End; + struct live_intervals * Next; +}; + +struct register_info { + struct live_intervals Live; + + unsigned int Used:1; + unsigned int Allocated:1; + rc_register_file File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct hardware_register { + struct live_intervals * Used; +}; + +struct regalloc_state { + struct radeon_compiler * C; + + struct register_info Input[RC_REGISTER_MAX_INDEX]; + struct register_info Temporary[RC_REGISTER_MAX_INDEX]; + + struct hardware_register * HwTemporary; + unsigned int NumHwTemporaries; +}; + +static void print_live_intervals(struct live_intervals * src) +{ + if (!src) { + DBG("(null)"); + return; + } + + while(src) { + DBG("(%i,%i)", src->Start, src->End); + src = src->Next; + } +} + +static void add_live_intervals(struct regalloc_state * s, + struct live_intervals ** dst, struct live_intervals * src) +{ + struct live_intervals ** dst_backup = dst; + + if (VERBOSE) { + DBG("add_live_intervals: "); + print_live_intervals(*dst); + DBG(" to "); + print_live_intervals(src); + DBG("\n"); + } + + while(src) { + if (*dst && (*dst)->End < src->Start) { + dst = &(*dst)->Next; + } else if (!*dst || (*dst)->Start > src->End) { + struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li)); + li->Start = src->Start; + li->End = src->End; + li->Next = *dst; + *dst = li; + src = src->Next; + } else { + if (src->End > (*dst)->End) + (*dst)->End = src->End; + if (src->Start < (*dst)->Start) + (*dst)->Start = src->Start; + src = src->Next; + } + } + + if (VERBOSE) { + DBG(" result: "); + print_live_intervals(*dst_backup); + DBG("\n"); + } +} + +static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src) +{ + if (VERBOSE) { + DBG("overlap_live_intervals: "); + print_live_intervals(dst); + DBG(" to "); + print_live_intervals(src); + DBG("\n"); + } + + while(src && dst) { + if (dst->End <= src->Start) { + dst = dst->Next; + } else if (dst->End <= src->End) { + DBG(" overlap\n"); + return 1; + } else if (dst->Start < src->End) { + DBG(" overlap\n"); + return 1; + } else { + src = src->Next; + } + } + + DBG(" no overlap\n"); + + return 0; +} + +static int try_add_live_intervals(struct regalloc_state * s, + struct live_intervals ** dst, struct live_intervals * src) +{ + if (overlap_live_intervals(*dst, src)) + return 0; + + add_live_intervals(s, dst, src); + return 1; +} + +static void scan_callback(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct regalloc_state * s = data; + struct register_info * reg; + + if (file == RC_FILE_TEMPORARY) + reg = &s->Temporary[index]; + else if (file == RC_FILE_INPUT) + reg = &s->Input[index]; + else + return; + + if (!reg->Used) { + reg->Used = 1; + if (file == RC_FILE_INPUT) + reg->Live.Start = -1; + else + reg->Live.Start = inst->IP; + reg->Live.End = inst->IP; + } else { + if (inst->IP > reg->Live.End) + reg->Live.End = inst->IP; + } +} + +static void compute_live_intervals(struct regalloc_state * s) +{ + struct rc_instruction * inst; + + rc_recompute_ips(s->C); + + for(inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads(inst, scan_callback, s); + rc_for_all_writes(inst, scan_callback, s); + } +} + +static void rewrite_register(struct regalloc_state * s, + rc_register_file * file, unsigned int * index) +{ + const struct register_info * reg; + + if (*file == RC_FILE_TEMPORARY) + reg = &s->Temporary[*index]; + else if (*file == RC_FILE_INPUT) + reg = &s->Input[*index]; + else + return; + + if (reg->Allocated) { + *file = reg->File; + *index = reg->Index; + } +} + +static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + unsigned int src; + + if (opcode->HasDstReg) { + rc_register_file file = inst->DstReg.File; + unsigned int index = inst->DstReg.Index; + + rewrite_register(s, &file, &index); + + inst->DstReg.File = file; + inst->DstReg.Index = index; + } + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + rc_register_file file = inst->SrcReg[src].File; + unsigned int index = inst->SrcReg[src].Index; + + rewrite_register(s, &file, &index); + + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } +} + +static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_instruction * inst) +{ + unsigned int src; + + if (inst->RGB.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->RGB.DestIndex; + + rewrite_register(s, &file, &index); + + inst->RGB.DestIndex = index; + } + + if (inst->Alpha.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->Alpha.DestIndex; + + rewrite_register(s, &file, &index); + + inst->Alpha.DestIndex = index; + } + + for(src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + rc_register_file file = inst->RGB.Src[src].File; + unsigned int index = inst->RGB.Src[src].Index; + + rewrite_register(s, &file, &index); + + inst->RGB.Src[src].File = file; + inst->RGB.Src[src].Index = index; + } + + if (inst->Alpha.Src[src].Used) { + rc_register_file file = inst->Alpha.Src[src].File; + unsigned int index = inst->Alpha.Src[src].Index; + + rewrite_register(s, &file, &index); + + inst->Alpha.Src[src].File = file; + inst->Alpha.Src[src].Index = index; + } + } +} + +static void do_regalloc(struct regalloc_state * s) +{ + struct rc_instruction * inst; + unsigned int index; + + /* Simple and stupid greedy register allocation */ + for(index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + struct register_info * reg = &s->Temporary[index]; + unsigned int hwreg; + + if (!reg->Used) + continue; + + for(hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) { + if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, ®->Live)) { + reg->Allocated = 1; + reg->File = RC_FILE_TEMPORARY; + reg->Index = hwreg; + goto success; + } + } + + rc_error(s->C, "Ran out of hardware temporaries\n"); + return; + + success:; + } + + /* Rewrite all instructions based on the translation table we built */ + for(inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) + rewrite_normal_instruction(s, &inst->U.I); + else + rewrite_pair_instruction(s, &inst->U.P); + } +} + +static void alloc_input(void * data, unsigned int input, unsigned int hwreg) +{ + struct regalloc_state * s = data; + + if (!s->Input[input].Used) + return; + + add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live); + + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; + +} + +void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps) +{ + struct regalloc_state s; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + s.NumHwTemporaries = maxtemps; + s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register)); + memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register)); + + compute_live_intervals(&s); + + c->AllocateHwInputs(c, &alloc_input, &s); + + do_regalloc(&s); +} diff --git a/r300/compiler/radeon_pair_schedule.c b/r300/compiler/radeon_pair_schedule.c new file mode 100644 index 0000000..2890c00 --- /dev/null +++ b/r300/compiler/radeon_pair_schedule.c @@ -0,0 +1,508 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct schedule_instruction { + struct rc_instruction * Instruction; + + /** Next instruction in the linked list of ready instructions. */ + struct schedule_instruction *NextReady; + + /** Values that this instruction reads and writes */ + struct reg_value * WriteValues[4]; + struct reg_value * ReadValues[12]; + unsigned int NumWriteValues:3; + unsigned int NumReadValues:4; + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + unsigned int NumDependencies:5; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + struct schedule_instruction *Reader; + struct reg_value_reader *Next; +}; + +/** + * Used to keep track which values are stored in each component of a + * RC_FILE_TEMPORARY. + */ +struct reg_value { + struct schedule_instruction * Writer; + + /** + * Unordered linked list of instructions that read from this value. + * When this value becomes available, we increase all readers' + * dependency count. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is decremented each time + * a reader of the value is committed. + * When the reader cound reaches zero, the dependency count + * of the instruction writing \ref Next is decremented. + */ + unsigned int NumReaders; + + struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ +}; + +struct register_state { + struct reg_value * Values[4]; +}; + +struct schedule_state { + struct radeon_compiler * C; + struct schedule_instruction * Current; + + struct register_state Temporary[RC_REGISTER_MAX_INDEX]; + + /** + * Linked lists of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + /*@{*/ + struct schedule_instruction *ReadyFullALU; + struct schedule_instruction *ReadyRGB; + struct schedule_instruction *ReadyAlpha; + struct schedule_instruction *ReadyTEX; + /*@}*/ +}; + +static struct reg_value ** get_reg_valuep(struct schedule_state * s, + rc_register_file file, unsigned int index, unsigned int chan) +{ + if (file != RC_FILE_TEMPORARY) + return 0; + + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->Temporary[index].Values[chan]; +} + +static struct reg_value * get_reg_value(struct schedule_state * s, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + if (!pv) + return 0; + return *pv; +} + +static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) +{ + inst->NextReady = *list; + *list = inst; +} + +static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i is now ready\n", sinst->Instruction->IP); + + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) + add_inst_to_list(&s->ReadyTEX, sinst); + else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyRGB, sinst); + else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyAlpha, sinst); + else + add_inst_to_list(&s->ReadyFullALU, sinst); +} + +static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) +{ + assert(sinst->NumDependencies > 0); + sinst->NumDependencies--; + if (!sinst->NumDependencies) + instruction_ready(s, sinst); +} + +static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst) +{ + unsigned int i; + + DBG("%i: commit\n", sinst->Instruction->IP); + + for(i = 0; i < sinst->NumReadValues; ++i) { + struct reg_value * v = sinst->ReadValues[i]; + assert(v->NumReaders > 0); + v->NumReaders--; + if (!v->NumReaders) { + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } + + for(i = 0; i < sinst->NumWriteValues; ++i) { + struct reg_value * v = sinst->WriteValues[i]; + struct reg_value_reader * r; + + if (v->NumReaders) { + for(r = v->Readers; r; r = r->Next) { + decrease_dependencies(s, r->Reader); + } + } else { + /* This happens in instruction sequences of the type + * OP r.x, ...; + * OP r.x, r.x, ...; + * See also the subtlety in how instructions that both + * read and write the same register are scanned. + */ + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + */ +static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) +{ + struct schedule_instruction *readytex; + + assert(s->ReadyTEX); + + /* Don't let the ready list change under us! */ + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + + /* Node marker for R300 */ + struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev); + inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; + + /* Link texture instructions back in */ + while(readytex) { + struct schedule_instruction * tex = readytex; + readytex = readytex->NextReady; + + rc_insert_instruction(before->Prev, tex->Instruction); + commit_instruction(s, tex); + } +} + + +static int destructive_merge_instructions( + struct rc_pair_instruction * rgb, + struct rc_pair_instruction * alpha) +{ + unsigned int arg; + + assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); + assert(alpha->RGB.Opcode == RC_OPCODE_NOP); + + /* Copy alpha args into rgb */ + const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode); + + for(arg = 0; arg < opcode->NumSrcRegs; ++arg) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; + rc_register_file file = 0; + unsigned int index = 0; + + if (alpha->Alpha.Arg[arg].Swizzle < 3) { + srcrgb = 1; + file = alpha->RGB.Src[oldsrc].File; + index = alpha->RGB.Src[oldsrc].Index; + } else if (alpha->Alpha.Arg[arg].Swizzle < 4) { + srcalpha = 1; + file = alpha->Alpha.Src[oldsrc].File; + index = alpha->Alpha.Src[oldsrc].Index; + } + + int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); + if (source < 0) + return 0; + + rgb->Alpha.Arg[arg].Source = source; + rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; + rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; + rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; + } + + /* Copy alpha opcode into rgb */ + rgb->Alpha.Opcode = alpha->Alpha.Opcode; + rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; + rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; + rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; + rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; + rgb->Alpha.Saturate = alpha->Alpha.Saturate; + + /* Merge ALU result writing */ + if (alpha->WriteALUResult) { + if (rgb->WriteALUResult) + return 0; + + rgb->WriteALUResult = alpha->WriteALUResult; + rgb->ALUResultCompare = alpha->ALUResultCompare; + } + + return 1; +} + +/** + * Try to merge the given instructions into the rgb instructions. + * + * Return true on success; on failure, return false, and keep + * the instructions untouched. + */ +static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) +{ + struct rc_pair_instruction backup; + + memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); + + if (destructive_merge_instructions(rgb, alpha)) + return 1; + + memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); + return 0; +} + + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) +{ + struct schedule_instruction * sinst; + + if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + } else if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } + + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_instruction(s, sinst); + } else { + struct schedule_instruction **prgb; + struct schedule_instruction **palpha; + + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + struct schedule_instruction * psirgb = *prgb; + struct schedule_instruction * psialpha = *palpha; + + if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P)) + continue; + + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + rc_insert_instruction(before->Prev, psirgb->Instruction); + commit_instruction(s, psirgb); + commit_instruction(s, psialpha); + goto success; + } + } + + /* No success in pairing; just take the first RGB instruction */ + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_instruction(s, sinst); + success: ; + } +} + +static void scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value * v = get_reg_value(s, file, index, chan); + + if (!v) + return; + + if (v->Writer == s->Current) { + /* The instruction reads and writes to a register component. + * In this case, we only want to increment dependencies by one. + */ + return; + } + + DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); + reader->Reader = s->Current; + reader->Next = v->Readers; + v->Readers = reader; + v->NumReaders++; + + s->Current->NumDependencies++; + + if (s->Current->NumReadValues >= 12) { + rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); + } else { + s->Current->ReadValues[s->Current->NumReadValues++] = v; + } +} + +static void scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + + if (!pv) + return; + + DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); + memset(newv, 0, sizeof(*newv)); + + newv->Writer = s->Current; + + if (*pv) { + (*pv)->Next = newv; + s->Current->NumDependencies++; + } + + *pv = newv; + + if (s->Current->NumWriteValues >= 4) { + rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); + } else { + s->Current->WriteValues[s->Current->NumWriteValues++] = newv; + } +} + +static void schedule_block(struct r300_fragment_program_compiler * c, + struct rc_instruction * begin, struct rc_instruction * end) +{ + struct schedule_state s; + struct rc_instruction * inst; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + + /* Scan instructions for data dependencies */ + unsigned int ip = 0; + for(inst = begin; inst != end; inst = inst->Next) { + s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); + memset(s.Current, 0, sizeof(struct schedule_instruction)); + + s.Current->Instruction = inst; + inst->IP = ip++; + + DBG("%i: Scanning\n", inst->IP); + + /* The order of things here is subtle and maybe slightly + * counter-intuitive, to account for the case where an + * instruction writes to the same register as it reads + * from. */ + rc_for_all_writes(inst, &scan_write, &s); + rc_for_all_reads(inst, &scan_read, &s); + + DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); + + if (!s.Current->NumDependencies) + instruction_ready(&s, s.Current); + } + + /* Temporarily unlink all instructions */ + begin->Prev->Next = end; + end->Prev = begin->Prev; + + /* Schedule instructions back */ + while(!s.C->Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s, end); + + while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) + emit_one_alu(&s, end); + } +} + +static int is_controlflow(struct rc_instruction * inst) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + return opcode->IsFlowControl; + } + return 0; +} + +void rc_pair_schedule(struct r300_fragment_program_compiler *c) +{ + struct rc_instruction * inst = c->Base.Program.Instructions.Next; + while(inst != &c->Base.Program.Instructions) { + if (is_controlflow(inst)) { + inst = inst->Next; + continue; + } + + struct rc_instruction * first = inst; + + while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) + inst = inst->Next; + + DBG("Schedule one block\n"); + schedule_block(c, first, inst); + } +} diff --git a/r300/compiler/radeon_pair_translate.c b/r300/compiler/radeon_pair_translate.c new file mode 100644 index 0000000..933cf13 --- /dev/null +++ b/r300/compiler/radeon_pair_translate.c @@ -0,0 +1,255 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler.h" + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct rc_sub_instruction *inst) +{ + struct rc_src_register tmp; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[1].Negate = RC_MASK_NONE; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case RC_OPCODE_MOV: + /* AMD say we should use CMP. + * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_MUL: + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct rc_sub_instruction * inst, + int * needrgb, int * needalpha, int * istranscendent) +{ + *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; + *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; + *istranscendent = 0; + + if (inst->WriteALUResult == RC_ALURESULT_X) + *needrgb = 1; + else if (inst->WriteALUResult == RC_ALURESULT_W) + *needalpha = 1; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + case RC_OPCODE_CMP: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_FRC: + case RC_OPCODE_MAD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MOV: + case RC_OPCODE_MUL: + break; + case RC_OPCODE_COS: + case RC_OPCODE_EX2: + case RC_OPCODE_LG2: + case RC_OPCODE_RCP: + case RC_OPCODE_RSQ: + case RC_OPCODE_SIN: + *istranscendent = 1; + *needalpha = 1; + break; + case RC_OPCODE_DP4: + *needalpha = 1; + /* fall through */ + case RC_OPCODE_DP3: + *needrgb = 1; + break; + default: + break; + } +} + + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. + */ +static void set_pair_instruction(struct r300_fragment_program_compiler *c, + struct rc_pair_instruction * pair, + struct rc_sub_instruction * inst) +{ + memset(pair, 0, sizeof(struct rc_pair_instruction)); + + int needrgb, needalpha, istranscendent; + classify_instruction(inst, &needrgb, &needalpha, &istranscendent); + + if (needrgb) { + if (istranscendent) + pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (needalpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + int nargs = opcode->NumSrcRegs; + int i; + + /* Special case for DDX/DDY (MDH/MDV). */ + if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) { + nargs++; + } + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + int source; + if (needrgb && !istranscendent) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + int j; + for(j = 0; j < 3; ++j) { + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz < 3) + srcrgb = 1; + else if (swz < 4) + srcalpha = 1; + } + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); + } + if (needalpha) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3); + if (swz < 3) + srcrgb = 1; + else if (swz < 4) + srcalpha = 1; + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = swz; + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); + } + } + + /* Destination handling */ + if (inst->DstReg.File == RC_FILE_OUTPUT) { + if (inst->DstReg.Index == c->OutputColor) { + pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; + pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else if (inst->DstReg.Index == c->OutputDepth) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } else { + if (needrgb) { + pair->RGB.DestIndex = inst->DstReg.Index; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; + } + if (needalpha) { + pair->Alpha.DestIndex = inst->DstReg.Index; + pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } + + if (inst->WriteALUResult) { + pair->WriteALUResult = inst->WriteALUResult; + pair->ALUResultCompare = inst->ALUResultCompare; + } +} + + +/** + * Translate all ALU instructions into corresponding pair instructions, + * performing no other changes. + */ +void rc_pair_translate(struct r300_fragment_program_compiler *c) +{ + struct rc_instruction *inst; + + for(inst = c->Base.Program.Instructions.Next; + inst != &c->Base.Program.Instructions; + inst = inst->Next) { + if (inst->Type != RC_INSTRUCTION_NORMAL) + continue; + + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) + continue; + + struct rc_sub_instruction copy = inst->U.I; + + final_rewrite(©); + inst->Type = RC_INSTRUCTION_PAIR; + set_pair_instruction(c, &inst->U.P, ©); + } +} diff --git a/r300/compiler/radeon_program.c b/r300/compiler/radeon_program.c index 605edb6..fb4752f 100644 --- a/r300/compiler/radeon_program.c +++ b/r300/compiler/radeon_program.c @@ -27,9 +27,9 @@ #include "radeon_program.h" +#include <stdio.h> + #include "radeon_compiler.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" /** @@ -69,38 +69,58 @@ void radeonLocalTransform( } } +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} -GLint rc_find_free_temporary(struct radeon_compiler * c) +unsigned int rc_find_free_temporary(struct radeon_compiler * c) { struct rc_instruction * rcinst; - GLboolean used[MAX_PROGRAM_TEMPS]; - GLuint i; + char used[RC_REGISTER_MAX_INDEX]; + unsigned int i; memset(used, 0, sizeof(used)); for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { - const struct prog_instruction *inst = &rcinst->I; - const GLuint nsrc = _mesa_num_inst_src_regs(inst->Opcode); - const GLuint ndst = _mesa_num_inst_dst_regs(inst->Opcode); - GLuint k; - - for (k = 0; k < nsrc; k++) { - if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) - used[inst->SrcReg[k].Index] = GL_TRUE; + const struct rc_sub_instruction *inst = &rcinst->U.I; + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); + unsigned int k; + + for (k = 0; k < opcode->NumSrcRegs; k++) { + if (inst->SrcReg[k].File == RC_FILE_TEMPORARY) + used[inst->SrcReg[k].Index] = 1; } - if (ndst) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) - used[inst->DstReg.Index] = GL_TRUE; + if (opcode->HasDstReg) { + if (inst->DstReg.File == RC_FILE_TEMPORARY) + used[inst->DstReg.Index] = 1; } } - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) { if (!used[i]) return i; } - return -1; + rc_error(c, "Ran out of temporary registers\n"); + return 0; } @@ -108,24 +128,31 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) { struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); - inst->Prev = 0; - inst->Next = 0; + memset(inst, 0, sizeof(struct rc_instruction)); - _mesa_init_instructions(&inst->I, 1); + inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; return inst; } - -struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst) { - struct rc_instruction * inst = rc_alloc_instruction(c); - inst->Prev = after; inst->Next = after->Next; inst->Prev->Next = inst; inst->Next->Prev = inst; +} + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +{ + struct rc_instruction * inst = rc_alloc_instruction(c); + + rc_insert_instruction(after, inst); return inst; } @@ -136,76 +163,21 @@ void rc_remove_instruction(struct rc_instruction * inst) inst->Next->Prev = inst->Prev; } - -void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program) -{ - struct prog_instruction *source; - unsigned int i; - - for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) { - struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - dest->I = *source; - } - - c->Program.ShadowSamplers = program->ShadowSamplers; - c->Program.InputsRead = program->InputsRead; - c->Program.OutputsWritten = program->OutputsWritten; - - int isNVProgram = 0; - - if (program->Target == GL_VERTEX_PROGRAM_ARB) { - struct gl_vertex_program * vp = (struct gl_vertex_program *) program; - isNVProgram = vp->IsNVProgram; - } - - if (isNVProgram) { - /* NV_vertex_program has a fixed-sized constant environment. - * This could be handled more efficiently for programs that - * do not use relative addressing. - */ - for(i = 0; i < 96; ++i) { - struct rc_constant constant; - - constant.Type = RC_CONSTANT_EXTERNAL; - constant.Size = 4; - constant.u.External = i; - - rc_constants_add(&c->Program.Constants, &constant); - } - } else { - for(i = 0; i < program->Parameters->NumParameters; ++i) { - struct rc_constant constant; - - constant.Type = RC_CONSTANT_EXTERNAL; - constant.Size = 4; - constant.u.External = i; - - rc_constants_add(&c->Program.Constants, &constant); - } - } -} - - /** - * Print program to stderr, default options. + * Return the number of instructions in the program. */ -void rc_print_program(const struct rc_program *prog) +unsigned int rc_recompute_ips(struct radeon_compiler * c) { - GLuint indent = 0; - GLuint linenum = 1; - struct rc_instruction *inst; - - fprintf(stderr, "# Radeon Compiler Program\n"); + unsigned int ip = 0; + struct rc_instruction * inst; - for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { - fprintf(stderr, "%3d: ", linenum); + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + inst->IP = ip++; + } - /* Massive hack: We rely on the fact that the printers do not actually - * use the gl_program argument (last argument) in debug mode */ - indent = _mesa_fprint_instruction_opt( - stderr, &inst->I, - indent, PROG_PRINT_DEBUG, 0); + c->Program.Instructions.IP = 0xcafedead; - linenum++; - } + return ip; } diff --git a/r300/compiler/radeon_program.h b/r300/compiler/radeon_program.h index 5619586..0359288 100644 --- a/r300/compiler/radeon_program.h +++ b/r300/compiler/radeon_program.h @@ -28,37 +28,144 @@ #ifndef __RADEON_PROGRAM_H_ #define __RADEON_PROGRAM_H_ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" -#include "shader/prog_instruction.h" +#include <stdint.h> +#include <string.h> + +#include "radeon_opcodes.h" +#include "radeon_code.h" +#include "radeon_program_constants.h" +#include "radeon_program_pair.h" struct radeon_compiler; -struct rc_instruction; -struct rc_program; -enum { - PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ +struct rc_src_register { + rc_register_file File:3; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int Swizzle:12; + + /** Take the component-wise absolute value */ + unsigned int Abs:1; + + /** Post-Abs negation. */ + unsigned int Negate:4; +}; + +struct rc_dst_register { + rc_register_file File:3; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int WriteMask:4; +}; + +/** + * Instructions are maintained by the compiler in a doubly linked list + * of these structures. + * + * This instruction format is intended to be expanded for hardware-specific + * trickery. At different stages of compilation, a different set of + * instruction types may be valid. + */ +struct rc_sub_instruction { + struct rc_src_register SrcReg[3]; + struct rc_dst_register DstReg; + + /** + * Opcode of this instruction, according to \ref rc_opcode enums. + */ + rc_opcode Opcode:8; + + /** + * Saturate each value of the result to the range [0,1] or [-1,1], + * according to \ref rc_saturate_mode enums. + */ + rc_saturate_mode SaturateMode:2; + + /** + * Writing to the special register RC_SPECIAL_ALU_RESULT + */ + /*@{*/ + rc_write_aluresult WriteALUResult:2; + rc_compare_func ALUResultCompare:3; + /*@}*/ + + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + unsigned int TexSrcUnit:5; + + /** Source texture target, one of the \ref rc_texture_target enums */ + rc_texture_target TexSrcTarget:3; + + /** True if tex instruction should do shadow comparison */ + unsigned int TexShadow:1; + /*@}*/ +}; + +typedef enum { + RC_INSTRUCTION_NORMAL = 0, + RC_INSTRUCTION_PAIR +} rc_instruction_type; + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + + rc_instruction_type Type; + union { + struct rc_sub_instruction I; + struct rc_pair_instruction P; + } U; + + /** + * Warning: IPs are not stable. If you want to use them, + * you need to recompute them at the beginning of each pass + * using \ref rc_recompute_ips + */ + unsigned int IP; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; }; enum { - OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ + OPCODE_REPL_ALPHA = MAX_RC_OPCODE /**< used in paired instructions */ }; -#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) -#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) -static inline GLuint get_swz(GLuint swz, GLuint idx) +static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) { if (idx & 0x4) return idx; return GET_SWZ(swz, idx); } -static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) +static inline unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) { - GLuint ret = 0; + unsigned int ret = 0; ret |= get_swz(src, swz_x); ret |= get_swz(src, swz_y) << 3; @@ -68,22 +175,24 @@ static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, G return ret; } -static inline GLuint combine_swizzles(GLuint src, GLuint swz) +static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) { - GLuint ret = 0; + unsigned int ret = 0; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; return ret; } -static INLINE void reset_srcreg(struct prog_src_register* reg) +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +static inline void reset_srcreg(struct rc_src_register* reg) { - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; } @@ -92,13 +201,13 @@ static INLINE void reset_srcreg(struct prog_src_register* reg) * * The function will be called once for each instruction. * It has to either emit the appropriate transformed code for the instruction - * and return GL_TRUE, or return GL_FALSE if it doesn't understand the + * and return true, or return false if it doesn't understand the * instruction. * * The function gets passed the userData as last parameter. */ struct radeon_program_transformation { - GLboolean (*function)( + int (*function)( struct radeon_compiler*, struct rc_instruction*, void*); @@ -110,12 +219,15 @@ void radeonLocalTransform( int num_transformations, struct radeon_program_transformation* transformations); -GLint rc_find_free_temporary(struct radeon_compiler * c); +unsigned int rc_find_free_temporary(struct radeon_compiler * c); struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst); void rc_remove_instruction(struct rc_instruction * inst); +unsigned int rc_recompute_ips(struct radeon_compiler * c); + void rc_print_program(const struct rc_program *prog); #endif diff --git a/r300/compiler/radeon_program_alu.c b/r300/compiler/radeon_program_alu.c index f23ce30..ced66af 100644 --- a/r300/compiler/radeon_program_alu.c +++ b/r300/compiler/radeon_program_alu.c @@ -40,175 +40,164 @@ static struct rc_instruction *emit1( struct radeon_compiler * c, struct rc_instruction * after, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg) + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->I.Opcode = Opcode; - fpi->I.SaturateMode = Saturate; - fpi->I.DstReg = DstReg; - fpi->I.SrcReg[0] = SrcReg; + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg; return fpi; } static struct rc_instruction *emit2( struct radeon_compiler * c, struct rc_instruction * after, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->I.Opcode = Opcode; - fpi->I.SaturateMode = Saturate; - fpi->I.DstReg = DstReg; - fpi->I.SrcReg[0] = SrcReg0; - fpi->I.SrcReg[1] = SrcReg1; + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; return fpi; } static struct rc_instruction *emit3( struct radeon_compiler * c, struct rc_instruction * after, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, - struct prog_src_register SrcReg2) + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, + struct rc_src_register SrcReg2) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->I.Opcode = Opcode; - fpi->I.SaturateMode = Saturate; - fpi->I.DstReg = DstReg; - fpi->I.SrcReg[0] = SrcReg0; - fpi->I.SrcReg[1] = SrcReg1; - fpi->I.SrcReg[2] = SrcReg2; + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + fpi->U.I.SrcReg[2] = SrcReg2; return fpi; } -static struct prog_dst_register dstreg(int file, int index) +static struct rc_dst_register dstreg(int file, int index) { - struct prog_dst_register dst; + struct rc_dst_register dst; dst.File = file; dst.Index = index; - dst.WriteMask = WRITEMASK_XYZW; - dst.CondMask = COND_TR; + dst.WriteMask = RC_MASK_XYZW; dst.RelAddr = 0; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; return dst; } -static struct prog_dst_register dstregtmpmask(int index, int mask) +static struct rc_dst_register dstregtmpmask(int index, int mask) { - struct prog_dst_register dst = {0}; - dst.File = PROGRAM_TEMPORARY; + struct rc_dst_register dst = {0}; + dst.File = RC_FILE_TEMPORARY; dst.Index = index; dst.WriteMask = mask; dst.RelAddr = 0; - dst.CondMask = COND_TR; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; return dst; } -static const struct prog_src_register builtin_zero = { - .File = PROGRAM_BUILTIN, +static const struct rc_src_register builtin_zero = { + .File = RC_FILE_NONE, .Index = 0, - .Swizzle = SWIZZLE_0000 + .Swizzle = RC_SWIZZLE_0000 }; -static const struct prog_src_register builtin_one = { - .File = PROGRAM_BUILTIN, +static const struct rc_src_register builtin_one = { + .File = RC_FILE_NONE, .Index = 0, - .Swizzle = SWIZZLE_1111 + .Swizzle = RC_SWIZZLE_1111 }; -static const struct prog_src_register srcreg_undefined = { - .File = PROGRAM_UNDEFINED, +static const struct rc_src_register srcreg_undefined = { + .File = RC_FILE_NONE, .Index = 0, - .Swizzle = SWIZZLE_NOOP + .Swizzle = RC_SWIZZLE_XYZW }; -static struct prog_src_register srcreg(int file, int index) +static struct rc_src_register srcreg(int file, int index) { - struct prog_src_register src = srcreg_undefined; + struct rc_src_register src = srcreg_undefined; src.File = file; src.Index = index; return src; } -static struct prog_src_register srcregswz(int file, int index, int swz) +static struct rc_src_register srcregswz(int file, int index, int swz) { - struct prog_src_register src = srcreg_undefined; + struct rc_src_register src = srcreg_undefined; src.File = file; src.Index = index; src.Swizzle = swz; return src; } -static struct prog_src_register absolute(struct prog_src_register reg) +static struct rc_src_register absolute(struct rc_src_register reg) { - struct prog_src_register newreg = reg; + struct rc_src_register newreg = reg; newreg.Abs = 1; - newreg.Negate = NEGATE_NONE; + newreg.Negate = RC_MASK_NONE; return newreg; } -static struct prog_src_register negate(struct prog_src_register reg) +static struct rc_src_register negate(struct rc_src_register reg) { - struct prog_src_register newreg = reg; - newreg.Negate = newreg.Negate ^ NEGATE_XYZW; + struct rc_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; return newreg; } -static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) +static struct rc_src_register swizzle(struct rc_src_register reg, + rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) { - struct prog_src_register swizzled = reg; - swizzled.Swizzle = MAKE_SWIZZLE4( - x >= 4 ? x : GET_SWZ(reg.Swizzle, x), - y >= 4 ? y : GET_SWZ(reg.Swizzle, y), - z >= 4 ? z : GET_SWZ(reg.Swizzle, z), - w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); + struct rc_src_register swizzled = reg; + swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); return swizzled; } -static struct prog_src_register scalar(struct prog_src_register reg) +static struct rc_src_register scalar(struct rc_src_register reg) { - return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); + return swizzle(reg, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X); } static void transform_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { - struct prog_src_register src = inst->I.SrcReg[0]; + struct rc_src_register src = inst->U.I.SrcReg[0]; src.Abs = 1; - src.Negate = NEGATE_NONE; - emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src); + src.Negate = RC_MASK_NONE; + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src); rc_remove_instruction(inst); } static void transform_DP3(struct radeon_compiler* c, struct rc_instruction* inst) { - struct prog_src_register src0 = inst->I.SrcReg[0]; - struct prog_src_register src1 = inst->I.SrcReg[1]; - src0.Negate &= ~NEGATE_W; + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= SWIZZLE_ZERO << (3 * 3); - src1.Negate &= ~NEGATE_W; + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; src1.Swizzle &= ~(7 << (3 * 3)); - src1.Swizzle |= SWIZZLE_ZERO << (3 * 3); - emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); rc_remove_instruction(inst); } static void transform_DPH(struct radeon_compiler* c, struct rc_instruction* inst) { - struct prog_src_register src0 = inst->I.SrcReg[0]; - src0.Negate &= ~NEGATE_W; + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + src0.Negate &= ~RC_MASK_W; src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= SWIZZLE_ONE << (3 * 3); - emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]); + src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -219,9 +208,9 @@ static void transform_DPH(struct radeon_compiler* c, static void transform_DST(struct radeon_compiler* c, struct rc_instruction* inst) { - emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg, - swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), - swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); + emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); rc_remove_instruction(inst); } @@ -229,9 +218,9 @@ static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]); - emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg, - inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); rc_remove_instruction(inst); } @@ -256,64 +245,64 @@ static void transform_FLR(struct radeon_compiler* c, static void transform_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { - GLuint constant; - GLuint constant_swizzle; - GLuint temp; - struct prog_src_register srctemp; + unsigned int constant; + unsigned int constant_swizzle; + unsigned int temp; + struct rc_src_register srctemp; constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); - if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) { + if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { struct rc_instruction * inst_mov; inst_mov = emit1(c, inst, - OPCODE_MOV, 0, inst->I.DstReg, - srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c))); + RC_OPCODE_MOV, 0, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } - temp = inst->I.DstReg.Index; - srctemp = srcreg(PROGRAM_TEMPORARY, temp); + temp = inst->U.I.DstReg.Index; + srctemp = srcreg(RC_FILE_TEMPORARY, temp); // tmp.x = max(0.0, Src.x); // tmp.y = max(0.0, Src.y); // tmp.w = clamp(Src.z, -128+eps, 128-eps); - emit2(c, inst->Prev, OPCODE_MAX, 0, - dstregtmpmask(temp, WRITEMASK_XYW), - inst->I.SrcReg[0], - swizzle(srcreg(PROGRAM_CONSTANT, constant), - SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); - emit2(c, inst->Prev, OPCODE_MIN, 0, - dstregtmpmask(temp, WRITEMASK_Z), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(temp, RC_MASK_XYW), + inst->U.I.SrcReg[0], + swizzle(srcreg(RC_FILE_CONSTANT, constant), + RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, + dstregtmpmask(temp, RC_MASK_Z), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); // tmp.w = Pow(tmp.y, tmp.w) - emit1(c, inst->Prev, OPCODE_LG2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit2(c, inst->Prev, OPCODE_MUL, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); - emit1(c, inst->Prev, OPCODE_EX2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle(srctemp, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srctemp, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)); + emit1(c, inst->Prev, RC_OPCODE_EX2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 - emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, - dstregtmpmask(temp, WRITEMASK_Z), - negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_Z), + negate(swizzle(srctemp, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), builtin_zero); // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 - emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, - dstregtmpmask(temp, WRITEMASK_XYW), - swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_XYW), + swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); rc_remove_instruction(inst); } @@ -323,12 +312,12 @@ static void transform_LRP(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_ADD, 0, - dstreg(PROGRAM_TEMPORARY, tempreg), - inst->I.SrcReg[1], negate(inst->I.SrcReg[2])); - emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, - inst->I.DstReg, - inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + dstreg(RC_FILE_TEMPORARY, tempreg), + inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]); rc_remove_instruction(inst); } @@ -337,14 +326,14 @@ static void transform_POW(struct radeon_compiler* c, struct rc_instruction* inst) { int tempreg = rc_find_free_temporary(c); - struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); - struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); - tempdst.WriteMask = WRITEMASK_W; - tempsrc.Swizzle = SWIZZLE_WWWW; + struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg); + tempdst.WriteMask = RC_MASK_W; + tempsrc.Swizzle = RC_SWIZZLE_WWWW; - emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0])); - emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1])); - emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc); + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, scalar(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->U.I.SrcReg[1])); + emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); rc_remove_instruction(inst); } @@ -352,7 +341,26 @@ static void transform_POW(struct radeon_compiler* c, static void transform_RSQ(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]); + inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); +} + +static void transform_SEQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SFL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero); + rc_remove_instruction(inst); } static void transform_SGE(struct radeon_compiler* c, @@ -360,9 +368,33 @@ static void transform_SGE(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); - emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -372,9 +404,21 @@ static void transform_SLT(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); - emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SNE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -382,14 +426,14 @@ static void transform_SLT(struct radeon_compiler* c, static void transform_SUB(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.Opcode = OPCODE_ADD; - inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]); + inst->U.I.Opcode = RC_OPCODE_ADD; + inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); } static void transform_SWZ(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.Opcode = OPCODE_MOV; + inst->U.I.Opcode = RC_OPCODE_MOV; } static void transform_XPD(struct radeon_compiler* c, @@ -397,13 +441,13 @@ static void transform_XPD(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); - emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg, - swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), - swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + negate(srcreg(RC_FILE_TEMPORARY, tempreg))); rc_remove_instruction(inst); } @@ -414,7 +458,7 @@ static void transform_XPD(struct radeon_compiler* c, * no userData necessary. * * Eliminates the following ALU instructions: - * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD + * ABS, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD * using: * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP * @@ -423,27 +467,32 @@ static void transform_XPD(struct radeon_compiler* c, * * @note should be applicable to R300 and R500 fragment programs. */ -GLboolean radeonTransformALU( +int radeonTransformALU( struct radeon_compiler * c, struct rc_instruction* inst, void* unused) { - switch(inst->I.Opcode) { - case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; - case OPCODE_DST: transform_DST(c, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; - case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; - case OPCODE_POW: transform_POW(c, inst); return GL_TRUE; - case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE; - case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE; - case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_DST: transform_DST(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_POW: transform_POW(c, inst); return 1; + case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; + case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; + case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; + case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; + case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; default: - return GL_FALSE; + return 0; } } @@ -452,37 +501,37 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { /* Note: r500 can take absolute values, but r300 cannot. */ - inst->I.Opcode = OPCODE_MAX; - inst->I.SrcReg[1] = inst->I.SrcReg[0]; - inst->I.SrcReg[1].Negate ^= NEGATE_XYZW; + inst->U.I.Opcode = RC_OPCODE_MAX; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. */ -GLboolean r300_transform_vertex_alu( +int r300_transform_vertex_alu( struct radeon_compiler * c, struct rc_instruction* inst, void* unused) { - switch(inst->I.Opcode) { - case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE; - case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; default: - return GL_FALSE; + return 0; } } -static void sincos_constants(struct radeon_compiler* c, GLuint *constants) +static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) { - static const GLfloat SinCosConsts[2][4] = { + static const float SinCosConsts[2][4] = { { 1.273239545, // 4/PI -0.405284735, // -4/(PI*PI) @@ -511,26 +560,26 @@ static void sincos_constants(struct radeon_compiler* c, GLuint *constants) * MAD dest, tmp.y, weight, tmp.x */ static void sin_approx( - struct radeon_compiler* c, struct rc_instruction * before, - struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) -{ - GLuint tempreg = rc_find_free_temporary(c); - - emit2(c, before->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcreg(PROGRAM_CONSTANT, constants[0])); - emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); - emit3(c, before->Prev, OPCODE_MAD, 0, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + struct radeon_compiler* c, struct rc_instruction * inst, + struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) +{ + unsigned int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + srcreg(RC_FILE_CONSTANT, constants[0])); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y), + absolute(swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + absolute(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)), + negate(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y), + swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)); } /** @@ -538,81 +587,81 @@ static void sin_approx( * using only the basic instructions * MOV, ADD, MUL, MAD, FRC */ -GLboolean radeonTransformTrigSimple(struct radeon_compiler* c, +int radeonTransformTrigSimple(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != OPCODE_COS && - inst->I.Opcode != OPCODE_SIN && - inst->I.Opcode != OPCODE_SCS) - return GL_FALSE; + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; - GLuint constants[2]; - GLuint tempreg = rc_find_free_temporary(c); + unsigned int constants[2]; + unsigned int tempreg = rc_find_free_temporary(c); sincos_constants(c, constants); - if (inst->I.Opcode == OPCODE_COS) { + if (inst->U.I.Opcode == RC_OPCODE_COS) { // MAD tmp.x, src, 1/(2*PI), 0.75 // FRC tmp.x, tmp.x // MAD tmp.z, tmp.x, 2*PI, -PI - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - sin_approx(c, inst, inst->I.DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), constants); - } else if (inst->I.Opcode == OPCODE_SIN) { - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - sin_approx(c, inst, inst->I.DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), constants); } else { - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg)); - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - struct prog_dst_register dst = inst->I.DstReg; - - dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X; + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); + + struct rc_dst_register dst = inst->U.I.DstReg; + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; sin_approx(c, inst, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), constants); - dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y; + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; sin_approx(c, inst, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y), constants); } rc_remove_instruction(inst); - return GL_TRUE; + return 1; } @@ -624,53 +673,53 @@ GLboolean radeonTransformTrigSimple(struct radeon_compiler* c, * * @warning This transformation implicitly changes the semantics of SIN and COS! */ -GLboolean radeonTransformTrigScale(struct radeon_compiler* c, +int radeonTransformTrigScale(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != OPCODE_COS && - inst->I.Opcode != OPCODE_SIN && - inst->I.Opcode != OPCODE_SCS) - return GL_FALSE; + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; - static const GLfloat RCP_2PI = 0.15915494309189535; - GLuint temp; - GLuint constant; - GLuint constant_swizzle; + static const float RCP_2PI = 0.15915494309189535; + unsigned int temp; + unsigned int constant; + unsigned int constant_swizzle; temp = rc_find_free_temporary(c); constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); - emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), - srcreg(PROGRAM_TEMPORARY, temp)); - - if (inst->I.Opcode == OPCODE_COS) { - emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->I.Opcode == OPCODE_SIN) { - emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, - inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->I.Opcode == OPCODE_SCS) { - struct prog_dst_register moddst = inst->I.DstReg; - - if (inst->I.DstReg.WriteMask & WRITEMASK_X) { - moddst.WriteMask = WRITEMASK_X; - emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + + if (inst->U.I.Opcode == RC_OPCODE_COS) { + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, + inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { + struct rc_dst_register moddst = inst->U.I.DstReg; + + if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { + moddst.WriteMask = RC_MASK_X; + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); } - if (inst->I.DstReg.WriteMask & WRITEMASK_Y) { - moddst.WriteMask = WRITEMASK_Y; - emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { + moddst.WriteMask = RC_MASK_Y; + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); } } rc_remove_instruction(inst); - return GL_TRUE; + return 1; } /** @@ -681,15 +730,15 @@ GLboolean radeonTransformTrigScale(struct radeon_compiler* c, * @warning This explicitly changes the form of DDX and DDY! */ -GLboolean radeonTransformDeriv(struct radeon_compiler* c, +int radeonTransformDeriv(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY) - return GL_FALSE; + if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) + return 0; - inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); - inst->I.SrcReg[1].Negate = NEGATE_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE); + inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; - return GL_TRUE; + return 1; } diff --git a/r300/compiler/radeon_program_alu.h b/r300/compiler/radeon_program_alu.h index 147efec..7cb5f84 100644 --- a/r300/compiler/radeon_program_alu.h +++ b/r300/compiler/radeon_program_alu.h @@ -30,27 +30,27 @@ #include "radeon_program.h" -GLboolean radeonTransformALU( +int radeonTransformALU( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean r300_transform_vertex_alu( +int r300_transform_vertex_alu( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean radeonTransformTrigSimple( +int radeonTransformTrigSimple( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean radeonTransformTrigScale( +int radeonTransformTrigScale( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean radeonTransformDeriv( +int radeonTransformDeriv( struct radeon_compiler * c, struct rc_instruction * inst, void*); diff --git a/r300/compiler/radeon_program_constants.h b/r300/compiler/radeon_program_constants.h new file mode 100644 index 0000000..7c0d672 --- /dev/null +++ b/r300/compiler/radeon_program_constants.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_PROGRAM_CONSTANTS_H +#define RADEON_PROGRAM_CONSTANTS_H + +typedef enum { + RC_SATURATE_NONE = 0, + RC_SATURATE_ZERO_ONE, + RC_SATURATE_MINUS_PLUS_ONE +} rc_saturate_mode; + +typedef enum { + RC_TEXTURE_2D_ARRAY, + RC_TEXTURE_1D_ARRAY, + RC_TEXTURE_CUBE, + RC_TEXTURE_3D, + RC_TEXTURE_RECT, + RC_TEXTURE_2D, + RC_TEXTURE_1D +} rc_texture_target; + +typedef enum { + /** + * Used to indicate unused register descriptions and + * source register that use a constant swizzle. + */ + RC_FILE_NONE = 0, + RC_FILE_TEMPORARY, + + /** + * Input register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_INPUT, + + /** + * Output register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_OUTPUT, + RC_FILE_ADDRESS, + + /** + * Indicates a constant from the \ref rc_constant_list . + */ + RC_FILE_CONSTANT, + + /** + * Indicates a special register, see RC_SPECIAL_xxx. + */ + RC_FILE_SPECIAL +} rc_register_file; + +enum { + /** R500 fragment program ALU result "register" */ + RC_SPECIAL_ALU_RESULT = 0, + + /** Must be last */ + RC_NUM_SPECIAL_REGISTERS +}; + +#define RC_REGISTER_INDEX_BITS 10 +#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) + +typedef enum { + RC_SWIZZLE_X = 0, + RC_SWIZZLE_Y, + RC_SWIZZLE_Z, + RC_SWIZZLE_W, + RC_SWIZZLE_ZERO, + RC_SWIZZLE_ONE, + RC_SWIZZLE_HALF, + RC_SWIZZLE_UNUSED +} rc_swizzle; + +#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) +#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) +#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) +#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) +#define SET_SWZ(swz, idx, newv) \ + do { \ + (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \ + } while(0) + +#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) +#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) +#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) +#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) +#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) + +/** + * \name Bitmasks for components of vectors. + * + * Used for write masks, negation masks, etc. + */ +/*@{*/ +#define RC_MASK_NONE 0 +#define RC_MASK_X 1 +#define RC_MASK_Y 2 +#define RC_MASK_Z 4 +#define RC_MASK_W 8 +#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) +#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) +#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) +#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) +/*@}*/ + +typedef enum { + RC_ALURESULT_NONE = 0, + RC_ALURESULT_X, + RC_ALURESULT_W +} rc_write_aluresult; + +#endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/r300/compiler/radeon_program_pair.c b/r300/compiler/radeon_program_pair.c index 4c26db5..ee83959 100644 --- a/r300/compiler/radeon_program_pair.c +++ b/r300/compiler/radeon_program_pair.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Nicolai Haehnle. + * Copyright (C) 2008-2009 Nicolai Haehnle. * * All Rights Reserved. * @@ -25,584 +25,29 @@ * */ -/** - * @file - * - * Perform temporary register allocation and attempt to pair off instructions - * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction - * vs. ALU instruction scheduling. - */ - #include "radeon_program_pair.h" -#include "memory_pool.h" -#include "radeon_compiler.h" -#include "shader/prog_print.h" - -#define error(fmt, args...) do { \ - rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ -} while(0) - -struct pair_state_instruction { - struct prog_instruction Instruction; - GLuint IP; /**< Position of this instruction in original program */ - - GLuint IsTex:1; /**< Is a texture instruction */ - GLuint NeedRGB:1; /**< Needs the RGB ALU */ - GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ - GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ - - /** - * Number of (read and write) dependencies that must be resolved before - * this instruction can be scheduled. - */ - GLuint NumDependencies:5; - - /** - * Next instruction in the linked list of ready instructions. - */ - struct pair_state_instruction *NextReady; - - /** - * Values that this instruction writes - */ - struct reg_value *Values[4]; -}; - - -/** - * Used to keep track of which instructions read a value. - */ -struct reg_value_reader { - struct pair_state_instruction *Reader; - struct reg_value_reader *Next; -}; - -/** - * Used to keep track which values are stored in each component of a - * PROGRAM_TEMPORARY. - */ -struct reg_value { - struct pair_state_instruction *Writer; - struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ - - /** - * Unordered linked list of instructions that read from this value. - */ - struct reg_value_reader *Readers; - - /** - * Number of readers of this value. This is calculated during @ref scan_instructions - * and continually decremented during code emission. - * When this count reaches zero, the instruction that writes the @ref Next value - * can be scheduled. - */ - GLuint NumReaders; -}; - -/** - * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register - * to the proper hardware temporary. - */ -struct pair_register_translation { - GLuint Allocated:1; - GLuint HwIndex:8; - GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ - - /** - * Notes the value that is currently contained in each component - * (only used for PROGRAM_TEMPORARY registers). - */ - struct reg_value *Value[4]; -}; - -struct pair_state { - struct r300_fragment_program_compiler * Compiler; - const struct radeon_pair_handler *Handler; - GLboolean Verbose; - void *UserData; - - /** - * Translate Mesa registers to hardware registers - */ - struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; - struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; - - struct { - GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ - } HwTemps[128]; - - /** - * Linked list of instructions that can be scheduled right now, - * based on which ALU/TEX resources they require. - */ - struct pair_state_instruction *ReadyFullALU; - struct pair_state_instruction *ReadyRGB; - struct pair_state_instruction *ReadyAlpha; - struct pair_state_instruction *ReadyTEX; -}; - - -static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) -{ - switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_INPUT: return &s->Inputs[index]; - default: return 0; - } -} - -static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) -{ - struct pair_register_translation *t = get_register(s, file, index); - ASSERT(!s->HwTemps[hwindex].RefCount); - ASSERT(!t->Allocated); - s->HwTemps[hwindex].RefCount = t->RefCount; - t->Allocated = 1; - t->HwIndex = hwindex; -} - -static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) -{ - GLuint hwindex; - - struct pair_register_translation *t = get_register(s, file, index); - if (!t) { - error("get_hw_reg: %i[%i]\n", file, index); - return 0; - } - - if (t->Allocated) - return t->HwIndex; - - for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) - if (!s->HwTemps[hwindex].RefCount) - break; - - if (hwindex >= s->Handler->MaxHwTemps) { - error("Ran out of hardware temporaries"); - return 0; - } - - alloc_hw_reg(s, file, index, hwindex); - return hwindex; -} - - -static void deref_hw_reg(struct pair_state *s, GLuint hwindex) -{ - if (!s->HwTemps[hwindex].RefCount) { - error("Hwindex %i refcount error", hwindex); - return; - } - - s->HwTemps[hwindex].RefCount--; -} - -static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) -{ - pairinst->NextReady = *list; - *list = pairinst; -} - -/** - * The given instruction has become ready. Link it into the ready - * instructions. - */ -static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst) -{ - if (s->Verbose) - _mesa_printf("instruction_ready(%i)\n", pairinst->IP); - - if (pairinst->IsTex) - add_pairinst_to_list(&s->ReadyTEX, pairinst); - else if (!pairinst->NeedAlpha) - add_pairinst_to_list(&s->ReadyRGB, pairinst); - else if (!pairinst->NeedRGB) - add_pairinst_to_list(&s->ReadyAlpha, pairinst); - else - add_pairinst_to_list(&s->ReadyFullALU, pairinst); -} - - -/** - * Finally rewrite ADD, MOV, MUL as the appropriate native instruction - * and reverse the order of arguments for CMP. - */ -static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) -{ - struct prog_src_register tmp; - - switch(inst->Opcode) { - case OPCODE_ADD: - inst->SrcReg[2] = inst->SrcReg[1]; - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[1].Negate = NEGATE_NONE; - inst->Opcode = OPCODE_MAD; - break; - case OPCODE_CMP: - tmp = inst->SrcReg[2]; - inst->SrcReg[2] = inst->SrcReg[0]; - inst->SrcReg[0] = tmp; - break; - case OPCODE_MOV: - /* AMD say we should use CMP. - * However, when we transform - * KIL -r0; - * into - * CMP tmp, -r0, -r0, 0; - * KIL tmp; - * we get incorrect behaviour on R500 when r0 == 0.0. - * It appears that the R500 KIL hardware treats -0.0 as less - * than zero. - */ - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; - break; - case OPCODE_MUL: - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; - break; - default: - /* nothing to do */ - break; - } -} - - -/** - * Classify an instruction according to which ALUs etc. it needs - */ -static void classify_instruction(struct pair_state *s, - struct pair_state_instruction *psi) -{ - psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; - psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; - - switch(psi->Instruction.Opcode) { - case OPCODE_ADD: - case OPCODE_CMP: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MAD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MOV: - case OPCODE_MUL: - break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - psi->IsTranscendent = 1; - psi->NeedAlpha = 1; - break; - case OPCODE_DP4: - psi->NeedAlpha = 1; - /* fall through */ - case OPCODE_DP3: - psi->NeedRGB = 1; - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - case OPCODE_END: - psi->IsTex = 1; - break; - default: - error("Unknown opcode %d\n", psi->Instruction.Opcode); - break; - } -} - - -/** - * Count which (input, temporary) register is read and written how often, - * and scan the instruction stream to find dependencies. - */ -static void scan_instructions(struct pair_state *s) -{ - struct rc_instruction *source; - GLuint ip; - - for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0; - source != &s->Compiler->Base.Program.Instructions; - source = source->Next, ++ip) { - struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst)); - memset(pairinst, 0, sizeof(struct pair_state_instruction)); - - pairinst->Instruction = source->I; - pairinst->IP = ip; - final_rewrite(s, &pairinst->Instruction); - classify_instruction(s, pairinst); - - int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode); - int j; - for(j = 0; j < nsrc; j++) { - struct pair_register_translation *t = - get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index); - if (!t) - continue; - - t->RefCount++; - - if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) { - int i; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i); - if (swz >= 4) - continue; /* constant or NIL swizzle */ - if (!t->Value[swz]) - continue; /* this is an undefined read */ - - /* Do not add a dependency if this instruction - * also rewrites the value. The code below adds - * a dependency for the DstReg, which is a superset - * of the SrcReg dependency. */ - if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY && - pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index && - GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz)) - continue; - - struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r)); - pairinst->NumDependencies++; - t->Value[swz]->NumReaders++; - r->Reader = pairinst; - r->Next = t->Value[swz]->Readers; - t->Value[swz]->Readers = r; - } - } - } - - int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode); - if (ndst) { - struct pair_register_translation *t = - get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index); - if (t) { - t->RefCount++; - - if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) { - int j; - for(j = 0; j < 4; ++j) { - if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j)) - continue; - - struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v)); - memset(v, 0, sizeof(struct reg_value)); - v->Writer = pairinst; - if (t->Value[j]) { - pairinst->NumDependencies++; - t->Value[j]->Next = v; - } - t->Value[j] = v; - pairinst->Values[j] = v; - } - } - } - } - - if (s->Verbose) - _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); - - if (!pairinst->NumDependencies) - instruction_ready(s, pairinst); - } - - /* Clear the PROGRAM_TEMPORARY state */ - int i, j; - for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { - for(j = 0; j < 4; ++j) - s->Temps[i].Value[j] = 0; - } -} - - -static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst) -{ - ASSERT(pairinst->NumDependencies > 0); - if (!--pairinst->NumDependencies) - instruction_ready(s, pairinst); -} /** - * Update the dependency tracking state based on what the instruction - * at the given IP does. + * Return the source slot where we installed the given register access, + * or -1 if no slot was free anymore. */ -static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst) -{ - struct prog_instruction *inst = &pairinst->Instruction; - - if (s->Verbose) - _mesa_printf("commit_instruction(%i)\n", pairinst->IP); - - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; - deref_hw_reg(s, t->HwIndex); - - int i; - for(i = 0; i < 4; ++i) { - if (!GET_BIT(inst->DstReg.WriteMask, i)) - continue; - - t->Value[i] = pairinst->Values[i]; - if (t->Value[i]->NumReaders) { - struct reg_value_reader *r; - for(r = pairinst->Values[i]->Readers; r; r = r->Next) - decrement_dependencies(s, r->Reader); - } else if (t->Value[i]->Next) { - /* This happens when the only reader writes - * the register at the same time */ - decrement_dependencies(s, t->Value[i]->Next->Writer); - } - } - } - - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int i; - for(i = 0; i < nsrc; i++) { - struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); - if (!t) - continue; - - deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); - - if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) - continue; - - int j; - for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz >= 4) - continue; - if (!t->Value[swz]) - continue; - - /* Do not free a dependency if this instruction - * also rewrites the value. See scan_instructions. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[i].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) - continue; - - if (!--t->Value[swz]->NumReaders) { - if (t->Value[swz]->Next) - decrement_dependencies(s, t->Value[swz]->Next->Writer); - } - } - } -} - - -/** - * Emit all ready texture instructions in a single block. - * - * Emit as a single block to (hopefully) sample many textures in parallel, - * and to avoid hardware indirections on R300. - * - * In R500, we don't really know when the result of a texture instruction - * arrives. So allocate all destinations first, to make sure they do not - * arrive early and overwrite a texture coordinate we're going to use later - * in the block. - */ -static void emit_all_tex(struct pair_state *s) -{ - struct pair_state_instruction *readytex; - struct pair_state_instruction *pairinst; - - ASSERT(s->ReadyTEX); - - // Don't let the ready list change under us! - readytex = s->ReadyTEX; - s->ReadyTEX = 0; - - // Allocate destination hardware registers in one block to avoid conflicts. - for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - struct prog_instruction *inst = &pairinst->Instruction; - if (inst->Opcode != OPCODE_KIL) - get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - } - - if (s->Compiler->Base.Debug) - _mesa_printf(" BEGIN_TEX\n"); - - if (s->Handler->BeginTexBlock) - s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData); - - for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - struct prog_instruction *inst = &pairinst->Instruction; - commit_instruction(s, pairinst); - - if (inst->Opcode != OPCODE_KIL) - inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); - - if (s->Compiler->Base.Debug) { - _mesa_printf(" "); - _mesa_print_instruction(inst); - fflush(stderr); - } - - struct radeon_pair_texture_instruction rpti; - - switch(inst->Opcode) { - case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break; - case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break; - case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break; - default: - case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break; - } - - rpti.DestIndex = inst->DstReg.Index; - rpti.WriteMask = inst->DstReg.WriteMask; - rpti.TexSrcUnit = inst->TexSrcUnit; - rpti.TexSrcTarget = inst->TexSrcTarget; - rpti.SrcIndex = inst->SrcReg[0].Index; - rpti.SrcSwizzle = inst->SrcReg[0].Swizzle; - - s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti); - } - - if (s->Compiler->Base.Debug) - _mesa_printf(" END_TEX\n"); -} - - -static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, - struct prog_src_register src, GLboolean rgb, GLboolean alpha) +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index) { int candidate = -1; int candidate_quality = -1; int i; - if (!rgb && !alpha) + if ((!rgb && !alpha) || file == RC_FILE_NONE) return 0; - GLuint constant; - GLuint index; - - if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { - constant = 0; - index = get_hw_reg(s, src.File, src.Index); - } else { - constant = 1; - index = src.Index; - } - for(i = 0; i < 3; ++i) { int q = 0; if (rgb) { if (pair->RGB.Src[i].Used) { - if (pair->RGB.Src[i].Constant != constant || + if (pair->RGB.Src[i].File != file || pair->RGB.Src[i].Index != index) continue; q++; @@ -610,7 +55,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio } if (alpha) { if (pair->Alpha.Src[i].Used) { - if (pair->Alpha.Src[i].Constant != constant || + if (pair->Alpha.Src[i].File != file || pair->Alpha.Src[i].Index != index) continue; q++; @@ -625,334 +70,15 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio if (candidate >= 0) { if (rgb) { pair->RGB.Src[candidate].Used = 1; - pair->RGB.Src[candidate].Constant = constant; + pair->RGB.Src[candidate].File = file; pair->RGB.Src[candidate].Index = index; } if (alpha) { pair->Alpha.Src[candidate].Used = 1; - pair->Alpha.Src[candidate].Constant = constant; + pair->Alpha.Src[candidate].File = file; pair->Alpha.Src[candidate].Index = index; } } return candidate; } - -/** - * Fill the given ALU instruction's opcodes and source operands into the given pair, - * if possible. - */ -static GLboolean fill_instruction_into_pair( - struct pair_state *s, - struct radeon_pair_instruction *pair, - struct pair_state_instruction *pairinst) -{ - struct prog_instruction *inst = &pairinst->Instruction; - - ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); - ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); - - if (pairinst->NeedRGB) { - if (pairinst->IsTranscendent) - pair->RGB.Opcode = OPCODE_REPL_ALPHA; - else - pair->RGB.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - pair->RGB.Saturate = 1; - } - if (pairinst->NeedAlpha) { - pair->Alpha.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - pair->Alpha.Saturate = 1; - } - - int nargs = _mesa_num_inst_src_regs(inst->Opcode); - int i; - - /* Special case for DDX/DDY (MDH/MDV). */ - if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { - if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) - return GL_FALSE; - else - nargs++; - } - - for(i = 0; i < nargs; ++i) { - int source; - if (pairinst->NeedRGB && !pairinst->IsTranscendent) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - int j; - for(j = 0; j < 3; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz < 3) - srcrgb = GL_TRUE; - else if (swz < 4) - srcalpha = GL_TRUE; - } - source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); - if (source < 0) - return GL_FALSE; - pair->RGB.Arg[i].Source = source; - pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; - pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); - } - if (pairinst->NeedAlpha) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); - if (swz < 3) - srcrgb = GL_TRUE; - else if (swz < 4) - srcalpha = GL_TRUE; - source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); - if (source < 0) - return GL_FALSE; - pair->Alpha.Arg[i].Source = source; - pair->Alpha.Arg[i].Swizzle = swz; - pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); - } - } - - return GL_TRUE; -} - - -/** - * Fill in the destination register information. - * - * This is split from filling in source registers because we want - * to avoid allocating hardware temporaries for destinations until - * we are absolutely certain that we're going to emit a certain - * instruction pairing. - */ -static void fill_dest_into_pair( - struct pair_state *s, - struct radeon_pair_instruction *pair, - struct pair_state_instruction *pairinst) -{ - struct prog_instruction *inst = &pairinst->Instruction; - - if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == s->Compiler->OutputColor) { - pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; - pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == s->Compiler->OutputDepth) { - pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } - } else { - GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - if (pairinst->NeedRGB) { - pair->RGB.DestIndex = hwindex; - pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; - } - if (pairinst->NeedAlpha) { - pair->Alpha.DestIndex = hwindex; - pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } - } -} - - -/** - * Find a good ALU instruction or pair of ALU instruction and emit it. - * - * Prefer emitting full ALU instructions, so that when we reach a point - * where no full ALU instruction can be emitted, we have more candidates - * for RGB/Alpha pairing. - */ -static void emit_alu(struct pair_state *s) -{ - struct radeon_pair_instruction pair; - struct pair_state_instruction *psi; - - if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - if (s->ReadyFullALU) { - psi = s->ReadyFullALU; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - } else if (s->ReadyRGB) { - psi = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else { - psi = s->ReadyAlpha; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } - - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, psi); - fill_dest_into_pair(s, &pair, psi); - commit_instruction(s, psi); - } else { - struct pair_state_instruction **prgb; - struct pair_state_instruction **palpha; - - /* Some pairings might fail because they require too - * many source slots; try all possible pairings if necessary */ - for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { - for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { - struct pair_state_instruction * psirgb = *prgb; - struct pair_state_instruction * psialpha = *palpha; - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, psirgb); - if (!fill_instruction_into_pair(s, &pair, psialpha)) - continue; - *prgb = (*prgb)->NextReady; - *palpha = (*palpha)->NextReady; - fill_dest_into_pair(s, &pair, psirgb); - fill_dest_into_pair(s, &pair, psialpha); - commit_instruction(s, psirgb); - commit_instruction(s, psialpha); - goto success; - } - } - - /* No success in pairing; just take the first RGB instruction */ - psi = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, psi); - fill_dest_into_pair(s, &pair, psi); - commit_instruction(s, psi); - success: ; - } - - if (s->Compiler->Base.Debug) - radeonPrintPairInstruction(&pair); - - s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair); -} - -/* Callback function for assigning input registers to hardware registers */ -static void alloc_helper(void * data, unsigned input, unsigned hwreg) -{ - struct pair_state * s = data; - alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg); -} - -void radeonPairProgram( - struct r300_fragment_program_compiler * compiler, - const struct radeon_pair_handler* handler, void *userdata) -{ - struct pair_state s; - - _mesa_bzero(&s, sizeof(s)); - s.Compiler = compiler; - s.Handler = handler; - s.UserData = userdata; - s.Verbose = GL_FALSE && s.Compiler->Base.Debug; - - if (s.Compiler->Base.Debug) - _mesa_printf("Emit paired program\n"); - - scan_instructions(&s); - s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s); - - while(!s.Compiler->Base.Error && - (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { - if (s.ReadyTEX) - emit_all_tex(&s); - - while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) - emit_alu(&s); - } - - if (s.Compiler->Base.Debug) - _mesa_printf(" END\n"); -} - - -static void print_pair_src(int i, struct radeon_pair_instruction_source* src) -{ - _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); -} - -static const char* opcode_string(GLuint opcode) -{ - if (opcode == OPCODE_REPL_ALPHA) - return "SOP"; - else - return _mesa_opcode_string(opcode); -} - -static int num_pairinst_args(GLuint opcode) -{ - if (opcode == OPCODE_REPL_ALPHA) - return 0; - else - return _mesa_num_inst_src_regs(opcode); -} - -static char swizzle_char(GLuint swz) -{ - switch(swz) { - case SWIZZLE_X: return 'x'; - case SWIZZLE_Y: return 'y'; - case SWIZZLE_Z: return 'z'; - case SWIZZLE_W: return 'w'; - case SWIZZLE_ZERO: return '0'; - case SWIZZLE_ONE: return '1'; - case SWIZZLE_NIL: return '_'; - default: return '?'; - } -} - -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) -{ - int nargs; - int i; - - _mesa_printf(" RGB: "); - for(i = 0; i < 3; ++i) { - if (inst->RGB.Src[i].Used) - print_pair_src(i, inst->RGB.Src + i); - } - _mesa_printf("\n"); - _mesa_printf(" Alpha:"); - for(i = 0; i < 3; ++i) { - if (inst->Alpha.Src[i].Used) - print_pair_src(i, inst->Alpha.Src + i); - } - _mesa_printf("\n"); - - _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); - if (inst->RGB.WriteMask) - _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, - (inst->RGB.WriteMask & 1) ? "x" : "", - (inst->RGB.WriteMask & 2) ? "y" : "", - (inst->RGB.WriteMask & 4) ? "z" : ""); - if (inst->RGB.OutputWriteMask) - _mesa_printf(" COLOR.%s%s%s", - (inst->RGB.OutputWriteMask & 1) ? "x" : "", - (inst->RGB.OutputWriteMask & 2) ? "y" : "", - (inst->RGB.OutputWriteMask & 4) ? "z" : ""); - nargs = num_pairinst_args(inst->RGB.Opcode); - for(i = 0; i < nargs; ++i) { - const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; - const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), - abs); - } - _mesa_printf("\n"); - - _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); - if (inst->Alpha.WriteMask) - _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); - if (inst->Alpha.OutputWriteMask) - _mesa_printf(" COLOR.w"); - if (inst->Alpha.DepthWriteMask) - _mesa_printf(" DEPTH.w"); - nargs = num_pairinst_args(inst->Alpha.Opcode); - for(i = 0; i < nargs; ++i) { - const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; - const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, - swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); - } - _mesa_printf("\n"); -} diff --git a/r300/compiler/radeon_program_pair.h b/r300/compiler/radeon_program_pair.h index ff76178..1600598 100644 --- a/r300/compiler/radeon_program_pair.h +++ b/r300/compiler/radeon_program_pair.h @@ -28,116 +28,97 @@ #ifndef __RADEON_PROGRAM_PAIR_H_ #define __RADEON_PROGRAM_PAIR_H_ -#include "radeon_program.h" +#include "radeon_code.h" +#include "radeon_opcodes.h" +#include "radeon_program_constants.h" struct r300_fragment_program_compiler; /** - * Represents a paired instruction, as found in R300 and R500 + * \file + * Represents a paired ALU instruction, as found in R300 and R500 * fragment programs. + * + * Note that this representation is taking some liberties as far + * as register files are concerned, to allow separate register + * allocation. + * + * Also note that there are some subtleties in that the semantics + * of certain opcodes are implicitly changed in this representation; + * see \ref rc_pair_translate */ + + struct radeon_pair_instruction_source { - GLuint Index:8; - GLuint Constant:1; - GLuint Used:1; + unsigned int Used:1; + rc_register_file File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; }; struct radeon_pair_instruction_rgb { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:3; - GLuint OutputWriteMask:3; - GLuint Saturate:1; + rc_opcode Opcode:8; + unsigned int DestIndex:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:3; + unsigned int OutputWriteMask:3; + unsigned int Saturate:1; struct radeon_pair_instruction_source Src[3]; struct { - GLuint Source:2; - GLuint Swizzle:9; - GLuint Abs:1; - GLuint Negate:1; + unsigned int Source:2; + unsigned int Swizzle:9; + unsigned int Abs:1; + unsigned int Negate:1; } Arg[3]; }; struct radeon_pair_instruction_alpha { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:1; - GLuint OutputWriteMask:1; - GLuint DepthWriteMask:1; - GLuint Saturate:1; + rc_opcode Opcode:8; + unsigned int DestIndex:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:1; + unsigned int OutputWriteMask:1; + unsigned int DepthWriteMask:1; + unsigned int Saturate:1; struct radeon_pair_instruction_source Src[3]; struct { - GLuint Source:2; - GLuint Swizzle:3; - GLuint Abs:1; - GLuint Negate:1; + unsigned int Source:2; + unsigned int Swizzle:3; + unsigned int Abs:1; + unsigned int Negate:1; } Arg[3]; }; -struct radeon_pair_instruction { +struct rc_pair_instruction { struct radeon_pair_instruction_rgb RGB; struct radeon_pair_instruction_alpha Alpha; -}; - -enum { - RADEON_OPCODE_TEX = 0, - RADEON_OPCODE_TXB, - RADEON_OPCODE_TXP, - RADEON_OPCODE_KIL + rc_write_aluresult WriteALUResult:2; + rc_compare_func ALUResultCompare:3; }; -struct radeon_pair_texture_instruction { - GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */ - - GLuint DestIndex:8; - GLuint WriteMask:4; - GLuint TexSrcUnit:5; - GLuint TexSrcTarget:3; - - GLuint SrcIndex:8; - GLuint SrcSwizzle:12; -}; +/** + * General helper functions for dealing with the paired instruction format. + */ +/*@{*/ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index); +/*@}*/ /** - * + * Compiler passes that operate with the paired format. */ -struct radeon_pair_handler { - /** - * Write a paired instruction to the hardware. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); - - /** - * Write a texture instruction to the hardware. - * Register indices have already been rewritten to the allocated - * hardware register numbers. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*); - - /** - * Called before a block of contiguous, independent texture - * instructions is emitted. - */ - GLboolean (*BeginTexBlock)(void*); - - unsigned MaxHwTemps; -}; - -void radeonPairProgram( - struct r300_fragment_program_compiler * compiler, - const struct radeon_pair_handler*, void *userdata); +/*@{*/ +struct radeon_pair_handler; -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); +void rc_pair_translate(struct r300_fragment_program_compiler *c); +void rc_pair_schedule(struct r300_fragment_program_compiler *c); +void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps); +/*@}*/ #endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/r300/compiler/radeon_program_print.c b/r300/compiler/radeon_program_print.c new file mode 100644 index 0000000..c980f5c --- /dev/null +++ b/r300/compiler/radeon_program_print.c @@ -0,0 +1,301 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_program.h" + +#include <stdio.h> + +static const char * textarget_to_string(rc_texture_target target) +{ + switch(target) { + case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; + case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; + case RC_TEXTURE_CUBE: return "CUBE"; + case RC_TEXTURE_3D: return "3D"; + case RC_TEXTURE_RECT: return "RECT"; + case RC_TEXTURE_2D: return "2D"; + case RC_TEXTURE_1D: return "1D"; + default: return "BAD_TEXTURE_TARGET"; + } +} + +static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs) +{ + if (func == RC_COMPARE_FUNC_NEVER) { + fprintf(f, "false"); + } else if (func == RC_COMPARE_FUNC_ALWAYS) { + fprintf(f, "true"); + } else { + const char * op; + switch(func) { + case RC_COMPARE_FUNC_LESS: op = "<"; break; + case RC_COMPARE_FUNC_EQUAL: op = "=="; break; + case RC_COMPARE_FUNC_LEQUAL: op = "<="; break; + case RC_COMPARE_FUNC_GREATER: op = ">"; break; + case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break; + case RC_COMPARE_FUNC_GEQUAL: op = ">="; break; + default: op = "???"; break; + } + fprintf(f, "%s %s %s", lhs, op, rhs); + } +} + +static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) +{ + if (file == RC_FILE_NONE) { + fprintf(f, "none"); + } else if (file == RC_FILE_SPECIAL) { + switch(index) { + case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break; + default: fprintf(f, "special[%i]", index); break; + } + } else { + const char * filename; + switch(file) { + case RC_FILE_TEMPORARY: filename = "temp"; break; + case RC_FILE_INPUT: filename = "input"; break; + case RC_FILE_OUTPUT: filename = "output"; break; + case RC_FILE_ADDRESS: filename = "addr"; break; + case RC_FILE_CONSTANT: filename = "const"; break; + default: filename = "BAD FILE"; break; + } + fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : ""); + } +} + +static void rc_print_mask(FILE * f, unsigned int mask) +{ + if (mask & RC_MASK_X) fprintf(f, "x"); + if (mask & RC_MASK_Y) fprintf(f, "y"); + if (mask & RC_MASK_Z) fprintf(f, "z"); + if (mask & RC_MASK_W) fprintf(f, "w"); +} + +static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) +{ + rc_print_register(f, dst.File, dst.Index, dst.RelAddr); + if (dst.WriteMask != RC_MASK_XYZW) { + fprintf(f, "."); + rc_print_mask(f, dst.WriteMask); + } +} + +static char rc_swizzle_char(unsigned int swz) +{ + switch(swz) { + case RC_SWIZZLE_X: return 'x'; + case RC_SWIZZLE_Y: return 'y'; + case RC_SWIZZLE_Z: return 'z'; + case RC_SWIZZLE_W: return 'w'; + case RC_SWIZZLE_ZERO: return '0'; + case RC_SWIZZLE_ONE: return '1'; + case RC_SWIZZLE_HALF: return 'H'; + case RC_SWIZZLE_UNUSED: return '_'; + } + return '?'; +} + +static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) +{ + unsigned int comp; + for(comp = 0; comp < 4; ++comp) { + rc_swizzle swz = GET_SWZ(swizzle, comp); + if (GET_BIT(negate, comp)) + fprintf(f, "-"); + fprintf(f, "%c", rc_swizzle_char(swz)); + } +} + +static void rc_print_src_register(FILE * f, struct rc_src_register src) +{ + int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); + + if (src.Negate == RC_MASK_XYZW) + fprintf(f, "-"); + if (src.Abs) + fprintf(f, "|"); + + rc_print_register(f, src.File, src.Index, src.RelAddr); + + if (src.Abs && !trivial_negate) + fprintf(f, "|"); + + if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { + fprintf(f, "."); + rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); + } + + if (src.Abs && trivial_negate) + fprintf(f, "|"); +} + +static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int reg; + + fprintf(f, "%s", opcode->Name); + + switch(inst->U.I.SaturateMode) { + case RC_SATURATE_NONE: break; + case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; + case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; + default: fprintf(f, "_BAD_SAT"); break; + } + + if (opcode->HasDstReg) { + fprintf(f, " "); + rc_print_dst_register(f, inst->U.I.DstReg); + if (opcode->NumSrcRegs) + fprintf(f, ","); + } + + for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { + if (reg > 0) + fprintf(f, ","); + fprintf(f, " "); + rc_print_src_register(f, inst->U.I.SrcReg[reg]); + } + + if (opcode->HasTexture) { + fprintf(f, ", %s%s[%u]", + textarget_to_string(inst->U.I.TexSrcTarget), + inst->U.I.TexShadow ? "SHADOW" : "", + inst->U.I.TexSrcUnit); + } + + fprintf(f, ";"); + + if (inst->U.I.WriteALUResult) { + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, + (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", + inst->U.I.ALUResultCompare, "0"); + fprintf(f, ")]"); + } + + fprintf(f, "\n"); +} + +static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + int printedsrc = 0; + unsigned int src, arg; + + for(src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.xyz = ", src); + rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); + printedsrc = 1; + } + if (inst->Alpha.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.w = ", src); + rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); + printedsrc = 1; + } + } + fprintf(f, "\n"); + + if (inst->RGB.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + + fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + fprintf(f, " color.%s%s%s", + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + if (inst->WriteALUResult == RC_ALURESULT_X) + fprintf(f, " aluresult"); + + for (arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source, + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), + abs); + } + fprintf(f, "\n"); + } + + if (inst->Alpha.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + + fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + fprintf(f, " color.w"); + if (inst->Alpha.DepthWriteMask) + fprintf(f, " depth.w"); + if (inst->WriteALUResult == RC_ALURESULT_W) + fprintf(f, " aluresult"); + + for(arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source, + rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs); + } + fprintf(f, "\n"); + } + + if (inst->WriteALUResult) { + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); + fprintf(f, ")]\n"); + } +} + +/** + * Print program to stderr, default options. + */ +void rc_print_program(const struct rc_program *prog) +{ + unsigned int linenum = 0; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program\n"); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + if (inst->Type == RC_INSTRUCTION_PAIR) + rc_print_pair_instruction(stderr, inst); + else + rc_print_normal_instruction(stderr, inst); + + linenum++; + } +} diff --git a/r300/compiler/radeon_swizzle.h b/r300/compiler/radeon_swizzle.h new file mode 100644 index 0000000..c81d5f7 --- /dev/null +++ b/r300/compiler/radeon_swizzle.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_SWIZZLE_H +#define RADEON_SWIZZLE_H + +#include "radeon_program.h" + +struct rc_swizzle_split { + unsigned char NumPhases; + unsigned char Phase[4]; +}; + +/** + * Describe the swizzling capability of target hardware. + */ +struct rc_swizzle_caps { + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + * + * \return 1 if the swizzle is native for the given opcode + */ + int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); + + /** + * Determine how to split access to the masked channels of the + * given source register to obtain ALU-native swizzles. + */ + void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); +}; + +#endif /* RADEON_SWIZZLE_H */ diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c index 0fe32a5..ad8db6e 100644 --- a/r300/r300_cmdbuf.c +++ b/r300/r300_cmdbuf.c @@ -46,14 +46,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" #include "r300_ioctl.h" -#include "radeon_reg.h" #include "r300_reg.h" #include "r300_cmdbuf.h" #include "r300_emit.h" #include "radeon_bocs_wrapper.h" #include "radeon_mipmap_tree.h" #include "r300_state.h" -#include "radeon_reg.h" #include "radeon_queryobj.h" /** # of dwords reserved for additional instructions that may need to be written @@ -171,7 +169,7 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) if (t && !t->image_override) { BEGIN_BATCH_NO_AUTOSTATE(4); OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t), RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); END_BATCH(); } else if (!t) { @@ -279,16 +277,33 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) cbpitch = (rrb->pitch / rrb->cpp); if (rrb->cpp == 4) cbpitch |= R300_COLOR_FORMAT_ARGB8888; - else switch (rrb->base._ActualFormat) { - case GL_RGB5: + else switch (rrb->base.Format) { + case MESA_FORMAT_RGB565: + assert(_mesa_little_endian()); cbpitch |= R300_COLOR_FORMAT_RGB565; break; - case GL_RGBA4: + case MESA_FORMAT_RGB565_REV: + assert(!_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_RGB565; + break; + case MESA_FORMAT_ARGB4444: + assert(_mesa_little_endian()); cbpitch |= R300_COLOR_FORMAT_ARGB4444; break; - case GL_RGB5_A1: + case MESA_FORMAT_ARGB4444_REV: + assert(!_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_ARGB4444; + break; + case MESA_FORMAT_ARGB1555: + assert(_mesa_little_endian()); cbpitch |= R300_COLOR_FORMAT_ARGB1555; break; + case MESA_FORMAT_ARGB1555_REV: + assert(!_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_ARGB1555; + break; + default: + _mesa_problem(ctx, "unexpected format in emit_cb_offset()"); } if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) @@ -684,11 +699,7 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1); - if ((r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) || - ( !r300->radeon.radeonScreen->kernel_mm && ( - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS400) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) ) ) ) { + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) { ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); } else { ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0); @@ -697,6 +708,14 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); r300->hw.zs.cmd[R300_ZS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3); + if (is_r500) { + if (r300->radeon.radeonScreen->kernel_mm) + ALLOC_STATE(zsb, always, R300_ZSB_CMDSIZE, 0); + else + ALLOC_STATE(zsb, never, R300_ZSB_CMDSIZE, 0); + r300->hw.zsb.cmd[R300_ZSB_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R500_ZB_STENCILREFMASK_BF, 1); + } ALLOC_STATE(zstencil_format, always, 5, 0); r300->hw.zstencil_format.cmd[0] = diff --git a/r300/r300_context.c b/r300/r300_context.c index 2ea1b82..5f07b95 100644 --- a/r300/r300_context.c +++ b/r300/r300_context.c @@ -84,15 +84,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_EXT_framebuffer_object #define need_GL_EXT_fog_coord #define need_GL_EXT_gpu_program_parameters +#define need_GL_EXT_provoking_vertex #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side #define need_GL_ATI_separate_stencil #define need_GL_NV_vertex_program -#include "extension_helper.h" +#include "main/remap_helper.h" -const struct dri_extension card_extensions[] = { +static const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ {"GL_ARB_depth_texture", NULL}, {"GL_ARB_fragment_program", NULL}, @@ -116,6 +117,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_packed_depth_stencil", NULL}, {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, + {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, {"GL_EXT_shadow_funcs", NULL}, {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, @@ -143,7 +145,7 @@ const struct dri_extension card_extensions[] = { }; -const struct dri_extension mm_extensions[] = { +static const struct dri_extension mm_extensions[] = { { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions }, { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, { NULL, NULL } @@ -153,7 +155,7 @@ const struct dri_extension mm_extensions[] = { * The GL 2.0 functions are needed to make display lists work with * functions added by GL_ATI_separate_stencil. */ -const struct dri_extension gl_20_extension[] = { +static const struct dri_extension gl_20_extension[] = { {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, }; @@ -374,11 +376,21 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) if (screen->chip_family >= CHIP_FAMILY_RV515) { ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS; ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ - ctx->Const.FragmentProgram.MaxNativeParameters = R500_PFS_NUM_CONST_REGS; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = R500_PFS_MAX_INST; - ctx->Const.FragmentProgram.MaxNativeTexInstructions = R500_PFS_MAX_INST; - ctx->Const.FragmentProgram.MaxNativeInstructions = R500_PFS_MAX_INST; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = R500_PFS_MAX_INST; + + /* The hardware limits are higher than this, + * but the non-KMS DRM interface artificially limits us + * to this many instructions. + * + * We could of course work around it in the KMS path, + * but it would be a mess, so it seems wiser + * to leave it as is. Going forward, the Gallium driver + * will not be subject to these limitations. + */ + ctx->Const.FragmentProgram.MaxNativeParameters = 255; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 255; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 255; + ctx->Const.FragmentProgram.MaxNativeInstructions = 255; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 255; ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; } else { ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS; @@ -427,11 +439,11 @@ static void r300InitGLExtensions(GLcontext *ctx) if (r300->options.stencil_two_side_disabled) _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); - if (r300->options.s3tc_force_enabled) { + if (r300->options.s3tc_force_disabled) { + _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } else if (ctx->Mesa_DXTn || r300->options.s3tc_force_enabled) { _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); _mesa_enable_extension(ctx, "GL_S3_s3tc"); - } else if (r300->options.s3tc_force_disabled) { - _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) { diff --git a/r300/r300_context.h b/r300/r300_context.h index 1dadcc0..518d5cd 100644 --- a/r300/r300_context.h +++ b/r300/r300_context.h @@ -234,6 +234,10 @@ typedef struct r300_context *r300ContextPtr; #define R300_ZS_CNTL_2 3 #define R300_ZS_CMDSIZE 4 +#define R300_ZSB_CMD_0 0 +#define R300_ZSB_CNTL_0 1 +#define R300_ZSB_CMDSIZE 2 + #define R300_ZB_CMD_0 0 #define R300_ZB_OFFSET 1 #define R300_ZB_PITCH 2 @@ -343,6 +347,7 @@ struct r300_hw_state { struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */ struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */ struct radeon_state_atom zs; /* zstencil control (4F00) */ + struct radeon_state_atom zsb; /* zstencil bf */ struct radeon_state_atom zstencil_format; struct radeon_state_atom zb; /* z buffer (4F20) */ struct radeon_state_atom zb_depthclearvalue; /* (4F28) */ diff --git a/r300/r300_draw.c b/r300/r300_draw.c index e9968f9..06a0490 100644 --- a/r300/r300_draw.c +++ b/r300/r300_draw.c @@ -29,7 +29,7 @@ #include "main/glheader.h" #include "main/context.h" #include "main/state.h" -#include "main/api_validate.h" +/* #include "main/api_validate.h" */ #include "main/enums.h" #include "main/simple_list.h" diff --git a/r300/r300_emit.h b/r300/r300_emit.h index 8e57e35..a456d88 100644 --- a/r300/r300_emit.h +++ b/r300/r300_emit.h @@ -42,7 +42,6 @@ #include "main/glheader.h" #include "r300_context.h" #include "r300_cmdbuf.h" -#include "radeon_reg.h" static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn, int reg, int count) diff --git a/r300/r300_fragprog_common.c b/r300/r300_fragprog_common.c index 0bdc90b..267ee81 100644 --- a/r300/r300_fragprog_common.c +++ b/r300/r300_fragprog_common.c @@ -44,6 +44,7 @@ #include "compiler/radeon_compiler.h" +#include "radeon_mesa_to_rc.h" #include "r300_state.h" @@ -131,7 +132,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, */ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp) { - struct prog_src_register src; + struct rc_src_register src; int i; fp->fog_attr = FRAG_ATTRIB_MAX; @@ -155,7 +156,7 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r } memset(&src, 0, sizeof(src)); - src.File = PROGRAM_INPUT; + src.File = RC_FILE_INPUT; src.Index = fp->fog_attr; src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src); @@ -232,13 +233,26 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog fflush(stderr); } - rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base); + radeon_mesa_to_rc_program(&compiler.Base, &cont->Base.Base); insert_WPOS_trailer(&compiler, fp); rewriteFog(&compiler, fp); r3xx_compile_fragment_program(&compiler); + + if (compiler.is_r500) { + /* We need to support the non-KMS DRM interface, which + * artificially limits the number of instructions and + * constants which are available to us. + * + * See also the comment in r300_context.c where we + * set the MAX_NATIVE_xxx values. + */ + if (fp->code.code.r500.inst_end >= 255 || fp->code.constants.Count > 255) + rc_error(&compiler.Base, "Program is too big (upgrade to r300g to avoid this limitation).\n"); + } + fp->error = compiler.Base.Error; fp->InputsRead = compiler.Base.Program.InputsRead; diff --git a/r300/r300_reg.h b/r300/r300_reg.h index 39b4b61..ea684e7 100644 --- a/r300/r300_reg.h +++ b/r300/r300_reg.h @@ -1022,15 +1022,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD) /** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */ # define R300_RE_SHADE_MODEL_FLAT ( \ R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \ R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ - R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD) /* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ #define R300_GA_SOLID_RG 0x427c @@ -1791,6 +1789,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_DSTC_OUTPUT_X (1 << 26) # define R300_ALU_DSTC_OUTPUT_Y (1 << 27) # define R300_ALU_DSTC_OUTPUT_Z (1 << 28) +# define R300_RGB_TARGET(x) ((x) << 29) #define R300_US_ALU_ALPHA_ADDR_0 0x47C0 # define R300_ALU_SRC0A_SHIFT 0 @@ -1808,6 +1807,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_DSTA_REG (1 << 23) # define R300_ALU_DSTA_OUTPUT (1 << 24) # define R300_ALU_DSTA_DEPTH (1 << 27) +# define R300_ALPHA_TARGET(x) ((x) << 25) #define R300_US_ALU_RGB_INST_0 0x48C0 # define R300_ALU_ARGC_SRC0C_XYZ 0 @@ -2315,6 +2315,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_Z_WRITE_ENABLE (1 << 2) # define R300_Z_SIGNED_COMPARE (1 << 3) # define R300_STENCIL_FRONT_BACK (1 << 4) +# define R400_ZSIGNED_MAGNITUDE (1 << 5) +# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6) #define R300_ZB_ZSTENCILCNTL 0x4f04 /* functions */ @@ -3002,6 +3004,8 @@ enum { # define R500_INST_RGB_CLAMP (1 << 19) # define R500_INST_ALPHA_CLAMP (1 << 20) # define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALU_RESULT_SEL_RED (0 << 21) +# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21) # define R500_INST_ALPHA_PRED_INV (1 << 22) # define R500_INST_ALU_RESULT_OP_EQ (0 << 23) # define R500_INST_ALU_RESULT_OP_LT (1 << 23) diff --git a/r300/r300_render.c b/r300/r300_render.c index 3cd3875..4ae593c 100644 --- a/r300/r300_render.c +++ b/r300/r300_render.c @@ -67,8 +67,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "vbo/vbo_split.h" #include "tnl/tnl.h" #include "tnl/t_vp_build.h" -#include "radeon_reg.h" -#include "radeon_macros.h" #include "r300_context.h" #include "r300_ioctl.h" #include "r300_state.h" diff --git a/r300/r300_state.c b/r300/r300_state.c index 9301543..ac20c08 100644 --- a/r300/r300_state.c +++ b/r300/r300_state.c @@ -45,7 +45,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/framebuffer.h" #include "main/simple_list.h" #include "main/api_arrayelt.h" -#include "main/texformat.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -590,7 +589,9 @@ static void r300SetDepthState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zs); - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE|R300_STENCIL_FRONT_BACK; + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= (R300_STENCIL_ENABLE | + R300_STENCIL_FRONT_BACK | + R500_STENCIL_REFMASK_FRONT_BACK); r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); if (ctx->Depth.Test) { @@ -604,11 +605,16 @@ static void r300SetDepthState(GLcontext * ctx) static void r300CatchStencilFallback(GLcontext *ctx) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); const unsigned back = ctx->Stencil._BackFace; - if (ctx->Stencil._Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] - || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back] - || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) { + if (rmesa->radeon.radeonScreen->kernel_mm && + (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)) { + r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE); + } else if (ctx->Stencil._Enabled && + (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] + || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back] + || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) { r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE); } else { r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE); @@ -915,11 +921,24 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_S_BACK_FUNC_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R500_STENCIL_REFMASK_FRONT_BACK; + R300_STATECHANGE(rmesa, zsb); + refmask = ((ctx->Stencil.Ref[back] & 0xff) << R300_STENCILREF_SHIFT) + | ((ctx->Stencil.ValueMask[back] & 0xff) << R300_STENCILMASK_SHIFT); + + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] &= + ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT)); + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= refmask; + } } static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) { r300ContextPtr rmesa = R300_CONTEXT(ctx); + const unsigned back = ctx->Stencil._BackFace; r300CatchStencilFallback(ctx); @@ -931,6 +950,13 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) (ctx->Stencil. WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT; + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + R300_STATECHANGE(rmesa, zsb); + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= + (ctx->Stencil. + WriteMask[back] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT; + } } static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, @@ -2253,6 +2279,14 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) R300_STATECHANGE(r300, zb); } + if (new_state & (_NEW_LIGHT)) { + R300_STATECHANGE(r300, shade2); + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) + r300->hw.shade2.cmd[1] |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; + else + r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; + } + r300->radeon.NewGLState |= new_state; } diff --git a/r300/r300_tex.c b/r300/r300_tex.c index 433e5a8..726b3ff 100644 --- a/r300/r300_tex.c +++ b/r300/r300_tex.c @@ -40,7 +40,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/image.h" #include "main/mipmap.h" #include "main/simple_list.h" -#include "main/texformat.h" #include "main/texstore.h" #include "main/teximage.h" #include "main/texobj.h" @@ -196,6 +195,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, GLenum pname, const GLfloat * params) { radeonTexObj* t = radeon_tex_obj(texObj); + GLenum texBaseFormat; if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { fprintf(stderr, "%s( %s )\n", __FUNCTION__, @@ -223,23 +223,16 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, case GL_TEXTURE_MAX_LEVEL: case GL_TEXTURE_MIN_LOD: case GL_TEXTURE_MAX_LOD: - /* This isn't the most efficient solution but there doesn't appear to - * be a nice alternative. Since there's no LOD clamping, - * we just have to rely on loading the right subset of mipmap levels - * to simulate a clamped LOD. - */ - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - t->validated = GL_FALSE; - } + t->validated = GL_FALSE; break; case GL_DEPTH_TEXTURE_MODE: if (!texObj->Image[0][texObj->BaseLevel]) return; - if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat - == GL_DEPTH_COMPONENT) { + texBaseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat; + + if (texBaseFormat == GL_DEPTH_COMPONENT || + texBaseFormat == GL_DEPTH_STENCIL) { r300SetDepthTexMode(texObj); break; } else { @@ -268,7 +261,11 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) if (rmesa) { int i; - radeon_firevertices(&rmesa->radeon); + struct radeon_bo *bo; + bo = !t->mt ? t->bo : t->mt->bo; + if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->radeon.cmdbuf.cs)) { + radeon_firevertices(&rmesa->radeon); + } for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i) if (rmesa->hw.textures[i] == t) @@ -280,10 +277,8 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) t->bo = NULL; } - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - } + radeon_miptree_unreference(&t->mt); + _mesa_delete_texture_object(ctx, texObj); } diff --git a/r300/r300_texstate.c b/r300/r300_texstate.c index f030451..bbe8b1e 100644 --- a/r300/r300_texstate.c +++ b/r300/r300_texstate.c @@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "main/context.h" #include "main/macros.h" -#include "main/texformat.h" #include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" @@ -84,6 +83,7 @@ static const struct tx_table { _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), #endif + _ASSIGN(XRGB8888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)), _ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)), _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), @@ -138,9 +138,9 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X16), }, { - R300_EASY_TX_FORMAT(X, X, X, ONE, X24_Y8), - R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8), - R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X24_Y8), + R300_EASY_TX_FORMAT(Y, Y, Y, ONE, X24_Y8), + R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8), + R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, Y, X24_Y8), }, { R300_EASY_TX_FORMAT(X, X, X, ONE, X32), @@ -156,11 +156,11 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) t = radeon_tex_obj(tObj); - switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) { + switch (tObj->Image[0][tObj->BaseLevel]->TexFormat) { case MESA_FORMAT_Z16: format = formats[0]; break; - case MESA_FORMAT_Z24_S8: + case MESA_FORMAT_S8_Z24: format = formats[1]; break; case MESA_FORMAT_Z32: @@ -203,19 +203,17 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t) { const struct gl_texture_image *firstImage; - int firstlevel = t->mt ? t->mt->firstLevel : 0; - - firstImage = t->base.Image[0][firstlevel]; + firstImage = t->base.Image[0][t->minLod]; if (!t->image_override - && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) { - if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) { + && VALID_FORMAT(firstImage->TexFormat)) { + if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { r300SetDepthTexMode(&t->base); } else { - t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format; + t->pp_txformat = tx_table[firstImage->TexFormat].format; } - t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter; + t->pp_txfilter |= tx_table[firstImage->TexFormat].filter; } else if (!t->image_override) { _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); @@ -225,10 +223,10 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t) if (t->image_override && t->bo) return; - t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT) - | ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT) - | ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT) - | ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT)); + t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT))) + | ((R300_TX_HEIGHTMASK_MASK & ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT))) + | ((R300_TX_DEPTHMASK_MASK & ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT))) + | ((R300_TX_MAX_MIP_LEVEL_MASK & ((t->maxLod - t->minLod) << R300_TX_MAX_MIP_LEVEL_SHIFT)))); t->tile_bits = 0; @@ -239,7 +237,7 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t) if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) { - unsigned int align = (64 / t->mt->bpp) - 1; + unsigned int align = (64 / _mesa_get_format_bytes(firstImage->TexFormat)) - 1; t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN; if (!t->image_override) t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1; @@ -248,8 +246,12 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t) if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { if (firstImage->Width > 2048) t->pp_txpitch |= R500_TXWIDTH_BIT11; + else + t->pp_txpitch &= ~R500_TXWIDTH_BIT11; if (firstImage->Height > 2048) t->pp_txpitch |= R500_TXHEIGHT_BIT11; + else + t->pp_txpitch &= ~R500_TXHEIGHT_BIT11; } } @@ -434,20 +436,13 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo radeon_bo_unref(rImage->bo); rImage->bo = NULL; } - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = NULL; - } - if (rImage->mt) { - radeon_miptree_unreference(rImage->mt); - rImage->mt = NULL; - } + + radeon_miptree_unreference(&t->mt); + radeon_miptree_unreference(&rImage->mt); + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, rb->base.Width, rb->base.Height, 1, 0, rb->cpp); texImage->RowStride = rb->pitch / rb->cpp; - texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx, - internalFormat, - type, format, 0); rImage->bo = rb->bo; radeon_bo_ref(rImage->bo); t->bo = rb->bo; @@ -479,16 +474,20 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo break; } pitch_val--; - t->pp_txsize = ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT) | - ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT); + t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT))) + | ((R300_TX_HEIGHTMASK_MASK & ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT)))); t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN; t->pp_txpitch |= pitch_val; if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { if (rb->base.Width > 2048) t->pp_txpitch |= R500_TXWIDTH_BIT11; + else + t->pp_txpitch &= ~R500_TXWIDTH_BIT11; if (rb->base.Height > 2048) t->pp_txpitch |= R500_TXHEIGHT_BIT11; + else + t->pp_txpitch &= ~R500_TXHEIGHT_BIT11; } t->validated = GL_TRUE; _mesa_unlock_texture(radeon->glCtx, texObj); diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c index 2f7b67c..c2f96af 100644 --- a/r300/r300_vertprog.c +++ b/r300/r300_vertprog.c @@ -41,7 +41,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "compiler/radeon_compiler.h" -#include "compiler/radeon_nqssadce.h" +#include "radeon_mesa_to_rc.h" #include "r300_context.h" #include "r300_fragprog_common.h" #include "r300_state.h" @@ -62,12 +62,6 @@ static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_progra } } - if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) { - /* Should have checked this earlier... */ - fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); - _mesa_exit(-1); - } - for(i = 0; i < vp->code.constants.Count; ++i) { const float * src = 0; const struct rc_constant * constant = &vp->code.constants.Constants[i]; @@ -217,20 +211,20 @@ static void initialize_NV_registers(struct radeon_compiler * compiler) for(reg = 0; reg < 12; ++reg) { inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = reg; - inst->I.SrcReg[0].File = PROGRAM_BUILTIN; - inst->I.SrcReg[0].Swizzle = SWIZZLE_0000; + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = reg; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; } inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); - inst->I.Opcode = OPCODE_ARL; - inst->I.DstReg.File = PROGRAM_ADDRESS; - inst->I.DstReg.Index = 0; - inst->I.DstReg.WriteMask = WRITEMASK_X; - inst->I.SrcReg[0].File = PROGRAM_BUILTIN; - inst->I.SrcReg[0].Swizzle = SWIZZLE_0000; + inst->U.I.Opcode = RC_OPCODE_ARL; + inst->U.I.DstReg.File = RC_FILE_ADDRESS; + inst->U.I.DstReg.Index = 0; + inst->U.I.DstReg.WriteMask = WRITEMASK_X; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; } static struct r300_vertex_program *build_program(GLcontext *ctx, @@ -261,7 +255,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, _mesa_insert_mvp_code(ctx, vp->Base); } - rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base); + radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base); if (mesa_vp->IsNVProgram) initialize_NV_registers(&compiler.Base); @@ -281,6 +275,11 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, } r3xx_compile_vertex_program(&compiler); + + if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) { + rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n"); + } + vp->error = compiler.Base.Error; vp->Base->Base.InputsRead = vp->code.InputsRead; @@ -334,7 +333,6 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) #define bump_vpu_count(ptr, new_count) do { \ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \ int _nc=(new_count)/4; \ - assert(_nc < 256); \ if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ } while(0) diff --git a/r300/radeon_context.h b/r300/radeon_context.h index 250570f..da4812d 100644 --- a/r300/radeon_context.h +++ b/r300/radeon_context.h @@ -51,26 +51,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_screen.h" -#if R200_MERGED -extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode); - -#define FALLBACK( radeon, bit, mode ) do { \ - if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ - __FUNCTION__, bit, mode ); \ - radeonFallback( (radeon)->glCtx, bit, mode ); \ -} while (0) -#else #define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%s\n", __LINE__, __FILE__); -#endif /* TCL fallbacks */ extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode); -#if R200_MERGED -#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode ) -#else #define TCL_FALLBACK( ctx, bit, mode ) ; -#endif #endif /* __RADEON_CONTEXT_H__ */ diff --git a/r300/radeon_mesa_to_rc.c b/r300/radeon_mesa_to_rc.c new file mode 100644 index 0000000..9f9dec8 --- /dev/null +++ b/r300/radeon_mesa_to_rc.c @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_mesa_to_rc.h" + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" + +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_program.h" + + +static rc_opcode translate_opcode(gl_inst_opcode opcode) +{ + switch(opcode) { + case OPCODE_NOP: return RC_OPCODE_NOP; + case OPCODE_ABS: return RC_OPCODE_ABS; + case OPCODE_ADD: return RC_OPCODE_ADD; + case OPCODE_ARL: return RC_OPCODE_ARL; + case OPCODE_CMP: return RC_OPCODE_CMP; + case OPCODE_COS: return RC_OPCODE_COS; + case OPCODE_DDX: return RC_OPCODE_DDX; + case OPCODE_DDY: return RC_OPCODE_DDY; + case OPCODE_DP3: return RC_OPCODE_DP3; + case OPCODE_DP4: return RC_OPCODE_DP4; + case OPCODE_DPH: return RC_OPCODE_DPH; + case OPCODE_DST: return RC_OPCODE_DST; + case OPCODE_EX2: return RC_OPCODE_EX2; + case OPCODE_EXP: return RC_OPCODE_EXP; + case OPCODE_FLR: return RC_OPCODE_FLR; + case OPCODE_FRC: return RC_OPCODE_FRC; + case OPCODE_KIL: return RC_OPCODE_KIL; + case OPCODE_LG2: return RC_OPCODE_LG2; + case OPCODE_LIT: return RC_OPCODE_LIT; + case OPCODE_LOG: return RC_OPCODE_LOG; + case OPCODE_LRP: return RC_OPCODE_LRP; + case OPCODE_MAD: return RC_OPCODE_MAD; + case OPCODE_MAX: return RC_OPCODE_MAX; + case OPCODE_MIN: return RC_OPCODE_MIN; + case OPCODE_MOV: return RC_OPCODE_MOV; + case OPCODE_MUL: return RC_OPCODE_MUL; + case OPCODE_POW: return RC_OPCODE_POW; + case OPCODE_RCP: return RC_OPCODE_RCP; + case OPCODE_RSQ: return RC_OPCODE_RSQ; + case OPCODE_SCS: return RC_OPCODE_SCS; + case OPCODE_SEQ: return RC_OPCODE_SEQ; + case OPCODE_SFL: return RC_OPCODE_SFL; + case OPCODE_SGE: return RC_OPCODE_SGE; + case OPCODE_SGT: return RC_OPCODE_SGT; + case OPCODE_SIN: return RC_OPCODE_SIN; + case OPCODE_SLE: return RC_OPCODE_SLE; + case OPCODE_SLT: return RC_OPCODE_SLT; + case OPCODE_SNE: return RC_OPCODE_SNE; + case OPCODE_SUB: return RC_OPCODE_SUB; + case OPCODE_SWZ: return RC_OPCODE_SWZ; + case OPCODE_TEX: return RC_OPCODE_TEX; + case OPCODE_TXB: return RC_OPCODE_TXB; + case OPCODE_TXD: return RC_OPCODE_TXD; + case OPCODE_TXL: return RC_OPCODE_TXL; + case OPCODE_TXP: return RC_OPCODE_TXP; + case OPCODE_XPD: return RC_OPCODE_XPD; + default: return RC_OPCODE_ILLEGAL_OPCODE; + } +} + +static rc_saturate_mode translate_saturate(unsigned int saturate) +{ + switch(saturate) { + default: + case SATURATE_OFF: return RC_SATURATE_NONE; + case SATURATE_ZERO_ONE: return RC_SATURATE_ZERO_ONE; + } +} + +static rc_register_file translate_register_file(unsigned int file) +{ + switch(file) { + case PROGRAM_TEMPORARY: return RC_FILE_TEMPORARY; + case PROGRAM_INPUT: return RC_FILE_INPUT; + case PROGRAM_OUTPUT: return RC_FILE_OUTPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: return RC_FILE_CONSTANT; + case PROGRAM_ADDRESS: return RC_FILE_ADDRESS; + default: return RC_FILE_NONE; + } +} + +static void translate_srcreg(struct rc_src_register * dest, struct prog_src_register * src) +{ + dest->File = translate_register_file(src->File); + dest->Index = src->Index; + dest->RelAddr = src->RelAddr; + dest->Swizzle = src->Swizzle; + dest->Abs = src->Abs; + dest->Negate = src->Negate; +} + +static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_register * src) +{ + dest->File = translate_register_file(src->File); + dest->Index = src->Index; + dest->RelAddr = src->RelAddr; + dest->WriteMask = src->WriteMask; +} + +static rc_texture_target translate_tex_target(gl_texture_index target) +{ + switch(target) { + case TEXTURE_2D_ARRAY_INDEX: return RC_TEXTURE_2D_ARRAY; + case TEXTURE_1D_ARRAY_INDEX: return RC_TEXTURE_1D_ARRAY; + case TEXTURE_CUBE_INDEX: return RC_TEXTURE_CUBE; + case TEXTURE_3D_INDEX: return RC_TEXTURE_3D; + case TEXTURE_RECT_INDEX: return RC_TEXTURE_RECT; + default: + case TEXTURE_2D_INDEX: return RC_TEXTURE_2D; + case TEXTURE_1D_INDEX: return RC_TEXTURE_1D; + } +} + +static void translate_instruction(struct radeon_compiler * c, + struct rc_instruction * dest, struct prog_instruction * src) +{ + const struct rc_opcode_info * opcode; + unsigned int i; + + dest->U.I.Opcode = translate_opcode(src->Opcode); + if (dest->U.I.Opcode == RC_OPCODE_ILLEGAL_OPCODE) { + rc_error(c, "Unsupported opcode %i\n", src->Opcode); + return; + } + dest->U.I.SaturateMode = translate_saturate(src->SaturateMode); + + opcode = rc_get_opcode_info(dest->U.I.Opcode); + + for(i = 0; i < opcode->NumSrcRegs; ++i) + translate_srcreg(&dest->U.I.SrcReg[i], &src->SrcReg[i]); + + if (opcode->HasDstReg) + translate_dstreg(&dest->U.I.DstReg, &src->DstReg); + + if (opcode->HasTexture) { + dest->U.I.TexSrcUnit = src->TexSrcUnit; + dest->U.I.TexSrcTarget = translate_tex_target(src->TexSrcTarget); + dest->U.I.TexShadow = src->TexShadow; + } +} + +void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program) +{ + struct prog_instruction *source; + unsigned int i; + + for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) { + struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + translate_instruction(c, dest, source); + } + + c->Program.ShadowSamplers = program->ShadowSamplers; + c->Program.InputsRead = program->InputsRead; + c->Program.OutputsWritten = program->OutputsWritten; + + int isNVProgram = 0; + + if (program->Target == GL_VERTEX_PROGRAM_ARB) { + struct gl_vertex_program * vp = (struct gl_vertex_program *) program; + isNVProgram = vp->IsNVProgram; + } + + if (isNVProgram) { + /* NV_vertex_program has a fixed-sized constant environment. + * This could be handled more efficiently for programs that + * do not use relative addressing. + */ + for(i = 0; i < 96; ++i) { + struct rc_constant constant; + + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + + rc_constants_add(&c->Program.Constants, &constant); + } + } else { + for(i = 0; i < program->Parameters->NumParameters; ++i) { + struct rc_constant constant; + + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + + rc_constants_add(&c->Program.Constants, &constant); + } + } +} diff --git a/r300/radeon_mesa_to_rc.h b/r300/radeon_mesa_to_rc.h new file mode 100644 index 0000000..9511a04 --- /dev/null +++ b/r300/radeon_mesa_to_rc.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_MESA_TO_RC_H +#define RADEON_MESA_TO_RC_H + +struct gl_program; +struct radeon_compiler; + +void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program); + +#endif /* RADEON_MESA_TO_RC_H */ diff --git a/r600/Makefile.am b/r600/Makefile.am index 6b2091e..c2ce9c8 100644 --- a/r600/Makefile.am +++ b/r600/Makefile.am @@ -1,7 +1,6 @@ AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING -R600_CFLAGS = -DCOMPILE_R600 -DR200_MERGED=0 -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R600 -R600_CFLAGS += -I../radeon -I../radeon/server +R600_CFLAGS = -DRADEON_R600 -I../radeon -I../radeon/server r600_dri_la_LTLIBRARIES = r600_dri.la r600_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R600_CFLAGS) @@ -11,6 +10,7 @@ r600_dri_ladir = @libdir@/dri r600_dri_la_SOURCES = \ ../radeon/radeon_bo_legacy.c \ ../radeon/radeon_common_context.c \ + ../radeon/radeon_buffer_objects.c \ ../radeon/radeon_common.c \ ../radeon/radeon_cs_legacy.c \ ../radeon/radeon_dma.c \ @@ -44,5 +44,7 @@ if HAVE_LIBDRM_RADEON r600_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS) r600_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS) r600_dri_la_SOURCES += \ - ../radeon/radeon_cs_space_drm.c + ../radeon/radeon_cs_space_drm.c \ + ../radeon/radeon_bo.c \ + ../radeon/radeon_cs.c endif diff --git a/r600/r600_cmdbuf.c b/r600/r600_cmdbuf.c index 3cfe03a..370bb04 100644 --- a/r600/r600_cmdbuf.c +++ b/r600/r600_cmdbuf.c @@ -52,29 +52,49 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_mipmap_tree.h" #include "radeon_reg.h" +#ifdef HAVE_LIBDRM_RADEON +#include "radeon_cs_int.h" +#else +#include "radeon_cs_int_drm.h" +#endif +struct r600_cs_manager_legacy +{ + struct radeon_cs_manager base; + struct radeon_context *ctx; + /* hack for scratch stuff */ + uint32_t pending_age; + uint32_t pending_count; +}; + +struct r600_cs_reloc_legacy { + struct radeon_cs_reloc base; + uint32_t cindices; + uint32_t *indices; + uint32_t *reloc_indices; +}; -static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm, - uint32_t ndw) +static struct radeon_cs_int *r600_cs_create(struct radeon_cs_manager *csm, + uint32_t ndw) { - struct radeon_cs *cs; + struct radeon_cs_int *csi; - cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs)); - if (cs == NULL) { + csi = (struct radeon_cs_int*)calloc(1, sizeof(struct radeon_cs_int)); + if (csi == NULL) { return NULL; } - cs->csm = csm; - cs->ndw = (ndw + 0x3FF) & (~0x3FF); - cs->packets = (uint32_t*)malloc(4*cs->ndw); - if (cs->packets == NULL) { - free(cs); + csi->csm = csm; + csi->ndw = (ndw + 0x3FF) & (~0x3FF); + csi->packets = (uint32_t*)malloc(4*csi->ndw); + if (csi->packets == NULL) { + free(csi); return NULL; } - cs->relocs_total_size = 0; - return cs; + csi->relocs_total_size = 0; + return csi; } -static int r600_cs_write_reloc(struct radeon_cs *cs, +static int r600_cs_write_reloc(struct radeon_cs_int *csi, struct radeon_bo *bo, uint32_t read_domain, uint32_t write_domain, @@ -83,7 +103,7 @@ static int r600_cs_write_reloc(struct radeon_cs *cs, struct r600_cs_reloc_legacy *relocs; int i; - relocs = (struct r600_cs_reloc_legacy *)cs->relocs; + relocs = (struct r600_cs_reloc_legacy *)csi->relocs; /* check domains */ if ((read_domain && write_domain) || (!read_domain && !write_domain)) { /* in one CS a bo can only be in read or write domain but not @@ -98,7 +118,7 @@ static int r600_cs_write_reloc(struct radeon_cs *cs, return -EINVAL; } /* check if bo is already referenced */ - for(i = 0; i < cs->crelocs; i++) { + for(i = 0; i < csi->crelocs; i++) { uint32_t *indices; uint32_t *reloc_indices; @@ -129,109 +149,108 @@ static int r600_cs_write_reloc(struct radeon_cs *cs, } relocs[i].indices = indices; relocs[i].reloc_indices = reloc_indices; - relocs[i].indices[relocs[i].cindices - 1] = cs->cdw; - relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->cdw; - cs->section_cdw += 2; - cs->cdw += 2; + relocs[i].indices[relocs[i].cindices - 1] = csi->cdw; + relocs[i].reloc_indices[relocs[i].cindices - 1] = csi->cdw; + csi->section_cdw += 2; + csi->cdw += 2; return 0; } } /* add bo to reloc */ relocs = (struct r600_cs_reloc_legacy*) - realloc(cs->relocs, - sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1)); + realloc(csi->relocs, + sizeof(struct r600_cs_reloc_legacy) * (csi->crelocs + 1)); if (relocs == NULL) { return -ENOMEM; } - cs->relocs = relocs; - relocs[cs->crelocs].base.bo = bo; - relocs[cs->crelocs].base.read_domain = read_domain; - relocs[cs->crelocs].base.write_domain = write_domain; - relocs[cs->crelocs].base.flags = flags; - relocs[cs->crelocs].indices = (uint32_t*)malloc(4); - relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4); - if ( (relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL) ) + csi->relocs = relocs; + relocs[csi->crelocs].base.bo = bo; + relocs[csi->crelocs].base.read_domain = read_domain; + relocs[csi->crelocs].base.write_domain = write_domain; + relocs[csi->crelocs].base.flags = flags; + relocs[csi->crelocs].indices = (uint32_t*)malloc(4); + relocs[csi->crelocs].reloc_indices = (uint32_t*)malloc(4); + if ( (relocs[csi->crelocs].indices == NULL) || (relocs[csi->crelocs].reloc_indices == NULL) ) { return -ENOMEM; } - relocs[cs->crelocs].indices[0] = cs->cdw; - relocs[cs->crelocs].reloc_indices[0] = cs->cdw; - cs->section_cdw += 2; - cs->cdw += 2; - relocs[cs->crelocs].cindices = 1; - cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo); - cs->crelocs++; + relocs[csi->crelocs].indices[0] = csi->cdw; + relocs[csi->crelocs].reloc_indices[0] = csi->cdw; + csi->section_cdw += 2; + csi->cdw += 2; + relocs[csi->crelocs].cindices = 1; + csi->relocs_total_size += radeon_bo_legacy_relocs_size(bo); + csi->crelocs++; radeon_bo_ref(bo); return 0; } -static int r600_cs_begin(struct radeon_cs *cs, +static int r600_cs_begin(struct radeon_cs_int *csi, uint32_t ndw, const char *file, const char *func, int line) { - if (cs->section) { + if (csi->section_ndw) { fprintf(stderr, "CS already in a section(%s,%s,%d)\n", - cs->section_file, cs->section_func, cs->section_line); + csi->section_file, csi->section_func, csi->section_line); fprintf(stderr, "CS can't start section(%s,%s,%d)\n", file, func, line); return -EPIPE; } - cs->section = 1; - cs->section_ndw = ndw; - cs->section_cdw = 0; - cs->section_file = file; - cs->section_func = func; - cs->section_line = line; + csi->section_ndw = ndw; + csi->section_cdw = 0; + csi->section_file = file; + csi->section_func = func; + csi->section_line = line; - if (cs->cdw + ndw > cs->ndw) { + if (csi->cdw + ndw > csi->ndw) { uint32_t tmp, *ptr; int num = (ndw > 0x400) ? ndw : 0x400; - tmp = (cs->cdw + num + 0x3FF) & (~0x3FF); - ptr = (uint32_t*)realloc(cs->packets, 4 * tmp); + tmp = (csi->cdw + num + 0x3FF) & (~0x3FF); + ptr = (uint32_t*)realloc(csi->packets, 4 * tmp); if (ptr == NULL) { return -ENOMEM; } - cs->packets = ptr; - cs->ndw = tmp; + csi->packets = ptr; + csi->ndw = tmp; } return 0; } -static int r600_cs_end(struct radeon_cs *cs, +static int r600_cs_end(struct radeon_cs_int *csi, const char *file, const char *func, int line) { - if (!cs->section) { + if (!csi->section_ndw) { fprintf(stderr, "CS no section to end at (%s,%s,%d)\n", file, func, line); return -EPIPE; } - cs->section = 0; - if ( cs->section_ndw != cs->section_cdw ) { + if ( csi->section_ndw != csi->section_cdw ) { fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n", - cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw); - fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n", - cs->section_ndw, cs->cdw, cs->section_cdw); + csi->section_file, csi->section_func, csi->section_line, csi->section_ndw, csi->section_cdw); + fprintf(stderr, "csi->section_ndw = %d, csi->cdw = %d, csi->section_cdw = %d \n", + csi->section_ndw, csi->cdw, csi->section_cdw); fprintf(stderr, "CS section end at (%s,%s,%d)\n", file, func, line); return -EPIPE; } + csi->section_ndw = 0; - if (cs->cdw > cs->ndw) { + if (csi->cdw > csi->ndw) { fprintf(stderr, "CS section overflow at (%s,%s,%d) cdw %d ndw %d\n", - cs->section_file, cs->section_func, cs->section_line,cs->cdw,cs->ndw); + csi->section_file, csi->section_func, csi->section_line,csi->cdw,csi->ndw); fprintf(stderr, "CS section end at (%s,%s,%d)\n", file, func, line); assert(0); @@ -240,21 +259,21 @@ static int r600_cs_end(struct radeon_cs *cs, return 0; } -static int r600_cs_process_relocs(struct radeon_cs *cs, +static int r600_cs_process_relocs(struct radeon_cs_int *csi, uint32_t * reloc_chunk, uint32_t * length_dw_reloc_chunk) { - struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm; + struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm; struct r600_cs_reloc_legacy *relocs; int i, j, r; uint32_t offset_dw = 0; - csm = (struct r600_cs_manager_legacy*)cs->csm; - relocs = (struct r600_cs_reloc_legacy *)cs->relocs; + csm = (struct r600_cs_manager_legacy*)csi->csm; + relocs = (struct r600_cs_reloc_legacy *)csi->relocs; restart: - for (i = 0; i < cs->crelocs; i++) { - uint32_t soffset, eoffset, asicoffset; + for (i = 0; i < csi->crelocs; i++) { + uint32_t soffset, eoffset; r = radeon_bo_legacy_validate(relocs[i].base.bo, &soffset, &eoffset); @@ -262,32 +281,20 @@ restart: goto restart; } if (r) { - fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + fprintf(stderr, "invalid bo(%p) [0x%08X, 0x%08X]\n", relocs[i].base.bo, soffset, eoffset); return r; } - asicoffset = soffset; for (j = 0; j < relocs[i].cindices; j++) { - if (asicoffset >= eoffset) { - /* radeon_bo_debug(relocs[i].base.bo, 12); */ - fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", - relocs[i].base.bo, soffset, eoffset); - fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", - relocs[i].base.bo, - cs->packets[relocs[i].indices[j]], - eoffset); - exit(0); - return -EINVAL; - } /* pkt3 nop header in ib chunk */ - cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000; + csi->packets[relocs[i].reloc_indices[j]] = 0xC0001000; /* reloc index in ib chunk */ - cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw; + csi->packets[relocs[i].reloc_indices[j] + 1] = offset_dw; } /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */ - reloc_chunk[offset_dw] = asicoffset; + reloc_chunk[offset_dw] = soffset; reloc_chunk[offset_dw + 3] = 0; offset_dw += 4; @@ -298,14 +305,14 @@ restart: return 0; } -static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */ +static int r600_cs_set_age(struct radeon_cs_int *csi) /* -------------- */ { - struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm; + struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm; struct r600_cs_reloc_legacy *relocs; int i; - relocs = (struct r600_cs_reloc_legacy *)cs->relocs; - for (i = 0; i < cs->crelocs; i++) { + relocs = (struct r600_cs_reloc_legacy *)csi->relocs; + for (i = 0; i < csi->crelocs; i++) { radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age); radeon_bo_unref(relocs[i].base.bo); } @@ -313,21 +320,21 @@ static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */ } #if 0 -static void dump_cmdbuf(struct radeon_cs *cs) +static void dump_cmdbuf(struct radeon_cs_int *csi) { int i; fprintf(stderr,"--start--\n"); - for (i = 0; i < cs->cdw; i++){ - fprintf(stderr,"0x%08x\n", cs->packets[i]); + for (i = 0; i < csi->cdw; i++){ + fprintf(stderr,"0x%08x\n", csi->packets[i]); } fprintf(stderr,"--end--\n"); } #endif -static int r600_cs_emit(struct radeon_cs *cs) +static int r600_cs_emit(struct radeon_cs_int *csi) { - struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm; + struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm; struct drm_radeon_cs cs_cmd; struct drm_radeon_cs_chunk cs_chunk[2]; uint32_t length_dw_reloc_chunk; @@ -341,9 +348,9 @@ static int r600_cs_emit(struct radeon_cs *cs) csm->pending_count = 1; - reloc_chunk = (uint32_t*)calloc(1, cs->crelocs * 4 * 4); + reloc_chunk = (uint32_t*)calloc(1, csi->crelocs * 4 * 4); - r = r600_cs_process_relocs(cs, reloc_chunk, &length_dw_reloc_chunk); + r = r600_cs_process_relocs(csi, reloc_chunk, &length_dw_reloc_chunk); if (r) { free(reloc_chunk); return 0; @@ -351,8 +358,8 @@ static int r600_cs_emit(struct radeon_cs *cs) /* raw ib chunk */ cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB; - cs_chunk[0].length_dw = cs->cdw; - cs_chunk[0].chunk_data = (unsigned long)(cs->packets); + cs_chunk[0].length_dw = csi->cdw; + cs_chunk[0].chunk_data = (unsigned long)(csi->packets); /* reloc chaunk */ cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS; @@ -370,7 +377,7 @@ static int r600_cs_emit(struct radeon_cs *cs) do { - r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd)); + r = drmCommandWriteRead(csi->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd)); retry++; } while (r == -EAGAIN && retry < 1000); @@ -381,11 +388,11 @@ static int r600_cs_emit(struct radeon_cs *cs) csm->pending_age = cs_cmd.cs_id; - r600_cs_set_age(cs); + r600_cs_set_age(csi); - cs->csm->read_used = 0; - cs->csm->vram_write_used = 0; - cs->csm->gart_write_used = 0; + csi->csm->read_used = 0; + csi->csm->vram_write_used = 0; + csi->csm->gart_write_used = 0; free(reloc_chunk); @@ -405,35 +412,34 @@ static void inline r600_cs_free_reloc(void *relocs_p, int crelocs) } } -static int r600_cs_destroy(struct radeon_cs *cs) +static int r600_cs_destroy(struct radeon_cs_int *csi) { - r600_cs_free_reloc(cs->relocs, cs->crelocs); - free(cs->relocs); - free(cs->packets); - free(cs); + r600_cs_free_reloc(csi->relocs, csi->crelocs); + free(csi->relocs); + free(csi->packets); + free(csi); return 0; } -static int r600_cs_erase(struct radeon_cs *cs) +static int r600_cs_erase(struct radeon_cs_int *csi) { - r600_cs_free_reloc(cs->relocs, cs->crelocs); - free(cs->relocs); - cs->relocs_total_size = 0; - cs->relocs = NULL; - cs->crelocs = 0; - cs->cdw = 0; - cs->section = 0; + r600_cs_free_reloc(csi->relocs, csi->crelocs); + free(csi->relocs); + csi->relocs_total_size = 0; + csi->relocs = NULL; + csi->crelocs = 0; + csi->cdw = 0; return 0; } -static int r600_cs_need_flush(struct radeon_cs *cs) +static int r600_cs_need_flush(struct radeon_cs_int *csi) { /* this function used to flush when the BO usage got to * a certain size, now the higher levels handle this better */ return 0; } -static void r600_cs_print(struct radeon_cs *cs, FILE *file) +static void r600_cs_print(struct radeon_cs_int *csi, FILE *file) { } diff --git a/r600/r600_cmdbuf.h b/r600/r600_cmdbuf.h index eba43d3..dff0009 100644 --- a/r600/r600_cmdbuf.h +++ b/r600/r600_cmdbuf.h @@ -118,22 +118,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R600_IT_SET_CTL_CONST 0x00006F00 #define R600_IT_SURFACE_BASE_UPDATE 0x00007300 -struct r600_cs_manager_legacy -{ - struct radeon_cs_manager base; - struct radeon_context *ctx; - /* hack for scratch stuff */ - uint32_t pending_age; - uint32_t pending_count; -}; - -struct r600_cs_reloc_legacy { - struct radeon_cs_reloc base; - uint32_t cindices; - uint32_t *indices; - uint32_t *reloc_indices; -}; - struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx); /** diff --git a/r600/r600_context.c b/r600/r600_context.c index e0b77d4..dbd2337 100644 --- a/r600/r600_context.c +++ b/r600/r600_context.c @@ -59,10 +59,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_debug.h" #include "r600_context.h" #include "radeon_common_context.h" +#include "radeon_buffer_objects.h" #include "radeon_span.h" #include "r600_cmdbuf.h" #include "r600_emit.h" #include "radeon_bocs_wrapper.h" +#include "radeon_queryobj.h" #include "r700_state.h" #include "r700_ioctl.h" @@ -72,11 +74,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ -/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */ -int future_hw_tcl_on = 1; -int hw_tcl_on = 1; - #define need_GL_VERSION_2_0 +#define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program #define need_GL_EXT_blend_equation_separate @@ -85,19 +84,20 @@ int hw_tcl_on = 1; #define need_GL_EXT_framebuffer_object #define need_GL_EXT_fog_coord #define need_GL_EXT_gpu_program_parameters +#define need_GL_EXT_provoking_vertex #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side #define need_GL_ATI_separate_stencil #define need_GL_NV_vertex_program -#include "extension_helper.h" - -extern const struct tnl_pipeline_stage *r700_pipeline[]; +#include "main/remap_helper.h" -const struct dri_extension card_extensions[] = { +static const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ + {"GL_ARB_depth_clamp", NULL}, {"GL_ARB_depth_texture", NULL}, {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, {"GL_ARB_multitexture", NULL}, {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, {"GL_ARB_shadow", NULL}, @@ -117,6 +117,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_packed_depth_stencil", NULL}, {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, + {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, {"GL_EXT_shadow_funcs", NULL}, {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, @@ -128,6 +129,8 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_texture_lod_bias", NULL}, {"GL_EXT_texture_mirror_clamp", NULL}, {"GL_EXT_texture_rectangle", NULL}, + {"GL_EXT_vertex_array_bgra", NULL}, + {"GL_EXT_texture_sRGB", NULL}, {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, {"GL_ATI_texture_env_combine3", NULL}, {"GL_ATI_texture_mirror_once", NULL}, @@ -142,7 +145,7 @@ const struct dri_extension card_extensions[] = { }; -const struct dri_extension mm_extensions[] = { +static const struct dri_extension mm_extensions[] = { { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, { NULL, NULL } }; @@ -151,21 +154,24 @@ const struct dri_extension mm_extensions[] = { * The GL 2.0 functions are needed to make display lists work with * functions added by GL_ATI_separate_stencil. */ -const struct dri_extension gl_20_extension[] = { +static const struct dri_extension gl_20_extension[] = { {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, }; - -static void r600RunPipeline(GLcontext * ctx) -{ - _mesa_lock_context_textures(ctx); - - if (ctx->NewState) - _mesa_update_state_locked(ctx); - - _tnl_run_pipeline(ctx); - _mesa_unlock_context_textures(ctx); -} +static const struct tnl_pipeline_stage *r600_pipeline[] = { + /* Catch any t&l fallbacks + */ + &_tnl_vertex_transform_stage, + &_tnl_normal_transform_stage, + &_tnl_lighting_stage, + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, + &_tnl_point_attenuation_stage, + &_tnl_vertex_program_stage, + &_tnl_render_stage, + 0, +}; static void r600_get_lock(radeonContextPtr rmesa) { @@ -176,7 +182,7 @@ static void r600_get_lock(radeonContextPtr rmesa) if (!rmesa->radeonScreen->kernel_mm) radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom); } -} +} static void r600_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) { @@ -198,6 +204,24 @@ static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) context->radeon.Fallback &= ~bit; } +static void r600_emit_query_finish(radeonContextPtr radeon) +{ + context_t *context = (context_t*) radeon; + BATCH_LOCALS(&context->radeon); + + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(4 + 2); + R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2)); + R600_OUT_BATCH(ZPASS_DONE); + R600_OUT_BATCH(query->curr_offset + 8); /* hw writes qwords */ + R600_OUT_BATCH(0x00000000); + R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + static void r600_init_vtbl(radeonContextPtr radeon) { radeon->vtbl.get_lock = r600_get_lock; @@ -206,6 +230,101 @@ static void r600_init_vtbl(radeonContextPtr radeon) radeon->vtbl.swtcl_flush = NULL; radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms; radeon->vtbl.fallback = r600_fallback; + radeon->vtbl.emit_query_finish = r600_emit_query_finish; +} + +static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen) +{ + context_t *r600 = R700_CONTEXT(ctx); + + ctx->Const.MaxTextureImageUnits = + driQueryOptioni(&r600->radeon.optionCache, "texture_image_units"); + ctx->Const.MaxTextureCoordUnits = + driQueryOptioni(&r600->radeon.optionCache, "texture_coord_units"); + ctx->Const.MaxTextureUnits = + MIN2(ctx->Const.MaxTextureImageUnits, + ctx->Const.MaxTextureCoordUnits); + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + ctx->Const.MaxTextureLodBias = 16.0; + + ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ + ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ + + ctx->Const.MinPointSize = 0x0001 / 8.0; + ctx->Const.MinPointSizeAA = 0x0001 / 8.0; + ctx->Const.MaxPointSize = 0xffff / 8.0; + ctx->Const.MaxPointSizeAA = 0xffff / 8.0; + + ctx->Const.MinLineWidth = 0x0001 / 8.0; + ctx->Const.MinLineWidthAA = 0x0001 / 8.0; + ctx->Const.MaxLineWidth = 0xffff / 8.0; + ctx->Const.MaxLineWidthAA = 0xffff / 8.0; + + ctx->Const.MaxDrawBuffers = 1; /* hw supports 8 */ + + /* 256 for reg-based consts, inline consts also supported */ + ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ + ctx->Const.VertexProgram.MaxNativeInstructions = 8192; + ctx->Const.VertexProgram.MaxNativeAttribs = 160; + ctx->Const.VertexProgram.MaxTemps = 128; + ctx->Const.VertexProgram.MaxNativeTemps = 128; + ctx->Const.VertexProgram.MaxNativeParameters = 256; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ + + ctx->Const.FragmentProgram.MaxNativeTemps = 128; + ctx->Const.FragmentProgram.MaxNativeAttribs = 32; + ctx->Const.FragmentProgram.MaxNativeParameters = 256; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; + /* 8 per clause on r6xx, 16 on rv670/r7xx */ + if ((screen->chip_family == CHIP_FAMILY_RV670) || + (screen->chip_family >= CHIP_FAMILY_RV770)) + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; + else + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; + ctx->Const.FragmentProgram.MaxNativeInstructions = 8192; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */ + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ +} + +static void r600ParseOptions(context_t *r600, radeonScreenPtr screen) +{ + /* Parse configuration files. + * Do this here so that initialMaxAnisotropy is set before we create + * the default textures. + */ + driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "r600"); + + r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache, + "def_max_anisotropy"); + +} + +static void r600InitGLExtensions(GLcontext *ctx) +{ + context_t *r600 = R700_CONTEXT(ctx); + + driInitExtensions(ctx, card_extensions, GL_TRUE); + if (r600->radeon.radeonScreen->kernel_mm) + driInitExtensions(ctx, mm_extensions, GL_FALSE); + + if (driQueryOptionb + (&r600->radeon.optionCache, "disable_stencil_two_side")) + _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); + + if (r600->radeon.glCtx->Mesa_DXTn + && !driQueryOptionb(&r600->radeon.optionCache, "disable_s3tc")) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + _mesa_enable_extension(ctx, "GL_S3_s3tc"); + } else + if (driQueryOptionb(&r600->radeon.optionCache, "force_s3tc_enable")) + { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } + + /* XXX: RV740 only seems to report results from half of its DBs */ + if (r600->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV740) + _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); } /* Create the device specific rendering context. @@ -231,19 +350,10 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, return GL_FALSE; } - if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) - hw_tcl_on = future_hw_tcl_on = 0; + r600ParseOptions(r600, screen); + r600->radeon.radeonScreen = screen; r600_init_vtbl(&r600->radeon); - /* Parse configuration files. - * Do this here so that initialMaxAnisotropy is set before we create - * the default textures. - */ - driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache, - screen->driScreen->myNum, "r600"); - - r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache, - "def_max_anisotropy"); /* Init default driver functions then plug in our R600-specific functions * (the texture functions are especially important) @@ -253,7 +363,9 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, r700InitStateFuncs(&functions); r600InitTextureFuncs(&functions); r700InitShaderFuncs(&functions); + radeonInitQueryObjFunctions(&functions); r700InitIoctlFuncs(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r600->radeon, &functions, glVisual, driContextPriv, @@ -263,44 +375,14 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, return GL_FALSE; } - /* Init r600 context data */ - /* Set the maximum texture size small enough that we can guarentee that - * all texture units can bind a maximal texture and have them both in - * texturable memory at once. - */ - ctx = r600->radeon.glCtx; - ctx->Const.MaxTextureImageUnits = - driQueryOptioni(&r600->radeon.optionCache, "texture_image_units"); - ctx->Const.MaxTextureCoordUnits = - driQueryOptioni(&r600->radeon.optionCache, "texture_coord_units"); - ctx->Const.MaxTextureUnits = - MIN2(ctx->Const.MaxTextureImageUnits, - ctx->Const.MaxTextureCoordUnits); - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - ctx->Const.MaxTextureLodBias = 16.0; - - ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ - ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ - - ctx->Const.MinPointSize = 0x0001 / 8.0; - ctx->Const.MinPointSizeAA = 0x0001 / 8.0; - ctx->Const.MaxPointSize = 0xffff / 8.0; - ctx->Const.MaxPointSizeAA = 0xffff / 8.0; - - ctx->Const.MinLineWidth = 0x0001 / 8.0; - ctx->Const.MinLineWidthAA = 0x0001 / 8.0; - ctx->Const.MaxLineWidth = 0xffff / 8.0; - ctx->Const.MaxLineWidthAA = 0xffff / 8.0; + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - /* Needs further modifications */ -#if 0 - ctx->Const.MaxArrayLockSize = - ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); -#endif + r600InitConstValues(ctx, screen); - ctx->Const.MaxDrawBuffers = 1; + _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); /* Initialize the software rasterizer and helper modules. */ @@ -309,16 +391,12 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_CreateContext(ctx); _swsetup_CreateContext(ctx); _swsetup_Wakeup(ctx); - _ae_create_context(ctx); /* Install the customized pipeline: */ _tnl_destroy_pipeline(ctx); - _tnl_install_pipeline(ctx, r700_pipeline); - - /* Try and keep materials and vertices separate: - */ -/* _tnl_isolate_materials(ctx, GL_TRUE); */ + _tnl_install_pipeline(ctx, r600_pipeline); + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; /* Configure swrast and TNL to match hardware characteristics: */ @@ -327,66 +405,16 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_pixel_fog(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_TRUE); - /* 256 for reg-based consts, inline consts also supported */ - ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ - ctx->Const.VertexProgram.MaxNativeInstructions = 8192; - ctx->Const.VertexProgram.MaxNativeAttribs = 160; - ctx->Const.VertexProgram.MaxTemps = 128; - ctx->Const.VertexProgram.MaxNativeTemps = 128; - ctx->Const.VertexProgram.MaxNativeParameters = 256; - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ - - ctx->Const.FragmentProgram.MaxNativeTemps = 128; - ctx->Const.FragmentProgram.MaxNativeAttribs = 32; - ctx->Const.FragmentProgram.MaxNativeParameters = 256; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; - /* 8 per clause on r6xx, 16 on rv670/r7xx */ - if ((screen->chip_family == CHIP_FAMILY_RV670) || - (screen->chip_family >= CHIP_FAMILY_RV770)) - ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; - else - ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; - ctx->Const.FragmentProgram.MaxNativeInstructions = 8192; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */ - ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ - ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - radeon_init_debug(); - driInitExtensions(ctx, card_extensions, GL_TRUE); - if (r600->radeon.radeonScreen->kernel_mm) - driInitExtensions(ctx, mm_extensions, GL_FALSE); - - if (driQueryOptionb - (&r600->radeon.optionCache, "disable_stencil_two_side")) - _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); - - if (r600->radeon.glCtx->Mesa_DXTn - && !driQueryOptionb(&r600->radeon.optionCache, "disable_s3tc")) { - _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); - _mesa_enable_extension(ctx, "GL_S3_s3tc"); - } else - if (driQueryOptionb(&r600->radeon.optionCache, "force_s3tc_enable")) - { - _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); - } + r700InitDraw(ctx); radeon_fbo_init(&r600->radeon); radeonInitSpanFuncs( ctx ); - r600InitCmdBuf(r600); - r700InitState(r600->radeon.glCtx); - TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline; - - if (driQueryOptionb(&r600->radeon.optionCache, "no_rast")) { - radeon_warning("disabling 3D acceleration\n"); -#if R200_MERGED - FALLBACK(&r600->radeon, RADEON_FALLBACK_DISABLE, 1); -#endif - } + r600InitGLExtensions(ctx); return GL_TRUE; } diff --git a/r600/r600_context.h b/r600/r600_context.h index 9397ecd..394fd75 100644 --- a/r600/r600_context.h +++ b/r600/r600_context.h @@ -58,29 +58,6 @@ typedef struct r600_context context_t; #include "main/mm.h" -/************ DMA BUFFERS **************/ - -/* The blit width for texture uploads - */ -#define R600_BLIT_WIDTH_BYTES 1024 -#define R600_MAX_TEXTURE_UNITS 8 - -struct r600_texture_state { - int tc_count; /* number of incoming texture coordinates from VAP */ -}; - -/* Perhaps more if we store programs in vmem? */ -/* drm_r600_cmd_header_t->vpu->count is unsigned char */ -#define VSF_MAX_FRAGMENT_LENGTH (255*4) - -/* Can be tested with colormat currently. */ -#define VSF_MAX_FRAGMENT_TEMPS (14) - -#define STATE_R600_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) -#define STATE_R600_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) - -extern int hw_tcl_on; - #define COLOR_IS_RGBA #define TAG(x) r600##x #include "tnl_dd/t_dd_vertex.h" @@ -126,6 +103,32 @@ struct r600_hw_state { struct radeon_state_atom tx_brdr_clr; }; +typedef struct StreamDesc +{ + GLint size; //number of data element + GLenum type; //data element type + GLsizei stride; + + struct radeon_bo *bo; + GLint bo_offset; + + GLuint dwords; + GLuint dst_loc; + GLuint _signed; + GLboolean normalize; + GLboolean is_named_bo; + GLubyte element; +} StreamDesc; + +typedef struct r700_index_buffer +{ + struct radeon_bo *bo; + int bo_offset; + + GLboolean is_32bit; + GLuint count; +} r700_index_buffer; + /** * \brief R600 context structure. */ @@ -141,9 +144,9 @@ struct r600_context { /* Vertex buffers */ - GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; - GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - + GLint nNumActiveAos; + StreamDesc stream_desc[VERT_ATTRIB_MAX]; + struct r700_index_buffer ind_buf; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) @@ -174,16 +177,13 @@ extern GLboolean r700SyncSurf(context_t *context, uint32_t write_domain, uint32_t sync_type); -extern void r700SetupStreams(GLcontext * ctx); extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); +extern void r700InitDraw(GLcontext *ctx); #define RADEON_D_CAPTURE 0 #define RADEON_D_PLAYBACK 1 #define RADEON_D_PLAYBACK_RAW 2 #define RADEON_D_T 3 -#define r600PackFloat32 radeonPackFloat32 -#define r600PackFloat24 radeonPackFloat24 - #endif /* __R600_CONTEXT_H__ */ diff --git a/r600/r600_reg_r6xx.h b/r600/r600_reg_r6xx.h index f7702c4..74af7b4 100644 --- a/r600/r600_reg_r6xx.h +++ b/r600/r600_reg_r6xx.h @@ -415,11 +415,11 @@ enum { ALPHA_TO_MASK_ENABLE = 1 << 0, ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8, ALPHA_TO_MASK_OFFSET0_shift = 8, - ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10, ALPHA_TO_MASK_OFFSET1_shift = 10, - ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12, ALPHA_TO_MASK_OFFSET2_shift = 12, - ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14, ALPHA_TO_MASK_OFFSET3_shift = 14, // SQ_VTX_CONSTANT_WORD2_0 = 0x00038008, diff --git a/r600/r600_reg_r7xx.h b/r600/r600_reg_r7xx.h index e5c01c8..eb169bd 100644 --- a/r600/r600_reg_r7xx.h +++ b/r600/r600_reg_r7xx.h @@ -143,6 +143,8 @@ enum { // SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c, R7xx_TRUNCATE_COORD_bit = 1 << 9, R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10, +// DB_RENDER_CONTROL = 0x00028d0c, + PERFECT_ZPASS_COUNTS_bit = 1 << 15, } ; diff --git a/r600/r600_tex.c b/r600/r600_tex.c index d105b90..9d83a64 100644 --- a/r600/r600_tex.c +++ b/r600/r600_tex.c @@ -40,7 +40,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/image.h" #include "main/mipmap.h" #include "main/simple_list.h" -#include "main/texformat.h" #include "main/texstore.h" #include "main/teximage.h" #include "main/texobj.h" @@ -286,6 +285,7 @@ static void r600TexParameter(GLcontext * ctx, GLenum target, GLenum pname, const GLfloat * params) { radeonTexObj* t = radeon_tex_obj(texObj); + GLenum baseFormat; radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __FUNCTION__, @@ -312,23 +312,15 @@ static void r600TexParameter(GLcontext * ctx, GLenum target, case GL_TEXTURE_MAX_LEVEL: case GL_TEXTURE_MIN_LOD: case GL_TEXTURE_MAX_LOD: - /* This isn't the most efficient solution but there doesn't appear to - * be a nice alternative. Since there's no LOD clamping, - * we just have to rely on loading the right subset of mipmap levels - * to simulate a clamped LOD. - */ - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - t->validated = GL_FALSE; - } + t->validated = GL_FALSE; break; case GL_DEPTH_TEXTURE_MODE: if (!texObj->Image[0][texObj->BaseLevel]) return; - if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat - == GL_DEPTH_COMPONENT) { + baseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat; + if (baseFormat == GL_DEPTH_COMPONENT || + baseFormat == GL_DEPTH_STENCIL) { r600SetDepthTexMode(texObj); break; } else { @@ -368,10 +360,8 @@ static void r600DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) t->bo = NULL; } - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - } + radeon_miptree_unreference(&t->mt); + _mesa_delete_texture_object(ctx, texObj); } diff --git a/r600/r600_texstate.c b/r600/r600_texstate.c index bcb8d7c..4ec315b 100644 --- a/r600/r600_texstate.c +++ b/r600/r600_texstate.c @@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "main/context.h" #include "main/macros.h" -#include "main/texformat.h" #include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" @@ -78,7 +77,7 @@ void r600UpdateTextureState(GLcontext * ctx) } } -static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_format) +static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa_format) { radeonTexObj *t = radeon_tex_obj(tObj); @@ -87,9 +86,19 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + switch (mesa_format) /* This is mesa format. */ { case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_SIGNED_RGBA8888: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); @@ -101,8 +110,19 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) { + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } break; case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_SIGNED_RGBA8888_REV: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); @@ -114,6 +134,16 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) { + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } break; case MESA_FORMAT_ARGB8888: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, @@ -480,13 +510,21 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_Z16: + case MESA_FORMAT_X8_Z24: + case MESA_FORMAT_S8_Z24: case MESA_FORMAT_Z24_S8: case MESA_FORMAT_Z32: + case MESA_FORMAT_S8: switch (mesa_format) { case MESA_FORMAT_Z16: SETfield(t->SQ_TEX_RESOURCE1, FMT_16, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); break; + case MESA_FORMAT_X8_Z24: + case MESA_FORMAT_S8_Z24: + SETfield(t->SQ_TEX_RESOURCE1, FMT_8_24, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + break; case MESA_FORMAT_Z24_S8: SETfield(t->SQ_TEX_RESOURCE1, FMT_24_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); @@ -495,6 +533,12 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo SETfield(t->SQ_TEX_RESOURCE1, FMT_32, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); break; + case MESA_FORMAT_S8: + SETfield(t->SQ_TEX_RESOURCE1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + break; + default: + break; }; switch (tObj->DepthMode) { case GL_LUMINANCE: /* X, X, X, ONE */ @@ -531,6 +575,49 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo return GL_FALSE; } break; + /* EXT_texture_sRGB */ + case MESA_FORMAT_SRGBA8: + SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SLA8: + SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SL8: /* X, X, X, ONE */ + SETfield(t->SQ_TEX_RESOURCE1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; default: /* Not supported format */ return GL_FALSE; @@ -548,7 +635,7 @@ void r600SetDepthTexMode(struct gl_texture_object *tObj) t = radeon_tex_obj(tObj); - r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat); + r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat); } @@ -562,7 +649,6 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex { radeonTexObj *t = radeon_tex_obj(texObj); const struct gl_texture_image *firstImage; - int firstlevel = t->mt ? t->mt->firstLevel : 0; GLuint uTexelPitch, row_align; if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled && @@ -570,10 +656,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex t->bo) return; - firstImage = t->base.Image[0][firstlevel]; + firstImage = t->base.Image[0][t->minLod]; if (!t->image_override) { - if (!r600GetTexFormat(texObj, firstImage->TexFormat->MesaFormat)) { + if (!r600GetTexFormat(texObj, firstImage->TexFormat)) { radeon_error("unexpected texture format in %s\n", __FUNCTION__); return; @@ -605,7 +691,8 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex } row_align = rmesa->radeon.texture_row_align - 1; - uTexelPitch = ((firstImage->Width * t->mt->bpp + row_align) & ~row_align) / t->mt->bpp; + uTexelPitch = (_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align; + uTexelPitch = uTexelPitch / _mesa_get_format_bytes(firstImage->TexFormat); uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK) & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; @@ -619,10 +706,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask); - if ((t->mt->lastLevel - t->mt->firstLevel) > 0) { - t->SQ_TEX_RESOURCE3 = t->mt->levels[0].size / 256; - SETfield(t->SQ_TEX_RESOURCE4, t->mt->firstLevel, BASE_LEVEL_shift, BASE_LEVEL_mask); - SETfield(t->SQ_TEX_RESOURCE5, t->mt->lastLevel, LAST_LEVEL_shift, LAST_LEVEL_mask); + if ((t->maxLod - t->minLod) > 0) { + t->SQ_TEX_RESOURCE3 = t->mt->levels[t->minLod].size / 256; + SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); + SETfield(t->SQ_TEX_RESOURCE5, t->maxLod - t->minLod, LAST_LEVEL_shift, LAST_LEVEL_mask); } } @@ -721,7 +808,8 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); radeonTexObjPtr t = radeon_tex_obj(tObj); - uint32_t pitch_val, size; + const struct gl_texture_image *firstImage; + uint32_t pitch_val, size, row_align; if (!tObj) return; @@ -731,7 +819,9 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, if (!offset) return; - size = pitch;//h * w * (depth / 8); + firstImage = t->base.Image[0][t->minLod]; + row_align = rmesa->radeon.texture_row_align - 1; + size = ((_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align) * firstImage->Height; if (t->bo) { radeon_bo_unref(t->bo); t->bo = NULL; @@ -854,20 +944,14 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo radeon_bo_unref(rImage->bo); rImage->bo = NULL; } - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = NULL; - } - if (rImage->mt) { - radeon_miptree_unreference(rImage->mt); - rImage->mt = NULL; - } + + radeon_miptree_unreference(&t->mt); + radeon_miptree_unreference(&rImage->mt); + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, rb->base.Width, rb->base.Height, 1, 0, rb->cpp); texImage->RowStride = rb->pitch / rb->cpp; - texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx, - internalFormat, - type, format, 0); + rImage->bo = rb->bo; radeon_bo_ref(rImage->bo); t->bo = rb->bo; diff --git a/r600/r700_assembler.c b/r600/r700_assembler.c index 00eda54..67e0ee7 100644 --- a/r600/r700_assembler.c +++ b/r600/r700_assembler.c @@ -336,7 +336,8 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) switch (pAsm->D.dst.opcode) { - case SQ_OP2_INST_ADD: + case SQ_OP2_INST_ADD: + case SQ_OP2_INST_KILLGT: case SQ_OP2_INST_MUL: case SQ_OP2_INST_MAX: case SQ_OP2_INST_MIN: @@ -354,9 +355,9 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) return 2; case SQ_OP2_INST_MOV: + case SQ_OP2_INST_MOVA_FLOOR: case SQ_OP2_INST_FRACT: case SQ_OP2_INST_FLOOR: - case SQ_OP2_INST_KILLGT: case SQ_OP2_INST_EXP_IEEE: case SQ_OP2_INST_LOG_CLAMPED: case SQ_OP2_INST_LOG_IEEE: @@ -790,6 +791,133 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr + = pAsm->vfetch_instruction_ptr_array[element]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(type, size, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH; + vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA; + vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + vfetch_instruction_ptr->m_Word0.f.buffer_id = element; + vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0; + vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; + vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; + + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + + vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; + vfetch_instruction_ptr->m_Word1.f.data_format = data_format; + vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE; + + if(1 == _signed) + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED; + } + else + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED; + } + + if(GL_TRUE == normalize) + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM; + } + else + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT; + } + + // Destination register + vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; + vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE; + + vfetch_instruction_ptr->m_Word2.f.offset = 0; + vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0; + + vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag; + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[element] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; +} + +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm) +{ + GLint i; + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + pAsm->cf_current_vtx_clause_ptr = NULL; + + for (i=0; i<VERT_ATTRIB_MAX; i++) + { + pAsm->vfetch_instruction_ptr_array[ i ] = NULL; + } + + cleanup_vfetch_shaderinst(pAsm->pR700Shader); + + return GL_TRUE; +} + GLuint gethelpr(r700_AssemblerBase* pAsm) { GLuint r = pAsm->uHelpReg; @@ -1180,8 +1308,10 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) case PROGRAM_INPUT: switch (pILInst->SrcReg[0].Index) { + case FRAG_ATTRIB_WPOS: case FRAG_ATTRIB_COL0: case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_FOGC: case FRAG_ATTRIB_TEX0: case FRAG_ATTRIB_TEX1: case FRAG_ATTRIB_TEX2: @@ -1194,7 +1324,16 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; + break; + case FRAG_ATTRIB_FACE: + fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); + break; + case FRAG_ATTRIB_PNTC: + fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); + break; + case FRAG_ATTRIB_VAR0: + fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n"); + break; } break; } @@ -1951,9 +2090,9 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) GLuint contiguous_slots_needed; GLuint uNumSrc = r700GetNumOperands(pAsm); - GLuint channel_swizzle, j; - GLuint chan_counter[4] = {0, 0, 0, 0}; - PVSSRC * pSource[3]; + //GLuint channel_swizzle, j; + //GLuint chan_counter[4] = {0, 0, 0, 0}; + //PVSSRC * pSource[3]; GLboolean bSplitInst = GL_FALSE; if (1 == pAsm->D.dst.math) @@ -2053,7 +2192,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; + alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X; if( (is_single_scalar_operation == GL_TRUE) || (GL_TRUE == bSplitInst) ) @@ -2387,6 +2526,35 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm) return GL_TRUE; } +GLboolean assemble_ARL(r700_AssemblerBase *pAsm) +{ /* TODO: ar values dont' persist between clauses */ + if( GL_FALSE == checkop1(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean assemble_BAD(char *opcode_str) { radeon_error("Not yet implemented instruction (%s)\n", opcode_str); @@ -2508,7 +2676,7 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm) } else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) { - onecomp_PVSSRC(&(pAsm->S[1].src), 3); + onecomp_PVSSRC(&(pAsm->S[0].src), 3); } if ( GL_FALSE == next_ins(pAsm) ) @@ -2561,6 +2729,133 @@ GLboolean assemble_EX2(r700_AssemblerBase *pAsm) { return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE); } + +GLboolean assemble_EXP(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // FLOOR tmp.x, a.x + // EX2 dst.x tmp.x + + if (pAsm->pILInst->DstReg.WriteMask & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // FRACT dst.y a.x + + if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // EX2 dst.z, a.x + + if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // MOV dst.w 1.0 + + if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} GLboolean assemble_FLR(r700_AssemblerBase *pAsm) { @@ -2617,15 +2912,15 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) GLboolean assemble_KIL(r700_AssemblerBase *pAsm) { + /* TODO: doc says KILL has to be last(end) ALU clause */ + checkop1(pAsm); pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; - - if ( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; @@ -2638,20 +2933,11 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm) setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - - if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File) + if ( GL_FALSE == assemble_src(pAsm, 0, 1) ) { - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number; - } - else - { //PROGRAM_OUTPUT - pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index]; + return GL_FALSE; } - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - noswizzle_PVSSRC(&(pAsm->S[1].src)); - if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; @@ -2751,6 +3037,217 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm) return GL_TRUE; } +GLboolean assemble_LOG(r700_AssemblerBase *pAsm) +{ + BITS tmp1, tmp2, tmp3; + + checkop1(pAsm); + + tmp1 = gethelpr(pAsm); + tmp2 = gethelpr(pAsm); + tmp3 = gethelpr(pAsm); + + // FIXME: The hardware can do fabs() directly on input + // elements, but the compiler doesn't have the + // capability to use that. + + // MAX tmp1.x, a.x, -a.x (fabs(a.x)) + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Entire algo: + // + // LG2 tmp2.x, tmp1.x + // FLOOR tmp3.x, tmp2.x + // MOV dst.x, tmp3.x + // ADD tmp3.x, tmp2.x, -tmp3.x + // EX2 dst.y, tmp3.x + // MOV dst.z, tmp2.x + // MOV dst.w, 1.0 + + // LG2 tmp2.x, tmp1.x + // FLOOR tmp3.x, tmp2.x + + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp3; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.x, tmp3.x + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // ADD tmp3.x, tmp2.x, -tmp3.x + // EX2 dst.y, tmp3.x + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp3; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = DST_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + neg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.z, tmp2.x + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.w 1.0 + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) { int tmp, ii; @@ -2908,6 +3405,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); @@ -3417,22 +3915,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) need_barrier = GL_TRUE; } - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) - { - case OPCODE_TEX: - break; - case OPCODE_TXB: - radeon_error("do not support TXB yet\n"); - return GL_FALSE; - break; - case OPCODE_TXP: - break; - default: - radeon_error("Internal error: bad texture op (not TEX)\n"); - return GL_FALSE; - break; - } - if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { GLuint tmp = gethelpr(pAsm); @@ -3611,7 +4093,15 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) } - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB) + { + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; + } + else + { + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + } + pAsm->is_tex = GL_TRUE; if ( GL_TRUE == need_barrier ) { @@ -3809,8 +4299,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, break; case OPCODE_ARL: - radeon_error("Not yet implemented instruction OPCODE_ARL \n"); - //if ( GL_FALSE == assemble_BAD("ARL") ) + if ( GL_FALSE == assemble_ARL(pR700AsmCode) ) return GL_FALSE; break; case OPCODE_ARR: @@ -3845,10 +4334,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_EXP: - radeon_error("Not yet implemented instruction OPCODE_EXP \n"); - //if ( GL_FALSE == assemble_BAD("EXP") ) + if ( GL_FALSE == assemble_EXP(pR700AsmCode) ) return GL_FALSE; - break; // approx of EX2 + break; case OPCODE_FLR: if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) @@ -3881,10 +4369,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_LOG: - radeon_error("Not yet implemented instruction OPCODE_LOG \n"); - //if ( GL_FALSE == assemble_BAD("LOG") ) + if ( GL_FALSE == assemble_LOG(pR700AsmCode) ) return GL_FALSE; - break; // approx of LG2 + break; case OPCODE_MAD: if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) @@ -4155,6 +4642,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten) { unsigned int unBit; + GLuint export_count = 0; if(pR700AsmCode->depth_export_register_number >= 0) { @@ -4176,6 +4664,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, { return GL_FALSE; } + export_count++; } unBit = 1 << FRAG_RESULT_DEPTH; if(OutputsWritten & unBit) @@ -4189,8 +4678,15 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, { return GL_FALSE; } + export_count++; } - + /* Need to export something, otherwise we'll hang + * results are undefined anyway */ + if(export_count == 0) + { + Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE); + } + if(pR700AsmCode->cf_last_export_ptr != NULL) { pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; diff --git a/r600/r700_assembler.h b/r600/r700_assembler.h index 73bb8ba..c66db50 100644 --- a/r600/r700_assembler.h +++ b/r600/r700_assembler.h @@ -415,6 +415,15 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, GLuint number_of_elements, GLenum dataElementType, VTX_FETCH_METHOD* pFetchMethod); +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod); +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm); GLuint gethelpr(r700_AssemblerBase* pAsm); void resethelpr(r700_AssemblerBase* pAsm); void checkop_init(r700_AssemblerBase* pAsm); @@ -461,18 +470,21 @@ GLboolean next_ins(r700_AssemblerBase *pAsm); GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); GLboolean assemble_ABS(r700_AssemblerBase *pAsm); GLboolean assemble_ADD(r700_AssemblerBase *pAsm); +GLboolean assemble_ARL(r700_AssemblerBase *pAsm); GLboolean assemble_BAD(char *opcode_str); GLboolean assemble_CMP(r700_AssemblerBase *pAsm); GLboolean assemble_COS(r700_AssemblerBase *pAsm); GLboolean assemble_DOT(r700_AssemblerBase *pAsm); GLboolean assemble_DST(r700_AssemblerBase *pAsm); GLboolean assemble_EX2(r700_AssemblerBase *pAsm); +GLboolean assemble_EXP(r700_AssemblerBase *pAsm); GLboolean assemble_FLR(r700_AssemblerBase *pAsm); GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm); GLboolean assemble_FRC(r700_AssemblerBase *pAsm); GLboolean assemble_KIL(r700_AssemblerBase *pAsm); GLboolean assemble_LG2(r700_AssemblerBase *pAsm); GLboolean assemble_LRP(r700_AssemblerBase *pAsm); +GLboolean assemble_LOG(r700_AssemblerBase *pAsm); GLboolean assemble_MAD(r700_AssemblerBase *pAsm); GLboolean assemble_LIT(r700_AssemblerBase *pAsm); GLboolean assemble_MAX(r700_AssemblerBase *pAsm); diff --git a/r600/r700_chip.c b/r600/r700_chip.c index 06d7e9c..02c56b9 100644 --- a/r600/r700_chip.c +++ b/r600/r700_chip.c @@ -54,11 +54,15 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { radeonTexObj *t = r700->textures[i]; + uint32_t offset; if (t) { - if (!t->image_override) + if (!t->image_override) { bo = t->mt->bo; - else + offset = get_base_teximage_offset(t); + } else { bo = t->bo; + offset = 0; + } if (bo) { r700SyncSurf(context, bo, @@ -77,7 +81,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, bo, - 0, + offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, bo, @@ -141,17 +145,15 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom } } +extern int getTypeSize(GLenum type); static void r700SetupVTXConstants(GLcontext * ctx, - unsigned int nStreamID, void * pAos, - unsigned int size, /* number of elements in vector */ - unsigned int stride, - unsigned int count) /* number of vectors in stream */ + StreamDesc * pStreamDesc) { context_t *context = R700_CONTEXT(ctx); struct radeon_aos * paos = (struct radeon_aos *)pAos; + unsigned int nVBsize; BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); unsigned int uSQ_VTX_CONSTANT_WORD0_0; unsigned int uSQ_VTX_CONSTANT_WORD1_0; @@ -171,18 +173,40 @@ static void r700SetupVTXConstants(GLcontext * ctx, else r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); + if(0 == pStreamDesc->stride) + { + nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type); + } + else + { + nVBsize = paos->count * pStreamDesc->stride; + } + uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; - uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1; + uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1; SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ - SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, + SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); - SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL), + SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL), SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ - SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, - SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + + if(GL_TRUE == pStreamDesc->normalize) + { + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + } + //else + //{ + // SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT, + // SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + //} + + if(1 == pStreamDesc->_signed) + { + SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + } SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, @@ -191,7 +215,7 @@ static void r700SetupVTXConstants(GLcontext * ctx, BEGIN_BATCH_NO_AUTOSTATE(9 + 2); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); @@ -208,31 +232,6 @@ static void r700SetupVTXConstants(GLcontext * ctx, } -void r700SetupStreams(GLcontext *ctx) -{ - context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vp = context->selected_vp; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - unsigned int i, j = 0; - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - - R600_STATECHANGE(context, vtx); - - for(i=0; i<VERT_ATTRIB_MAX; i++) { - if(vp->mesa_program->Base.InputsRead & (1 << i)) { - rcommon_emit_vector(ctx, - &context->radeon.tcl.aos[j], - vb->AttribPtr[i]->data, - vb->AttribPtr[i]->size, - vb->AttribPtr[i]->stride, - vb->Count); - j++; - } - } - context->radeon.tcl.aos_count = j; -} - static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); @@ -256,15 +255,12 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); for(i=0; i<VERT_ATTRIB_MAX; i++) { - if(vp->mesa_program->Base.InputsRead & (1 << i)) { - /* currently aos are packed */ - r700SetupVTXConstants(ctx, - i, - (void*)(&context->radeon.tcl.aos[j]), - (unsigned int)context->radeon.tcl.aos[j].components, - (unsigned int)context->radeon.tcl.aos[j].stride * 4, - (unsigned int)context->radeon.tcl.aos[j].count); - j++; + if(vp->mesa_program->Base.InputsRead & (1 << i)) + { + r700SetupVTXConstants(ctx, + (void*)(&context->radeon.tcl.aos[j]), + &(context->stream_desc[j])); + j++; } } } @@ -366,7 +362,6 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a rrb = radeon_get_depthbuffer(&context->radeon); if (!rrb || !rrb->bo) { - fprintf(stderr, "no rrb\n"); return; } @@ -408,7 +403,6 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * rrb = radeon_get_colorbuffer(&context->radeon); if (!rrb || !rrb->bo) { - fprintf(stderr, "no rrb\n"); return; } @@ -794,8 +788,7 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom) BATCH_LOCALS(&context->radeon); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - BEGIN_BATCH_NO_AUTOSTATE(23); - R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All); + BEGIN_BATCH_NO_AUTOSTATE(17); R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2); R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All); @@ -808,7 +801,6 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom) R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All); R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All); - R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All); R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All); END_BATCH(); @@ -1108,6 +1100,32 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); } +static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = radeon->query.current; + BATCH_LOCALS(radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + /* clear the buffer */ + radeon_bo_map(query->bo, GL_FALSE); + memset(query->bo->ptr, 0, 4 * 2 * sizeof(uint64_t)); /* 4 DBs, 2 qwords each */ + radeon_bo_unmap(query->bo); + + radeon_cs_space_check_with_bo(radeon->cmdbuf.cs, + query->bo, + 0, RADEON_GEM_DOMAIN_GTT); + + BEGIN_BATCH_NO_AUTOSTATE(4 + 2); + R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2)); + R600_OUT_BATCH(ZPASS_DONE); + R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */ + R600_OUT_BATCH(0x00000000); + R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + query->emitted_begin = GL_TRUE; +} + static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) { return atom->cmd_size; @@ -1136,7 +1154,11 @@ static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom) count += 3; if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { - for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) { + /* targets are enabled in r700SetRenderTarget but state + size is calculated before that. Until MRT's are done + hardcode target0 as enabled. */ + count += 3; + for (ui = 1; ui < R700_MAX_RENDER_TARGETS; ui++) { if (r700->render_target[ui].enabled) count += 3; } @@ -1216,6 +1238,20 @@ static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom) return count; } +static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = radeon->query.current; + int count; + + if (!query || query->emitted_begin) + count = 0; + else + count = atom->cmd_size; + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + return count; +} + #define ALLOC_STATE( ATOM, CHK, SZ, EMIT ) \ do { \ context->atoms.ATOM.cmd_size = (SZ); \ @@ -1229,6 +1265,19 @@ do { \ insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \ } while (0) +static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ) +{ + radeon->query.queryobj.cmd_size = (SZ); + radeon->query.queryobj.cmd = NULL; + radeon->query.queryobj.name = "queryobj"; + radeon->query.queryobj.idx = 0; + radeon->query.queryobj.check = check_queryobj; + radeon->query.queryobj.dirty = GL_FALSE; + radeon->query.queryobj.emit = r700SendQueryBegin; + radeon->hw.max_state_size += (SZ); + insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj); +} + void r600InitAtoms(context_t *context) { radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context); @@ -1239,7 +1288,7 @@ void r600InitAtoms(context_t *context) context->radeon.hw.atomlist.name = "atom-list"; ALLOC_STATE(sq, always, 34, r700SendSQConfig); - ALLOC_STATE(db, always, 23, r700SendDBState); + ALLOC_STATE(db, always, 17, r700SendDBState); ALLOC_STATE(stencil, always, 4, r700SendStencilState); ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState); ALLOC_STATE(sc, always, 15, r700SendSCState); @@ -1252,9 +1301,9 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(poly, always, 10, r700SendPolyState); ALLOC_STATE(cb, cb, 18, r700SendCBState); ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); + ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState); ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); - ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState); ALLOC_STATE(sx, always, 9, r700SendSXState); ALLOC_STATE(vgt, always, 41, r700SendVGTState); ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState); @@ -1268,6 +1317,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState); ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState); ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState); + r600_init_query_stateobj(&context->radeon, 6 * 2); context->radeon.hw.is_dirty = GL_TRUE; context->radeon.hw.all_dirty = GL_TRUE; diff --git a/r600/r700_fragprog.c b/r600/r700_fragprog.c index 78ce3ae..ccafd43 100644 --- a/r600/r700_fragprog.c +++ b/r600/r700_fragprog.c @@ -135,15 +135,19 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, { GLuint i, j; GLint * puiTEMPwrites; + GLint * puiTEMPreads; struct prog_instruction * pILInst; InstDeps *pInstDeps; struct prog_instruction * texcoord_DepInst; GLint nDepInstID; puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries); + puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries); + for(i=0; i<mesa_fp->Base.NumTemporaries; i++) { puiTEMPwrites[i] = -1; + puiTEMPreads[i] = -1; } pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions); @@ -167,6 +171,11 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, { //Set dep. pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index]; + //Set first read + if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 ) + { + puiTEMPreads[pILInst->SrcReg[j].Index] = i; + } } else { @@ -177,8 +186,6 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, fp->r700AsmCode.pInstDeps = pInstDeps; - FREE(puiTEMPwrites); - //Find dep for tex inst for(i=0; i<mesa_fp->Base.NumInstructions; i++) { @@ -203,9 +210,25 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, { //... other deps? } } + // make sure that we dont overwrite src used earlier + nDepInstID = puiTEMPreads[pILInst->DstReg.Index]; + if(nDepInstID < i) + { + pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index]; + texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]); + if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) ) + { + pInstDeps[nDepInstID].nDstDep = i; + } + + } + } } + FREE(puiTEMPwrites); + FREE(puiTEMPreads); + return GL_TRUE; } @@ -251,7 +274,15 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, number_of_colors_exported--; } - fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled; + /* illegal to set this to 0 */ + if(number_of_colors_exported || z_enabled) + { + fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled; + } + else + { + fp->r700Shader.exportMode = (1 << 1); + } fp->translated = GL_TRUE; @@ -341,6 +372,11 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } + else + { + CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); + CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); + } ui = (unNumOfReg < ui) ? ui : unNumOfReg; @@ -357,26 +393,6 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode, EXPORT_MODE_shift, EXPORT_MODE_mask); - R600_STATECHANGE(context, db); - - if(fp->r700Shader.killIsUsed) - { - SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); - } - else - { - CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); - } - - if(fp->r700Shader.depthIsExported) - { - SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); - } - else - { - CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); - } - // emit ps input map unBit = 1 << FRAG_ATTRIB_WPOS; if(mesa_fp->Base.InputsRead & unBit) @@ -443,9 +459,12 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } } - R600_STATECHANGE(context, cb); exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); - r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1; + if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1)) + { + R600_STATECHANGE(context, cb); + r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1; + } /* sent out shader constants. */ paramList = fp->mesa_program.Base.Parameters; diff --git a/r600/r700_oglprog.c b/r600/r700_oglprog.c index 5290ef3..0d476fc 100644 --- a/r600/r700_oglprog.c +++ b/r600/r700_oglprog.c @@ -40,6 +40,24 @@ #include "r700_vertprog.h" +static void freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *cache) +{ + struct r700_vertex_program *tmp, *vp = cache->progs; + + while (vp) { + tmp = vp->next; + /* Release DMA region */ + r600DeleteShader(ctx, vp->shaderbo); + /* Clean up */ + Clean_Up_Assembler(&(vp->r700AsmCode)); + Clean_Up_Shader(&(vp->r700Shader)); + + _mesa_reference_vertprog(ctx, &vp->mesa_program, NULL); + _mesa_free(vp); + vp = tmp; + } +} + static struct gl_program *r700NewProgram(GLcontext * ctx, GLenum target, GLuint id) @@ -84,8 +102,7 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { - struct r700_vertex_program_cont * vpc; - struct r700_vertex_program *vp, *tmp; + struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog; struct r700_fragment_program * fp; radeon_print(RADEON_SHADER, RADEON_VERBOSE, @@ -95,20 +112,7 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: - vpc = (struct r700_vertex_program_cont*)prog; - vp = vpc->progs; - while (vp) { - tmp = vp->next; - /* Release DMA region */ - - r600DeleteShader(ctx, vp->shaderbo); - - /* Clean up */ - Clean_Up_Assembler(&(vp->r700AsmCode)); - Clean_Up_Shader(&(vp->r700Shader)); - _mesa_free(vp); - vp = tmp; - } + freeVertProgCache(ctx, vpc); break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: @@ -131,7 +135,24 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) static void r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { - + struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog; + struct r700_fragment_program * fp = (struct r700_fragment_program*)prog; + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + freeVertProgCache(ctx, vpc); + vpc->progs = NULL; + break; + case GL_FRAGMENT_PROGRAM_ARB: + r600DeleteShader(ctx, fp->shaderbo); + Clean_Up_Assembler(&(fp->r700AsmCode)); + Clean_Up_Shader(&(fp->r700Shader)); + fp->translated = GL_FALSE; + fp->loaded = GL_FALSE; + fp->shaderbo = NULL; + break; + } + } static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) diff --git a/r600/r700_render.c b/r600/r700_render.c index b1c3648..47f89c9 100644 --- a/r600/r700_render.c +++ b/r600/r700_render.c @@ -43,6 +43,7 @@ #include "tnl/t_context.h" #include "tnl/t_vertex.h" #include "tnl/t_pipeline.h" +#include "vbo/vbo_context.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -53,13 +54,12 @@ #include "r700_fragprog.h" #include "r700_state.h" +#include "radeon_buffer_objects.h" #include "radeon_common_context.h" void r700WaitForIdle(context_t *context); void r700WaitForIdleClean(context_t *context); -GLboolean r700SendTextureState(context_t *context); static unsigned int r700PrimitiveType(int prim); -void r600UpdateTextureState(GLcontext * ctx); GLboolean r700SyncSurf(context_t *context, struct radeon_bo *pbo, uint32_t read_domain, @@ -249,113 +249,635 @@ static int r700NumVerts(int num_verts, int prim) static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) { - context_t *context = R700_CONTEXT(ctx); - BATCH_LOCALS(&context->radeon); - int type, i, total_emit; - int num_indices; - uint32_t vgt_draw_initiator = 0; - uint32_t vgt_index_type = 0; - uint32_t vgt_primitive_type = 0; - uint32_t vgt_num_indices = 0; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - type = r700PrimitiveType(prim); - num_indices = r700NumVerts(end - start, prim); - - radeon_print(RADEON_RENDER, RADEON_TRACE, - "%s type %x num_indices %d\n", - __func__, type, num_indices); - - if (type < 0 || num_indices <= 0) - return; + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, total_emit; + int num_indices; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + + type = r700PrimitiveType(prim); + num_indices = r700NumVerts(end - start, prim); + + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); + + if (type < 0 || num_indices <= 0) + return; + + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + + if(GL_TRUE != context->ind_buf.is_32bit) + { + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); + + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 5 + 2; /* DRAW_INDEX */ + + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + // draw packet + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3)); + R600_OUT_BATCH(context->ind_buf.bo_offset); + R600_OUT_BATCH(0); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset, + context->ind_buf.bo, + context->ind_buf.bo_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim) +{ + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, i; + uint32_t num_indices, total_emit = 0; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + + type = r700PrimitiveType(prim); + num_indices = r700NumVerts(end - start, prim); + + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); + + if (type < 0 || num_indices <= 0) + return; - total_emit = 3 /* VGT_PRIMITIVE_TYPE */ - + 2 /* VGT_INDEX_TYPE */ - + 2 /* NUM_INSTANCES */ - + num_indices + 3; /* DRAW_INDEX_IMMD */ - - BEGIN_BATCH_NO_AUTOSTATE(total_emit); - // prim - SETfield(vgt_primitive_type, type, - VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); - R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); - R600_OUT_BATCH(vgt_primitive_type); - - // index type - SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); - R600_OUT_BATCH(vgt_index_type); - - // num instances - R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); - R600_OUT_BATCH(1); - - // draw packet - vgt_num_indices = num_indices; - SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + + if (num_indices > 0xffff) + { + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + else + { + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); + + if (start == 0) + { + SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + } + else + { + if (num_indices > 0xffff) + { + total_emit += num_indices; + } + else + { + total_emit += (num_indices + 1) / 2; + } + SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + } + + total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 3; /* DRAW */ + + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + // draw packet + if(start == 0) + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); R600_OUT_BATCH(vgt_num_indices); R600_OUT_BATCH(vgt_draw_initiator); - - for (i = start; i < (start + num_indices); i++) { - if(vb->Elts) - R600_OUT_BATCH(vb->Elts[i]); + } + else + { + if (num_indices > 0xffff) + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + for (i = start; i < (start + num_indices); i++) + { + R600_OUT_BATCH(i); + } + } + else + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + for (i = start; i < (start + num_indices); i += 2) + { + if ((i + 1) == (start + num_indices)) + { + R600_OUT_BATCH(i); + } else - R600_OUT_BATCH(i); - } - END_BATCH(); - COMMIT_BATCH(); + { + R600_OUT_BATCH(((i + 1) << 16) | (i)); + } + } + } + } + END_BATCH(); + COMMIT_BATCH(); } /* start 3d, idle, cb/db flush */ #define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 -static GLuint r700PredictRenderSize(GLcontext* ctx) +static GLuint r700PredictRenderSize(GLcontext* ctx, + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + GLuint nr_prims) { context_t *context = R700_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct r700_vertex_program *vp = context->selected_vp; - struct vertex_buffer *vb = &tnl->vb; GLboolean flushed; GLuint dwords, i; GLuint state_size; - /* pre calculate aos count so state prediction works */ - context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead); dwords = PRE_EMIT_STATE_BUFSZ; - for (i = 0; i < vb->PrimitiveCount; i++) - dwords += vb->Primitive[i].count + 10; + if (ib) + dwords += nr_prims * 14; + else { + for (i = 0; i < nr_prims; ++i) + { + if (prim[i].start == 0) + dwords += 10; + else if (prim[i].count > 0xffff) + dwords += prim[i].count + 10; + else + dwords += ((prim[i].count + 1) / 2) + 10; + } + } + state_size = radeonCountStateEmitSize(&context->radeon); flushed = rcommonEnsureCmdBufSpace(&context->radeon, - dwords + state_size, __FUNCTION__); - + dwords + state_size, + __FUNCTION__); if (flushed) - dwords += radeonCountStateEmitSize(&context->radeon); + dwords += radeonCountStateEmitSize(&context->radeon); else - dwords += state_size; + dwords += state_size; - radeon_print(RADEON_RENDER, RADEON_VERBOSE, - "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords); return dwords; + +} + +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j, sz; \ + sz = input->Size; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = MACRO(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = (GLfloat)(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } \ +} while (0) + +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r700ConvertAttrib(GLcontext *ctx, int count, + const struct gl_client_array *input, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + { + count = 1; + } + + if (input->BufferObj->Name) + { + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + } + else + { + src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, + sizeof(GLfloat) * input->Size * count, 32); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + assert(src_ptr != NULL); + + switch (input->Type) + { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r700AlignDataToDword(GLcontext *ctx, + const struct gl_client_array *input, + int count, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) + { + _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + GLuint stride; + int ret; + int i, index; + + R600_STATECHANGE(context, vtx); + + for(index = 0; index < context->nNumActiveAos; index++) + { + struct radeon_aos *aos = &context->radeon.tcl.aos[index]; + i = context->stream_desc[index].element; + + stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) + { + r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); + } + else + { + if (input[i]->BufferObj->Name) + { + if (stride % 4 != 0) + { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); + context->stream_desc[index].is_named_bo = GL_FALSE; + } + else + { + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; + } + } + else + { + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) + { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } + else + { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, + &context->stream_desc[index].bo_offset, size, 32); + assert(context->stream_desc[index].bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, + context->stream_desc[index].bo_offset); + + switch (context->stream_desc[index].dwords) + { + case 1: + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + case 2: + radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + case 3: + radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + case 4: + radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + default: + assert(0); + break; + } + } + } + + aos->count = context->stream_desc[index].stride == 0 ? 1 : count; + aos->stride = context->stream_desc[index].stride / sizeof(float); + aos->components = context->stream_desc[index].dwords; + aos->bo = context->stream_desc[index].bo; + aos->offset = context->stream_desc[index].bo_offset; + + if(context->stream_desc[index].is_named_bo) + { + radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs, + context->stream_desc[index].bo, + RADEON_GEM_DOMAIN_GTT, 0); + } + } + + ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, + first_elem(&context->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); +} + +static void r700FreeData(GLcontext *ctx) +{ + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + context_t *context = R700_CONTEXT(ctx); + + int i; + + for (i = 0; i < context->nNumActiveAos; i++) + { + if (!context->stream_desc[i].is_named_bo) + { + radeon_bo_unref(context->stream_desc[i].bo); + } + context->radeon.tcl.aos[i].bo = NULL; + } + + if (context->ind_buf.bo != NULL) + { + radeon_bo_unref(context->ind_buf.bo); + } +} + +static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; + assert(mesa_ind_buf->obj->Pointer != NULL); + } + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) + { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + GLubyte *in = (GLubyte *)src_ptr; + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } + +#if MESA_BIG_ENDIAN + } + else + { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ + GLushort *in = (GLushort *)src_ptr; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } +#endif + } + + context->ind_buf.is_32bit = GL_FALSE; + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } +} + +static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + + if (!mesa_ind_buf) { + context->ind_buf.bo = NULL; + return; + } + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) + { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) + { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + assert(context->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + _mesa_memcpy(dst_ptr, src_ptr, size); + + context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } + else + { + r700FixupIndexBuffer(ctx, mesa_ind_buf); + } } -static GLboolean r700RunRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) +static GLboolean r700TryDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) { context_t *context = R700_CONTEXT(ctx); radeonContextPtr radeon = &context->radeon; - unsigned int i, id = 0; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; + GLuint i, id = 0; struct radeon_renderbuffer *rrb; - radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n", - __func__, context->radeon.cmdbuf.cs->cdw); + if (ctx->NewState) + _mesa_update_state( ctx ); + + _tnl_UpdateFixedFunctionProgram(ctx); + r700SetVertexFormat(ctx, arrays, max_index + 1); + /* shaders need to be updated before buffers are validated */ + r700UpdateShaders(ctx); + if (!r600ValidateBuffers(ctx)) + return GL_FALSE; /* always emit CB base to prevent * lock ups on some chips. @@ -367,21 +889,29 @@ static GLboolean r700RunRender(GLcontext * ctx, r700SetScissor(context); r700SetupVertexProgram(ctx); r700SetupFragmentProgram(ctx); - r600UpdateTextureState(ctx); + r700UpdateShaderStates(ctx); - GLuint emit_end = r700PredictRenderSize(ctx) - + context->radeon.cmdbuf.cs->cdw; - r700SetupStreams(ctx); + GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims) + + context->radeon.cmdbuf.cs->cdw; + + r700SetupIndexBuffer(ctx, ib); + r700SetupStreams(ctx, arrays, max_index + 1); radeonEmitState(radeon); radeon_debug_add_indent(); - /* richard test code */ - for (i = 0; i < vb->PrimitiveCount; i++) { - GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); - GLuint start = vb->Primitive[i].start; - GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - r700RunRenderPrimitive(ctx, start, end, prim); + for (i = 0; i < nr_prims; ++i) + { + if (context->ind_buf.bo) + r700RunRenderPrimitive(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); + else + r700RunRenderPrimitiveImmediate(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); } radeon_debug_remove_indent(); @@ -398,83 +928,54 @@ static GLboolean r700RunRender(GLcontext * ctx, r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); - radeonReleaseArrays(ctx, ~0); - - radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n", - __func__, context->radeon.cmdbuf.cs->cdw); + r700FreeData(ctx); - if ( emit_end < context->radeon.cmdbuf.cs->cdw ) - WARN_ONCE("Rendering was %d commands larger than predicted size." - " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); - - return GL_FALSE; -} + if (emit_end < context->radeon.cmdbuf.cs->cdw) + { + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); + } -static GLboolean r700RunNonTCLRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) /* -------------------- */ -{ - GLboolean bRet = GL_TRUE; - - return bRet; + return GL_TRUE; } -static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ - struct tnl_pipeline_stage *stage) +static void r700DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) { - GLboolean bRet = GL_FALSE; + GLboolean retval = GL_FALSE; - /* TODO : sw fallback */ - - /* Need shader bo's setup before bo check */ - r700UpdateShaders(ctx); - /** + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } - * Ensure all enabled and complete textures are uploaded along with any buffers being used. - */ - if(!r600ValidateBuffers(ctx)) - { - return GL_TRUE; - } + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims ); + return; + } - bRet = r700RunRender(ctx, stage); + /* Make an attempt at drawing */ + retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - return bRet; - //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline - //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success. + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); } -const struct tnl_pipeline_stage _r700_render_stage = { - "r700 Hardware Rasterization", - NULL, - NULL, - NULL, - NULL, - r700RunNonTCLRender -}; - -const struct tnl_pipeline_stage _r700_tcl_stage = { - "r700 Hardware Transform, Clipping and Lighting", - NULL, - NULL, - NULL, - NULL, - r700RunTCLRender -}; - -const struct tnl_pipeline_stage *r700_pipeline[] = +void r700InitDraw(GLcontext *ctx) { - &_r700_tcl_stage, - &_tnl_vertex_transform_stage, - &_tnl_normal_transform_stage, - &_tnl_lighting_stage, - &_tnl_fog_coordinate_stage, - &_tnl_texgen_stage, - &_tnl_texture_transform_stage, - &_tnl_vertex_program_stage, - - &_r700_render_stage, - &_tnl_render_stage, - 0, -}; + struct vbo_context *vbo = vbo_context(ctx); + + /* to be enabled */ + vbo->draw_prims = r700DrawPrims; +} diff --git a/r600/r700_shader.c b/r600/r700_shader.c index b4fd51c..955ea4e 100644 --- a/r600/r700_shader.c +++ b/r600/r700_shader.c @@ -60,6 +60,55 @@ void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * plstCFInstructions->uNumOfNode++; } +void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst) +{ + GLuint ulIndex = 0; + GLboolean bFound = GL_FALSE; + R700ShaderInstruction * pPrevInst = NULL; + R700ShaderInstruction * pCurInst = plstCFInstructions->pHead; + + /* Need go thro list to make sure pInst is there. */ + while(NULL != pCurInst) + { + if(pCurInst == pInst) + { + bFound = GL_TRUE; + break; + } + + pPrevInst = pCurInst; + pCurInst = pCurInst->pNextInst; + } + if(GL_TRUE == bFound) + { + plstCFInstructions->uNumOfNode--; + + pCurInst = pInst->pNextInst; + ulIndex = pInst->m_uIndex; + while(NULL != pCurInst) + { + pCurInst->m_uIndex = ulIndex; + ulIndex++; + pCurInst = pCurInst->pNextInst; + } + + if(plstCFInstructions->pHead == pInst) + { + plstCFInstructions->pHead = pInst->pNextInst; + } + if(plstCFInstructions->pTail == pInst) + { + plstCFInstructions->pTail = pPrevInst; + } + if(NULL != pPrevInst) + { + pPrevInst->pNextInst = pInst->pNextInst; + } + + FREE(pInst); + } +} + void Init_R700_Shader(R700_Shader * pShader) { pShader->Type = R700_SHADER_INVALID; @@ -488,6 +537,47 @@ void DebugPrint(void) { } +void cleanup_vfetch_shaderinst(R700_Shader *pShader) +{ + R700ShaderInstruction *pInst; + R700ShaderInstruction *pInstToFree; + R700VertexInstruction *pVTXInst; + R700ControlFlowInstruction *pCFInst; + + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pVTXInst = (R700VertexInstruction *)pInst; + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pVTXInst->m_ShaderInstType); + + if(NULL != pVTXInst->m_pLinkedGenericClause) + { + pCFInst = (R700ControlFlowInstruction*)(pVTXInst->m_pLinkedGenericClause); + + TakeInstOutFromList(&(pShader->lstCFInstructions), + (R700ShaderInstruction*)pCFInst); + + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pCFInst->m_ShaderInstType); + } + + pInst = pInst->pNextInst; + }; + + //destroy each item in pShader->lstVTXInstructions; + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pInstToFree = pInst; + pInst = pInst->pNextInst; + FREE(pInstToFree); + }; + + //set NULL pShader->lstVTXInstructions + pShader->lstVTXInstructions.pHead=NULL; + pShader->lstVTXInstructions.pTail=NULL; + pShader->lstVTXInstructions.uNumOfNode=0; +} + void Clean_Up_Shader(R700_Shader *pShader) { FREE(pShader->pProgram); diff --git a/r600/r700_shader.h b/r600/r700_shader.h index bfd01e1..c6a0586 100644 --- a/r600/r700_shader.h +++ b/r600/r700_shader.h @@ -128,6 +128,7 @@ typedef struct R700_Shader //Internal void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst); +void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst); void ResolveLinks(R700_Shader *pShader); void Assemble(R700_Shader *pShader); @@ -143,6 +144,7 @@ void LoadProgram(R700_Shader *pShader); void UpdateShaderRegisters(R700_Shader *pShader); void DeleteInstructions(R700_Shader *pShader); void DebugPrint(void); +void cleanup_vfetch_shaderinst(R700_Shader *pShader); void Clean_Up_Shader(R700_Shader *pShader); diff --git a/r600/r700_state.c b/r600/r700_state.c index 124469b..16b05d5 100644 --- a/r600/r700_state.c +++ b/r600/r700_state.c @@ -46,7 +46,6 @@ #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" #include "vbo/vbo.h" -#include "main/texformat.h" #include "r600_context.h" @@ -55,18 +54,15 @@ #include "r700_fragprog.h" #include "r700_vertprog.h" - +void r600UpdateTextureState(GLcontext * ctx); static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state); static void r700UpdatePolygonMode(GLcontext * ctx); static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state); static void r700SetStencilState(GLcontext * ctx, GLboolean state); -void r700UpdateShaders (GLcontext * ctx) //---------------------------------- +void r700UpdateShaders(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); - GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; - GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - int i; /* should only happenen once, just after context is created */ /* TODO: shouldn't we fallback to sw here? */ @@ -77,21 +73,6 @@ void r700UpdateShaders (GLcontext * ctx) //---------------------------------- r700SelectFragmentShader(ctx); - if (context->radeon.NewGLState) { - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - /* mat states from state var not array for sw */ - dummy_attrib[i].stride = 0; - temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]); - } - - _tnl_UpdateFixedFunctionProgram(ctx); - - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i]; - } - } - r700SelectVertexShader(ctx); r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); context->radeon.NewGLState = 0; @@ -171,6 +152,14 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //----------- R600_STATECHANGE(context, db_target); } + if (new_state & (_NEW_LIGHT)) { + R600_STATECHANGE(context, su); + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) + SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit); + else + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit); + } + r700UpdateStateParameters(ctx, new_state); R600_STATECHANGE(context, cl); @@ -202,6 +191,67 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //----------- context->radeon.NewGLState |= new_state; } +static void r700SetDBRenderState(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + struct r700_fragment_program *fp = (struct r700_fragment_program *) + (ctx->FragmentProgram._Current); + + R600_STATECHANGE(context, db); + + SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit); + SETfield(r700->DB_SHADER_CONTROL.u32All, EARLY_Z_THEN_LATE_Z, Z_ORDER_shift, Z_ORDER_mask); + /* XXX need to enable htile for hiz/s */ + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); + + if (context->radeon.query.current) + { + SETbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit); + if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770) + { + SETbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit); + } + } + else + { + CLEARbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit); + if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770) + { + CLEARbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit); + } + } + + if (fp) + { + if (fp->r700Shader.killIsUsed) + { + SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); + } + else + { + CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); + } + + if (fp->r700Shader.depthIsExported) + { + SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); + } + else + { + CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); + } + } +} + +void r700UpdateShaderStates(GLcontext * ctx) +{ + r700SetDBRenderState(ctx); + r600UpdateTextureState(ctx); +} + static void r700SetDepthState(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); @@ -467,10 +517,10 @@ static void r700SetBlendState(GLcontext * ctx) eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE), + blend_factor(ctx->Color.BlendSrcA, GL_TRUE), ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendDstRGB, GL_FALSE), + blend_factor(ctx->Color.BlendDstA, GL_FALSE), ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); switch (ctx->Color.BlendEquationA) { @@ -655,6 +705,10 @@ static void r700UpdateCulling(GLcontext * ctx) CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */ break; } + + /* Winding is inverted when rendering to FBO */ + if (ctx->DrawBuffer && ctx->DrawBuffer->Name) + r700->PA_SU_SC_MODE_CNTL.u32All ^= FACE_bit; } static void r700UpdateLineStipple(GLcontext * ctx) @@ -745,9 +799,9 @@ static void r700ColorMask(GLcontext * ctx, (b ? 4 : 0) | (a ? 8 : 0)); - if (mask != r700->CB_SHADER_MASK.u32All) { + if (mask != r700->CB_TARGET_MASK.u32All) { R600_STATECHANGE(context, cb); - SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask); + SETfield(r700->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask); } } @@ -1041,6 +1095,7 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //-------------------- GLfloat tz = v[MAT_TZ] * depthScale; R600_STATECHANGE(context, vpt); + R600_STATECHANGE(context, cl); r700->viewport[id].PA_CL_VPORT_XSCALE.f32All = sx; r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx; @@ -1051,6 +1106,18 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //-------------------- r700->viewport[id].PA_CL_VPORT_ZSCALE.f32All = sz; r700->viewport[id].PA_CL_VPORT_ZOFFSET.f32All = tz; + if (ctx->Transform.DepthClamp) { + r700->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); + r700->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + SETbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit); + SETbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit); + } else { + r700->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = 0.0; + r700->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = 1.0; + CLEARbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit); + CLEARbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit); + } + r700->viewport[id].enabled = GL_TRUE; r700SetScissor(context); @@ -1164,13 +1231,8 @@ static void r700UpdatePolygonMode(GLcontext * ctx) /* Handle GL_CW (clock wise and GL_CCW (counter clock wise) * correctly by selecting the correct front and back face */ - if (ctx->Polygon.FrontFace == GL_CCW) { - f = ctx->Polygon.FrontMode; - b = ctx->Polygon.BackMode; - } else { - f = ctx->Polygon.BackMode; - b = ctx->Polygon.FrontMode; - } + f = ctx->Polygon.FrontMode; + b = ctx->Polygon.BackMode; /* Enable polygon mode */ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DUAL_MODE, POLY_MODE_shift, POLY_MODE_mask); @@ -1269,11 +1331,15 @@ void r700SetScissor(context_t *context) //--------------- return; } if (context->radeon.state.scissor.enabled) { - /* r600 has exclusive scissors */ x1 = context->radeon.state.scissor.rect.x1; y1 = context->radeon.state.scissor.rect.y1; - x2 = context->radeon.state.scissor.rect.x2 + 1; - y2 = context->radeon.state.scissor.rect.y2 + 1; + x2 = context->radeon.state.scissor.rect.x2; + y2 = context->radeon.state.scissor.rect.y2; + /* r600 has exclusive BR scissors */ + if (context->radeon.radeonScreen->kernel_mm) { + x2++; + y2++; + } } else { if (context->radeon.radeonScreen->driScreen->dri2.enabled) { x1 = 0; @@ -1352,8 +1418,6 @@ void r700SetScissor(context_t *context) //--------------- SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, y2, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask); - r700->viewport[id].PA_SC_VPORT_ZMIN_0.u32All = 0; - r700->viewport[id].PA_SC_VPORT_ZMAX_0.u32All = 0x3F800000; r700->viewport[id].enabled = GL_TRUE; } @@ -1668,19 +1732,10 @@ void r700InitState(GLcontext * ctx) //------------------- r700Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); r700DepthMask(ctx, ctx->Depth.Mask); r700DepthFunc(ctx, ctx->Depth.Func); - SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit); - r700->DB_DEPTH_CLEAR.u32All = 0x3F800000; - - r700->DB_RENDER_CONTROL.u32All = 0; SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit); SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit); - r700->DB_RENDER_OVERRIDE.u32All = 0; - if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) - SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); + r700SetDBRenderState(ctx); r700->DB_ALPHA_TO_MASK.u32All = 0; SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask); @@ -1754,7 +1809,7 @@ void r700InitState(GLcontext * ctx) //------------------- r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF; /* screen/window/view */ - SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask); + SETfield(r700->CB_SHADER_MASK.u32All, 0xF, (4 * id), OUTPUT0_ENABLE_mask); context->radeon.hw.all_dirty = GL_TRUE; diff --git a/r600/r700_state.h b/r600/r700_state.h index 0f53d5b..60c6a7f 100644 --- a/r600/r700_state.h +++ b/r600/r700_state.h @@ -35,6 +35,7 @@ extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state); extern void r700UpdateShaders (GLcontext * ctx); +extern void r700UpdateShaderStates(GLcontext * ctx); extern void r700UpdateViewportOffset(GLcontext * ctx); diff --git a/r600/r700_vertprog.c b/r600/r700_vertprog.c index 04726ec..6986eb0 100644 --- a/r600/r700_vertprog.c +++ b/r600/r700_vertprog.c @@ -159,7 +159,35 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions( return GL_TRUE; } -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + int i; + context_t *context = R700_CONTEXT(ctx); + + VTX_FETCH_METHOD vtxFetchMethod; + vtxFetchMethod.bEnableMini = GL_FALSE; + vtxFetchMethod.mega_fetch_remainder = 0; + + for(i=0; i<context->nNumActiveAos; i++) + { + assemble_vfetch_instruction2(&vp->r700AsmCode, + vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element], + context->stream_desc[i].type, + context->stream_desc[i].size, + context->stream_desc[i].element, + context->stream_desc[i]._signed, + context->stream_desc[i].normalize, + &vtxFetchMethod); + } + + return GL_TRUE; +} + +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp) { GLuint ui; @@ -175,10 +203,10 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, pAsm->number_used_registers += num_inputs; // Create VFETCH instructions for inputs - if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) { - radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); - return; //error + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n"); + return; } // Map Outputs @@ -189,7 +217,7 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, pAsm->number_used_registers += pAsm->number_of_exports; pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); - + for(ui=0; ui<pAsm->number_of_exports; ui++) { pAsm->pucOutMask[ui] = 0x0; @@ -206,7 +234,7 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, { /* fix func t_vp uses NumTemporaries */ pAsm->number_used_registers += mesa_vp->Base.NumTemporaries; } - + pAsm->uFirstHelpReg = pAsm->number_used_registers; } @@ -261,13 +289,10 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, } struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp) + struct gl_vertex_program *mesa_vp) { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program *vp; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - unsigned int unBit; unsigned int i; vp = _mesa_calloc(sizeof(*vp)); @@ -278,17 +303,13 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, _mesa_insert_mvp_code(ctx, vp->mesa_program); } - for(i=0; i<VERT_ATTRIB_MAX; i++) + for(i=0; i<context->nNumActiveAos; i++) { - unBit = 1 << i; - if(vp->mesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */ - { - vp->aos_desc[i].size = vb->AttribPtr[i]->size; - vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/ - vp->aos_desc[i].type = GL_FLOAT; - } + vp->aos_desc[i].size = context->stream_desc[i].size; + vp->aos_desc[i].stride = context->stream_desc[i].stride; + vp->aos_desc[i].type = context->stream_desc[i].type; } - + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { vp->r700AsmCode.bR6xx = 1; @@ -296,19 +317,19 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, //Init_Program Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); - Map_Vertex_Program( vp, vp->mesa_program ); + Map_Vertex_Program(ctx, vp, vp->mesa_program ); if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) { return NULL; - } + } if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions, - &(vp->mesa_program->Base.Instructions[0]), + &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) { return NULL; - } + } if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) ) { @@ -330,9 +351,6 @@ void r700SelectVertexShader(GLcontext *ctx) context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program_cont *vpc; struct r700_vertex_program *vp; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - unsigned int unBit; unsigned int i; GLboolean match; GLbitfield InputsRead; @@ -343,29 +361,27 @@ void r700SelectVertexShader(GLcontext *ctx) if (vpc->mesa_program.IsPositionInvariant) { InputsRead |= VERT_BIT_POS; - } - + } + for (vp = vpc->progs; vp; vp = vp->next) { - match = GL_TRUE; - for(i=0; i<VERT_ATTRIB_MAX; i++) + match = GL_TRUE; + for(i=0; i<context->nNumActiveAos; i++) { - unBit = 1 << i; - if(InputsRead & unBit) + if (vp->aos_desc[i].size != context->stream_desc[i].size) { - if (vp->aos_desc[i].size != vb->AttribPtr[i]->size) - match = GL_FALSE; - break; + match = GL_FALSE; + break; } } - if (match) + if (match) { context->selected_vp = vp; return; } } - vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) ); + vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program)); if(!vp) { radeon_error("Failed to translate vertex shader. \n"); @@ -377,6 +393,146 @@ void r700SelectVertexShader(GLcontext *ctx) return; } +int getTypeSize(GLenum type) +{ + switch (type) + { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input) +{ + context_t *context = R700_CONTEXT(ctx); + + StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]); + + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size + : input->StrideB; + + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input->Type) != 4 || +#endif + stride < 4) + { + pStreamDesc->type = GL_FLOAT; + + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = sizeof(GLfloat) * input->Size; + } + pStreamDesc->dwords = input->Size; + pStreamDesc->is_named_bo = GL_FALSE; + } + else + { + pStreamDesc->type = input->Type; + pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4; + if (!input->BufferObj->Name) + { + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3; + } + + pStreamDesc->is_named_bo = GL_FALSE; + } + } + + pStreamDesc->size = input->Size; + pStreamDesc->dst_loc = context->nNumActiveAos; + pStreamDesc->element = unLoc; + + switch (pStreamDesc->type) + { //GetSurfaceFormat + case GL_FLOAT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = GL_FALSE; + break; + case GL_SHORT: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_BYTE: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_SHORT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_BYTE: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + default: + case GL_INT: + case GL_UNSIGNED_INT: + case GL_DOUBLE: + assert(0); + break; + } + context->nNumActiveAos++; +} + +void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vpc + = (struct r700_vertex_program *)ctx->VertexProgram._Current; + + struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program); + unsigned int unLoc = 0; + unsigned int unBit = mesa_vp->Base.InputsRead; + context->nNumActiveAos = 0; + + if (mesa_vp->IsPositionInvariant) + { + unBit |= VERT_BIT_POS; + } + + while(unBit) + { + if(unBit & 1) + { + r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]); + } + + unBit >>= 1; + ++unLoc; + } + context->radeon.tcl.aos_count = context->nNumActiveAos; +} + void * r700GetActiveVpShaderBo(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); diff --git a/r600/r700_vertprog.h b/r600/r700_vertprog.h index c48764c..00824c2 100644 --- a/r600/r700_vertprog.h +++ b/r600/r700_vertprog.h @@ -52,8 +52,7 @@ struct r700_vertex_program GLboolean translated; GLboolean loaded; - GLboolean needUpdateVF; - + void * shaderbo; ArrayDesc aos_desc[VERT_ATTRIB_MAX]; @@ -76,19 +75,27 @@ unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm, GLboolean Process_Vertex_Program_Vfetch_Instructions( struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp); + struct gl_vertex_program *mesa_vp); /* Interface */ extern void r700SelectVertexShader(GLcontext *ctx); +extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count); extern GLboolean r700SetupVertexProgram(GLcontext * ctx); extern void * r700GetActiveVpShaderBo(GLcontext * ctx); +extern int getTypeSize(GLenum type); + #endif /* _R700_VERTPROG_H_ */ diff --git a/radeon/Makefile.am b/radeon/Makefile.am index 554f67e..0ff149d 100644 --- a/radeon/Makefile.am +++ b/radeon/Makefile.am @@ -1,6 +1,6 @@ AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING -RADEON_CFLAGS = -Iserver -DRADEON_COMMON=0 +RADEON_CFLAGS = -DRADEON_R100 -Iserver radeon_dri_la_LTLIBRARIES = radeon_dri.la radeon_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(RADEON_CFLAGS) @@ -36,5 +36,7 @@ if HAVE_LIBDRM_RADEON radeon_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS) radeon_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS) radeon_dri_la_SOURCES += \ - radeon_cs_space_drm.c + radeon_cs_space_drm.c \ + radeon_bo.c \ + radeon_cs.c endif diff --git a/radeon/radeon_bo.c b/radeon/radeon_bo.c new file mode 100644 index 0000000..393d156 --- /dev/null +++ b/radeon/radeon_bo.c @@ -0,0 +1,110 @@ +#include <radeon_bocs_wrapper.h> +#include <radeon_bo_int_drm.h> + +void radeon_bo_debug(struct radeon_bo *bo, + const char *op) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + + fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X\n", + op, bo, bo->handle, boi->size, boi->cref); +} + +struct radeon_bo *radeon_bo_open(struct radeon_bo_manager *bom, + uint32_t handle, + uint32_t size, + uint32_t alignment, + uint32_t domains, + uint32_t flags) +{ + struct radeon_bo *bo; + bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags); + return bo; +} + +void radeon_bo_ref(struct radeon_bo *bo) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + boi->cref++; + boi->bom->funcs->bo_ref(boi); +} + +struct radeon_bo *radeon_bo_unref(struct radeon_bo *bo) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + boi->cref--; + return boi->bom->funcs->bo_unref(boi); +} + +int radeon_bo_map(struct radeon_bo *bo, int write) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + return boi->bom->funcs->bo_map(boi, write); +} + +int radeon_bo_unmap(struct radeon_bo *bo) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + return boi->bom->funcs->bo_unmap(boi); +} + +int radeon_bo_wait(struct radeon_bo *bo) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + if (!boi->bom->funcs->bo_wait) + return 0; + return boi->bom->funcs->bo_wait(boi); +} + +int radeon_bo_is_busy(struct radeon_bo *bo, + uint32_t *domain) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + return boi->bom->funcs->bo_is_busy(boi, domain); +} + +int radeon_bo_set_tiling(struct radeon_bo *bo, + uint32_t tiling_flags, uint32_t pitch) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + return boi->bom->funcs->bo_set_tiling(boi, tiling_flags, pitch); +} + +int radeon_bo_get_tiling(struct radeon_bo *bo, + uint32_t *tiling_flags, uint32_t *pitch) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + return boi->bom->funcs->bo_get_tiling(boi, tiling_flags, pitch); +} + +int radeon_bo_is_static(struct radeon_bo *bo) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + if (boi->bom->funcs->bo_is_static) + return boi->bom->funcs->bo_is_static(boi); + return 0; +} + +int radeon_bo_is_referenced_by_cs(struct radeon_bo *bo, + struct radeon_cs *cs) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + return boi->cref > 1; +} + +uint32_t radeon_bo_get_handle(struct radeon_bo *bo) +{ + return bo->handle; +} + +uint32_t radeon_bo_get_src_domain(struct radeon_bo *bo) +{ + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + uint32_t src_domain; + + src_domain = boi->space_accounted & 0xffff; + if (!src_domain) + src_domain = boi->space_accounted >> 16; + + return src_domain; +} diff --git a/radeon/radeon_bo_drm.h b/radeon/radeon_bo_drm.h index 7141371..beb2369 100644 --- a/radeon/radeon_bo_drm.h +++ b/radeon/radeon_bo_drm.h @@ -32,188 +32,44 @@ #include <stdio.h> #include <stdint.h> -//#include "radeon_track.h" /* bo object */ #define RADEON_BO_FLAGS_MACRO_TILE 1 #define RADEON_BO_FLAGS_MICRO_TILE 2 struct radeon_bo_manager; +struct radeon_cs; struct radeon_bo { - uint32_t alignment; + void *ptr; + uint32_t flags; uint32_t handle; uint32_t size; - uint32_t domains; - uint32_t flags; - unsigned cref; -#ifdef RADEON_BO_TRACK - struct radeon_track *track; -#endif - void *ptr; - struct radeon_bo_manager *bom; - uint32_t space_accounted; -}; - -/* bo functions */ -struct radeon_bo_funcs { - struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom, - uint32_t handle, - uint32_t size, - uint32_t alignment, - uint32_t domains, - uint32_t flags); - void (*bo_ref)(struct radeon_bo *bo); - struct radeon_bo *(*bo_unref)(struct radeon_bo *bo); - int (*bo_map)(struct radeon_bo *bo, int write); - int (*bo_unmap)(struct radeon_bo *bo); - int (*bo_wait)(struct radeon_bo *bo); - int (*bo_is_static)(struct radeon_bo *bo); - int (*bo_set_tiling)(struct radeon_bo *bo, uint32_t tiling_flags, - uint32_t pitch); - int (*bo_get_tiling)(struct radeon_bo *bo, uint32_t *tiling_flags, - uint32_t *pitch); - int (*bo_is_busy)(struct radeon_bo *bo, uint32_t *domain); }; -struct radeon_bo_manager { - struct radeon_bo_funcs *funcs; - int fd; - -#ifdef RADEON_BO_TRACK - struct radeon_tracker tracker; -#endif -}; - -static inline void _radeon_bo_debug(struct radeon_bo *bo, - const char *op, - const char *file, - const char *func, - int line) -{ - fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X [%s %s %d]\n", - op, bo, bo->handle, bo->size, bo->cref, file, func, line); -} - -static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager *bom, - uint32_t handle, - uint32_t size, - uint32_t alignment, - uint32_t domains, - uint32_t flags, - const char *file, - const char *func, - int line) -{ - struct radeon_bo *bo; - - bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags); - -#ifdef RADEON_BO_TRACK - if (bo) { - bo->track = radeon_tracker_add_track(&bom->tracker, bo->handle); - radeon_track_add_event(bo->track, file, func, "open", line); - } -#endif - return bo; -} - -static inline void _radeon_bo_ref(struct radeon_bo *bo, - const char *file, - const char *func, - int line) -{ - bo->cref++; -#ifdef RADEON_BO_TRACK - radeon_track_add_event(bo->track, file, func, "ref", line); -#endif - bo->bom->funcs->bo_ref(bo); -} - -static inline struct radeon_bo *_radeon_bo_unref(struct radeon_bo *bo, - const char *file, - const char *func, - int line) -{ - bo->cref--; -#ifdef RADEON_BO_TRACK - radeon_track_add_event(bo->track, file, func, "unref", line); - if (bo->cref <= 0) { - radeon_tracker_remove_track(&bo->bom->tracker, bo->track); - bo->track = NULL; - } -#endif - return bo->bom->funcs->bo_unref(bo); -} - -static inline int _radeon_bo_map(struct radeon_bo *bo, - int write, - const char *file, - const char *func, - int line) -{ - return bo->bom->funcs->bo_map(bo, write); -} - -static inline int _radeon_bo_unmap(struct radeon_bo *bo, - const char *file, - const char *func, - int line) -{ - return bo->bom->funcs->bo_unmap(bo); -} - -static inline int _radeon_bo_wait(struct radeon_bo *bo, - const char *file, - const char *func, - int line) -{ - return bo->bom->funcs->bo_wait(bo); -} - -static inline int _radeon_bo_is_busy(struct radeon_bo *bo, - uint32_t *domain, - const char *file, - const char *func, - int line) -{ - return bo->bom->funcs->bo_is_busy(bo, domain); -} - -static inline int radeon_bo_set_tiling(struct radeon_bo *bo, - uint32_t tiling_flags, uint32_t pitch) -{ - return bo->bom->funcs->bo_set_tiling(bo, tiling_flags, pitch); -} - -static inline int radeon_bo_get_tiling(struct radeon_bo *bo, - uint32_t *tiling_flags, uint32_t *pitch) -{ - return bo->bom->funcs->bo_get_tiling(bo, tiling_flags, pitch); -} - -static inline int radeon_bo_is_static(struct radeon_bo *bo) -{ - if (bo->bom->funcs->bo_is_static) - return bo->bom->funcs->bo_is_static(bo); - return 0; -} - -#define radeon_bo_open(bom, h, s, a, d, f)\ - _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__) -#define radeon_bo_ref(bo)\ - _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__) -#define radeon_bo_unref(bo)\ - _radeon_bo_unref(bo, __FILE__, __FUNCTION__, __LINE__) -#define radeon_bo_map(bo, w)\ - _radeon_bo_map(bo, w, __FILE__, __FUNCTION__, __LINE__) -#define radeon_bo_unmap(bo)\ - _radeon_bo_unmap(bo, __FILE__, __FUNCTION__, __LINE__) -#define radeon_bo_debug(bo, opcode)\ - _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__) -#define radeon_bo_wait(bo) \ - _radeon_bo_wait(bo, __FILE__, __func__, __LINE__) -#define radeon_bo_is_busy(bo, domain) \ - _radeon_bo_is_busy(bo, domain, __FILE__, __func__, __LINE__) +struct radeon_bo_manager; +void radeon_bo_debug(struct radeon_bo *bo, + const char *op); + +struct radeon_bo *radeon_bo_open(struct radeon_bo_manager *bom, + uint32_t handle, + uint32_t size, + uint32_t alignment, + uint32_t domains, + uint32_t flags); + +void radeon_bo_ref(struct radeon_bo *bo); +struct radeon_bo *radeon_bo_unref(struct radeon_bo *bo); +int radeon_bo_map(struct radeon_bo *bo, int write); +int radeon_bo_unmap(struct radeon_bo *bo); +int radeon_bo_wait(struct radeon_bo *bo); +int radeon_bo_is_busy(struct radeon_bo *bo, uint32_t *domain); +int radeon_bo_set_tiling(struct radeon_bo *bo, uint32_t tiling_flags, uint32_t pitch); +int radeon_bo_get_tiling(struct radeon_bo *bo, uint32_t *tiling_flags, uint32_t *pitch); +int radeon_bo_is_static(struct radeon_bo *bo); +int radeon_bo_is_referenced_by_cs(struct radeon_bo *bo, + struct radeon_cs *cs); +uint32_t radeon_bo_get_handle(struct radeon_bo *bo); +uint32_t radeon_bo_get_src_domain(struct radeon_bo *bo); #endif diff --git a/radeon/radeon_bo_int_drm.h b/radeon/radeon_bo_int_drm.h new file mode 100644 index 0000000..190c332 --- /dev/null +++ b/radeon/radeon_bo_int_drm.h @@ -0,0 +1,45 @@ +#ifndef RADEON_BO_INT +#define RADEON_BO_INT + +struct radeon_bo_manager { + struct radeon_bo_funcs *funcs; + int fd; +}; + +struct radeon_bo_int { + void *ptr; + uint32_t flags; + uint32_t handle; + uint32_t size; + /* private members */ + uint32_t alignment; + uint32_t domains; + unsigned cref; + struct radeon_bo_manager *bom; + uint32_t space_accounted; + uint32_t referenced_in_cs; +}; + +/* bo functions */ +struct radeon_bo_funcs { + struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom, + uint32_t handle, + uint32_t size, + uint32_t alignment, + uint32_t domains, + uint32_t flags); + void (*bo_ref)(struct radeon_bo_int *bo); + struct radeon_bo *(*bo_unref)(struct radeon_bo_int *bo); + int (*bo_map)(struct radeon_bo_int *bo, int write); + int (*bo_unmap)(struct radeon_bo_int *bo); + int (*bo_wait)(struct radeon_bo_int *bo); + int (*bo_is_static)(struct radeon_bo_int *bo); + int (*bo_set_tiling)(struct radeon_bo_int *bo, uint32_t tiling_flags, + uint32_t pitch); + int (*bo_get_tiling)(struct radeon_bo_int *bo, uint32_t *tiling_flags, + uint32_t *pitch); + int (*bo_is_busy)(struct radeon_bo_int *bo, uint32_t *domain); + int (*bo_is_referenced_by_cs)(struct radeon_bo_int *bo, struct radeon_cs *cs); +}; + +#endif diff --git a/radeon/radeon_bo_legacy.c b/radeon/radeon_bo_legacy.c index 3e7547d..cf12664 100644 --- a/radeon/radeon_bo_legacy.c +++ b/radeon/radeon_bo_legacy.c @@ -50,6 +50,12 @@ #include "radeon_bocs_wrapper.h" #include "radeon_macros.h" +#ifdef HAVE_LIBDRM_RADEON +#include "radeon_bo_int.h" +#else +#include "radeon_bo_int_drm.h" +#endif + /* no seriously texmem.c is this screwed up */ struct bo_legacy_texture_object { driTextureObject base; @@ -57,7 +63,7 @@ struct bo_legacy_texture_object { }; struct bo_legacy { - struct radeon_bo base; + struct radeon_bo_int base; int map_count; uint32_t pending; int is_pending; @@ -187,10 +193,10 @@ static void legacy_get_current_age(struct bo_manager_legacy *boml) } } -static int legacy_is_pending(struct radeon_bo *bo) +static int legacy_is_pending(struct radeon_bo_int *boi) { - struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; - struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom; + struct bo_legacy *bo_legacy = (struct bo_legacy*)boi; if (bo_legacy->is_pending <= 0) { bo_legacy->is_pending = 0; @@ -204,13 +210,13 @@ static int legacy_is_pending(struct radeon_bo *bo) if (bo_legacy->pnext) { bo_legacy->pnext->pprev = bo_legacy->pprev; } - assert(bo_legacy->is_pending <= bo->cref); + assert(bo_legacy->is_pending <= boi->cref); while (bo_legacy->is_pending--) { - bo = radeon_bo_unref(bo); - if (!bo) + boi = (struct radeon_bo_int *)radeon_bo_unref((struct radeon_bo *)boi); + if (!boi) break; } - if (bo) + if (boi) bo_legacy->is_pending = 0; boml->cpendings--; return 0; @@ -218,7 +224,7 @@ static int legacy_is_pending(struct radeon_bo *bo) return 1; } -static int legacy_wait_pending(struct radeon_bo *bo) +static int legacy_wait_pending(struct radeon_bo_int *bo) { struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; @@ -323,7 +329,7 @@ static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml, return bo_legacy; } -static int bo_dma_alloc(struct radeon_bo *bo) +static int bo_dma_alloc(struct radeon_bo_int *bo) { struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; @@ -333,7 +339,7 @@ static int bo_dma_alloc(struct radeon_bo *bo) int r; /* align size on 4Kb */ - size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1); + size = (((4 * 1024) - 1) + bo_legacy->base.size) & ~((4 * 1024) - 1); alloc.region = RADEON_MEM_REGION_GART; alloc.alignment = bo_legacy->base.alignment; alloc.size = size; @@ -355,7 +361,7 @@ static int bo_dma_alloc(struct radeon_bo *bo) return 0; } -static int bo_dma_free(struct radeon_bo *bo) +static int bo_dma_free(struct radeon_bo_int *bo) { struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; @@ -428,7 +434,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, bo_legacy = boml->bos.next; while (bo_legacy) { if (bo_legacy->base.handle == handle) { - radeon_bo_ref(&(bo_legacy->base)); + radeon_bo_ref((struct radeon_bo *)&(bo_legacy->base)); return (struct radeon_bo*)bo_legacy; } bo_legacy = bo_legacy->next; @@ -468,20 +474,20 @@ retry: return NULL; } } - radeon_bo_ref(&(bo_legacy->base)); + radeon_bo_ref((struct radeon_bo *)&(bo_legacy->base)); return (struct radeon_bo*)bo_legacy; } -static void bo_ref(struct radeon_bo *bo) +static void bo_ref(struct radeon_bo_int *bo) { } -static struct radeon_bo *bo_unref(struct radeon_bo *bo) +static struct radeon_bo *bo_unref(struct radeon_bo_int *boi) { - struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + struct bo_legacy *bo_legacy = (struct bo_legacy*)boi; - if (bo->cref <= 0) { + if (boi->cref <= 0) { bo_legacy->prev->next = bo_legacy->next; if (bo_legacy->next) { bo_legacy->next->prev = bo_legacy->prev; @@ -491,10 +497,10 @@ static struct radeon_bo *bo_unref(struct radeon_bo *bo) } return NULL; } - return bo; + return (struct radeon_bo *)boi; } -static int bo_map(struct radeon_bo *bo, int write) +static int bo_map(struct radeon_bo_int *bo, int write) { struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; @@ -528,7 +534,7 @@ static int bo_map(struct radeon_bo *bo, int write) return 0; } -static int bo_unmap(struct radeon_bo *bo) +static int bo_unmap(struct radeon_bo_int *bo) { struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; @@ -542,7 +548,7 @@ static int bo_unmap(struct radeon_bo *bo) return 0; } -static int bo_is_busy(struct radeon_bo *bo, uint32_t *domain) +static int bo_is_busy(struct radeon_bo_int *bo, uint32_t *domain) { *domain = 0; if (bo->domains & RADEON_GEM_DOMAIN_GTT) @@ -555,7 +561,7 @@ static int bo_is_busy(struct radeon_bo *bo, uint32_t *domain) return 0; } -static int bo_is_static(struct radeon_bo *bo) +static int bo_is_static(struct radeon_bo_int *bo) { struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; return bo_legacy->static_bo; @@ -574,7 +580,7 @@ static struct radeon_bo_funcs bo_legacy_funcs = { bo_is_busy }; -static int bo_vram_validate(struct radeon_bo *bo, +static int bo_vram_validate(struct radeon_bo_int *bo, uint32_t *soffset, uint32_t *eoffset) { @@ -700,25 +706,30 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo, uint32_t *soffset, uint32_t *eoffset) { - struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom; struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; int r; int retries = 0; if (bo_legacy->map_count) { fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n", - bo, bo->size, bo_legacy->map_count); + bo, boi->size, bo_legacy->map_count); + return -EINVAL; + } + if(boi->size == 0) { + fprintf(stderr, "bo(%p) has size 0.\n", bo); return -EINVAL; } if (bo_legacy->static_bo || bo_legacy->validated) { *soffset = bo_legacy->offset; - *eoffset = bo_legacy->offset + bo->size; + *eoffset = bo_legacy->offset + boi->size; return 0; } - if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) { + if (!(boi->domains & RADEON_GEM_DOMAIN_GTT)) { - r = bo_vram_validate(bo, soffset, eoffset); + r = bo_vram_validate(boi, soffset, eoffset); if (r) { legacy_track_pending(&boml->base, 0); legacy_kick_all_buffers(boml); @@ -732,7 +743,7 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo, } } *soffset = bo_legacy->offset; - *eoffset = bo_legacy->offset + bo->size; + *eoffset = bo_legacy->offset + boi->size; bo_legacy->validated = 1; return 0; @@ -740,7 +751,8 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo, void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending) { - struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom; struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; bo_legacy->pending = pending; @@ -795,7 +807,7 @@ static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy if (bo->base.handle > bom->nhandle) { bom->nhandle = bo->base.handle + 1; } - radeon_bo_ref(&(bo->base)); + radeon_bo_ref((struct radeon_bo *)&(bo->base)); return bo; } @@ -890,12 +902,13 @@ void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom) unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo) { + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; - if (bo_legacy->static_bo || (bo->domains & RADEON_GEM_DOMAIN_GTT)) { + if (bo_legacy->static_bo || (boi->domains & RADEON_GEM_DOMAIN_GTT)) { return 0; } - return bo->size; + return boi->size; } /* @@ -920,7 +933,7 @@ struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, if (bo->base.handle > boml->nhandle) { boml->nhandle = bo->base.handle + 1; } - radeon_bo_ref(&(bo->base)); - return &(bo->base); + radeon_bo_ref((struct radeon_bo *)&(bo->base)); + return (struct radeon_bo *)&(bo->base); } diff --git a/radeon/radeon_bocs_wrapper.h b/radeon/radeon_bocs_wrapper.h index 4520a7d..6c2648b 100644 --- a/radeon/radeon_bocs_wrapper.h +++ b/radeon/radeon_bocs_wrapper.h @@ -18,8 +18,11 @@ #define RADEON_TILING_MACRO 0x1 #define RADEON_TILING_MICRO 0x2 #define RADEON_TILING_SWAP 0x4 + +#ifndef RADEON_TILING_SURFACE #define RADEON_TILING_SURFACE 0x8 /* this object requires a surface * when mapped - i.e. front buffer */ +#endif /* to be used to build locally in mesa with no libdrm bits */ #include "../radeon/radeon_bo_drm.h" diff --git a/radeon/radeon_buffer_objects.c b/radeon/radeon_buffer_objects.c index 8fac5c6..99d3ec7 100644 --- a/radeon/radeon_buffer_objects.c +++ b/radeon/radeon_buffer_objects.c @@ -136,8 +136,13 @@ radeonBufferSubData(GLcontext * ctx, const GLvoid * data, struct gl_buffer_object *obj) { + radeonContextPtr radeon = RADEON_CONTEXT(ctx); struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + if (radeon_bo_is_referenced_by_cs(radeon_obj->bo, radeon->cmdbuf.cs)) { + radeon_firevertices(radeon); + } + radeon_bo_map(radeon_obj->bo, GL_TRUE); _mesa_memcpy(radeon_obj->bo->ptr + offset, data, size); diff --git a/radeon/radeon_common.c b/radeon/radeon_common.c index 9817ff8..9b64c21 100644 --- a/radeon/radeon_common.c +++ b/radeon/radeon_common.c @@ -229,16 +229,15 @@ void radeonUpdateScissor( GLcontext *ctx ) } if (!rmesa->radeonScreen->kernel_mm) { /* Fix scissors for dri 1 */ - __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa); x1 += dPriv->x; - x2 += dPriv->x; + x2 += dPriv->x + 1; min_x += dPriv->x; - max_x += dPriv->x; + max_x += dPriv->x + 1; y1 += dPriv->y; - y2 += dPriv->y; + y2 += dPriv->y + 1; min_y += dPriv->y; - max_y += dPriv->y; + max_y += dPriv->y + 1; } rmesa->state.scissor.rect.x1 = CLAMP(x1, min_x, max_x); @@ -263,29 +262,6 @@ void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) } } -void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask ) -{ - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - GLuint i; - drm_radeon_stipple_t stipple; - - /* Must flip pattern upside down. - */ - for ( i = 0 ; i < 32 ; i++ ) { - stipple.mask[31 - i] = ((GLuint *) mask)[i]; - } - - /* TODO: push this into cmd mechanism - */ - radeon_firevertices(radeon); - LOCK_HARDWARE( radeon ); - - drmCommandWrite( radeon->dri.fd, DRM_RADEON_STIPPLE, - &stipple, sizeof(stipple) ); - UNLOCK_HARDWARE( radeon ); -} - - /* ================================================================ * SwapBuffers with client-side throttling */ @@ -1124,8 +1100,6 @@ void radeonFlush(GLcontext *ctx) if (radeon->dma.flush) radeon->dma.flush( ctx ); - radeonEmitState(radeon); - if (radeon->cmdbuf.cs->cdw) rcommonFlushCmdBuf(radeon, __FUNCTION__); @@ -1148,9 +1122,6 @@ void radeonFlush(GLcontext *ctx) } } } - - make_empty_list(&radeon->query.not_flushed_head); - } /* Make sure all commands have been sent to the hardware and have @@ -1345,5 +1316,5 @@ void rcommonBeginBatch(radeonContextPtr rmesa, int n, void radeonUserClear(GLcontext *ctx, GLuint mask) { - _mesa_meta_clear(ctx, mask); + _mesa_meta_Clear(ctx, mask); } diff --git a/radeon/radeon_common.h b/radeon/radeon_common.h index f320191..0608fe2 100644 --- a/radeon/radeon_common.h +++ b/radeon/radeon_common.h @@ -10,7 +10,6 @@ void radeonRecalcScissorRects(radeonContextPtr radeon); void radeonSetCliprects(radeonContextPtr radeon); void radeonUpdateScissor( GLcontext *ctx ); void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h); -void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask ); void radeonWaitForIdleLocked(radeonContextPtr radeon); extern uint32_t radeonGetAge(radeonContextPtr radeon); @@ -43,7 +42,7 @@ void radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb, struct radeon_bo *bo); struct radeon_renderbuffer * -radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv); +radeon_create_renderbuffer(gl_format format, __DRIdrawablePrivate *driDrawPriv); static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb) { struct radeon_renderbuffer *rrb = (struct radeon_renderbuffer *)rb; diff --git a/radeon/radeon_common_context.c b/radeon/radeon_common_context.c index 330721a..71f70d7 100644 --- a/radeon/radeon_common_context.c +++ b/radeon/radeon_common_context.c @@ -47,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ +#if defined(RADEON_R600) #include "r600_context.h" #endif @@ -262,10 +262,9 @@ GLboolean radeonInitContext(radeonContextPtr radeon, else radeon->texture_row_align = 32; radeon->texture_rect_row_align = 64; - radeon->texture_compressed_row_align = 64; + radeon->texture_compressed_row_align = 32; } - make_empty_list(&radeon->query.not_flushed_head); radeon_init_dma(radeon); return GL_TRUE; @@ -496,19 +495,7 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, static unsigned radeon_bits_per_pixel(const struct radeon_renderbuffer *rb) { - switch (rb->base._ActualFormat) { - case GL_RGB5: - case GL_DEPTH_COMPONENT16: - return 16; - case GL_RGB8: - case GL_RGBA8: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH24_STENCIL8_EXT: - case GL_STENCIL_INDEX8_EXT: - return 32; - default: - return 0; - } + return _mesa_get_format_bytes(rb->base.Format) * 8; } void diff --git a/radeon/radeon_common_context.h b/radeon/radeon_common_context.h index 0309345..6298748 100644 --- a/radeon/radeon_common_context.h +++ b/radeon/radeon_common_context.h @@ -208,6 +208,10 @@ struct radeon_tex_obj { * and so on. */ GLboolean validated; + /* Minimum LOD to be used during rendering */ + unsigned minLod; + /* Miximum LOD to be used during rendering */ + unsigned maxLod; GLuint override_offset; GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ @@ -401,9 +405,6 @@ struct radeon_state { struct radeon_depthbuffer_state depth; struct radeon_scissor_state scissor; struct radeon_stencilbuffer_state stencil; - - struct radeon_cs_space_check bos[RADEON_MAX_BOS]; - int validated_bo_count; }; /** @@ -502,7 +503,6 @@ struct radeon_context { struct { struct radeon_query_object *current; - struct radeon_query_object not_flushed_head; struct radeon_state_atom queryobj; } query; diff --git a/radeon/radeon_context.c b/radeon/radeon_context.c index 8f4485a..5e700be 100644 --- a/radeon/radeon_context.c +++ b/radeon/radeon_context.c @@ -69,7 +69,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color #define need_GL_EXT_framebuffer_object -#include "extension_helper.h" +#include "main/remap_helper.h" #define DRIVER_DATE "20061018" @@ -79,7 +79,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /* Extension strings exported by the R100 driver. */ -const struct dri_extension card_extensions[] = +static const struct dri_extension card_extensions[] = { { "GL_ARB_multitexture", NULL }, { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, @@ -109,7 +109,7 @@ const struct dri_extension card_extensions[] = { NULL, NULL } }; -const struct dri_extension mm_extensions[] = { +static const struct dri_extension mm_extensions[] = { { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, { NULL, NULL } }; diff --git a/radeon/radeon_context.h b/radeon/radeon_context.h index 4e2c52c..12ab33a 100644 --- a/radeon/radeon_context.h +++ b/radeon/radeon_context.h @@ -331,8 +331,12 @@ struct r100_hw_state { struct radeon_state_atom stp; }; +struct radeon_stipple_state { + GLuint mask[32]; +}; struct r100_state { + struct radeon_stipple_state stipple; struct radeon_texture_state texture; }; diff --git a/radeon/radeon_cs.c b/radeon/radeon_cs.c new file mode 100644 index 0000000..17e7433 --- /dev/null +++ b/radeon/radeon_cs.c @@ -0,0 +1,95 @@ + +#include <stdio.h> +#include <stdint.h> +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_bocs_wrapper.h" +#include "radeon_cs_int_drm.h" + +struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm, + uint32_t ndw) +{ + struct radeon_cs_int *csi = csm->funcs->cs_create(csm, ndw); + return (struct radeon_cs *)csi; +} + +int radeon_cs_write_reloc(struct radeon_cs *cs, + struct radeon_bo *bo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t flags) +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + + return csi->csm->funcs->cs_write_reloc(csi, + bo, + read_domain, + write_domain, + flags); +} + +int radeon_cs_begin(struct radeon_cs *cs, + uint32_t ndw, + const char *file, + const char *func, + int line) +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + return csi->csm->funcs->cs_begin(csi, ndw, file, func, line); +} + +int radeon_cs_end(struct radeon_cs *cs, + const char *file, + const char *func, + int line) +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + return csi->csm->funcs->cs_end(csi, file, func, line); +} + +int radeon_cs_emit(struct radeon_cs *cs) +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + return csi->csm->funcs->cs_emit(csi); +} + +int radeon_cs_destroy(struct radeon_cs *cs) +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + return csi->csm->funcs->cs_destroy(csi); +} + +int radeon_cs_erase(struct radeon_cs *cs) +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + return csi->csm->funcs->cs_erase(csi); +} + +int radeon_cs_need_flush(struct radeon_cs *cs) +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + return csi->csm->funcs->cs_need_flush(csi); +} + +void radeon_cs_print(struct radeon_cs *cs, FILE *file) +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + csi->csm->funcs->cs_print(csi, file); +} + +void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit) +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + if (domain == RADEON_GEM_DOMAIN_VRAM) + csi->csm->vram_limit = limit; + else + csi->csm->gart_limit = limit; +} + +void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data) +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + csi->space_flush_fn = fn; + csi->space_flush_data = data; +} + diff --git a/radeon/radeon_cs_drm.h b/radeon/radeon_cs_drm.h index ab4eca3..a3f1750 100644 --- a/radeon/radeon_cs_drm.h +++ b/radeon/radeon_cs_drm.h @@ -36,6 +36,7 @@ #include <string.h> #include "drm.h" #include "radeon_drm.h" +#include "radeon_bo_drm.h" struct radeon_cs_reloc { struct radeon_bo *bo; @@ -49,173 +50,41 @@ struct radeon_cs_reloc { #define RADEON_CS_SPACE_OP_TO_BIG 1 #define RADEON_CS_SPACE_FLUSH 2 -struct radeon_cs_space_check { - struct radeon_bo *bo; - uint32_t read_domains; - uint32_t write_domain; - uint32_t new_accounted; -}; - -#define MAX_SPACE_BOS (32) - -struct radeon_cs_manager; - struct radeon_cs { - struct radeon_cs_manager *csm; - void *relocs; - uint32_t *packets; - unsigned crelocs; - unsigned relocs_total_size; - unsigned cdw; - unsigned ndw; - int section; + uint32_t *packets; + unsigned cdw; + unsigned ndw; unsigned section_ndw; unsigned section_cdw; - const char *section_file; - const char *section_func; - int section_line; - struct radeon_cs_space_check bos[MAX_SPACE_BOS]; - int bo_count; - void (*space_flush_fn)(void *); - void *space_flush_data; -}; - -/* cs functions */ -struct radeon_cs_funcs { - struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm, - uint32_t ndw); - int (*cs_write_reloc)(struct radeon_cs *cs, - struct radeon_bo *bo, - uint32_t read_domain, - uint32_t write_domain, - uint32_t flags); - int (*cs_begin)(struct radeon_cs *cs, - uint32_t ndw, - const char *file, - const char *func, - int line); - int (*cs_end)(struct radeon_cs *cs, - const char *file, - const char *func, - int line); - int (*cs_emit)(struct radeon_cs *cs); - int (*cs_destroy)(struct radeon_cs *cs); - int (*cs_erase)(struct radeon_cs *cs); - int (*cs_need_flush)(struct radeon_cs *cs); - void (*cs_print)(struct radeon_cs *cs, FILE *file); -}; - -struct radeon_cs_manager { - struct radeon_cs_funcs *funcs; - int fd; - int32_t vram_limit, gart_limit; - int32_t vram_write_used, gart_write_used; - int32_t read_used; }; -static inline struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm, - uint32_t ndw) -{ - return csm->funcs->cs_create(csm, ndw); -} - -static inline int radeon_cs_write_reloc(struct radeon_cs *cs, - struct radeon_bo *bo, - uint32_t read_domain, - uint32_t write_domain, - uint32_t flags) -{ - return cs->csm->funcs->cs_write_reloc(cs, - bo, - read_domain, - write_domain, - flags); -} - -static inline int radeon_cs_begin(struct radeon_cs *cs, - uint32_t ndw, - const char *file, - const char *func, - int line) -{ - return cs->csm->funcs->cs_begin(cs, ndw, file, func, line); -} - -static inline int radeon_cs_end(struct radeon_cs *cs, - const char *file, - const char *func, - int line) -{ - return cs->csm->funcs->cs_end(cs, file, func, line); -} - -static inline int radeon_cs_emit(struct radeon_cs *cs) -{ - return cs->csm->funcs->cs_emit(cs); -} - -static inline int radeon_cs_destroy(struct radeon_cs *cs) -{ - return cs->csm->funcs->cs_destroy(cs); -} - -static inline int radeon_cs_erase(struct radeon_cs *cs) -{ - return cs->csm->funcs->cs_erase(cs); -} - -static inline int radeon_cs_need_flush(struct radeon_cs *cs) -{ - return cs->csm->funcs->cs_need_flush(cs); -} - -static inline void radeon_cs_print(struct radeon_cs *cs, FILE *file) -{ - cs->csm->funcs->cs_print(cs, file); -} - -static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit) -{ - - if (domain == RADEON_GEM_DOMAIN_VRAM) - cs->csm->vram_limit = limit; - else - cs->csm->gart_limit = limit; -} - -static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword) -{ - cs->packets[cs->cdw++] = dword; - if (cs->section) { - cs->section_cdw++; - } -} - -static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword) -{ - - memcpy(cs->packets + cs->cdw, &qword, sizeof(qword)); - cs->cdw+=2; - if (cs->section) { - cs->section_cdw+=2; - } -} - -static inline void radeon_cs_write_table(struct radeon_cs *cs, void *data, uint32_t size) -{ - memcpy(cs->packets + cs->cdw, data, size * 4); - cs->cdw += size; - if (cs->section) { - cs->section_cdw += size; - } -} +#define MAX_SPACE_BOS (32) -static inline void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data) -{ - cs->space_flush_fn = fn; - cs->space_flush_data = data; -} +struct radeon_cs_manager; +extern struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm, + uint32_t ndw); + +extern int radeon_cs_begin(struct radeon_cs *cs, + uint32_t ndw, + const char *file, + const char *func, int line); +extern int radeon_cs_end(struct radeon_cs *cs, + const char *file, + const char *func, + int line); +extern int radeon_cs_emit(struct radeon_cs *cs); +extern int radeon_cs_destroy(struct radeon_cs *cs); +extern int radeon_cs_erase(struct radeon_cs *cs); +extern int radeon_cs_need_flush(struct radeon_cs *cs); +extern void radeon_cs_print(struct radeon_cs *cs, FILE *file); +extern void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit); +extern void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data); +extern int radeon_cs_write_reloc(struct radeon_cs *cs, + struct radeon_bo *bo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t flags); /* * add a persistent BO to the list @@ -243,4 +112,30 @@ int radeon_cs_space_check_with_bo(struct radeon_cs *cs, uint32_t read_domains, uint32_t write_domain); +static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword) +{ + cs->packets[cs->cdw++] = dword; + if (cs->section_ndw) { + cs->section_cdw++; + } +} + +static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword) +{ + memcpy(cs->packets + cs->cdw, &qword, sizeof(uint64_t)); + cs->cdw += 2; + if (cs->section_ndw) { + cs->section_cdw += 2; + } +} + +static inline void radeon_cs_write_table(struct radeon_cs *cs, + void *data, uint32_t size) +{ + memcpy(cs->packets + cs->cdw, data, size * 4); + cs->cdw += size; + if (cs->section_ndw) { + cs->section_cdw += size; + } +} #endif diff --git a/radeon/radeon_cs_int_drm.h b/radeon/radeon_cs_int_drm.h new file mode 100644 index 0000000..8ba76bf --- /dev/null +++ b/radeon/radeon_cs_int_drm.h @@ -0,0 +1,66 @@ + +#ifndef _RADEON_CS_INT_H_ +#define _RADEON_CS_INT_H_ + +struct radeon_cs_space_check { + struct radeon_bo_int *bo; + uint32_t read_domains; + uint32_t write_domain; + uint32_t new_accounted; +}; + +struct radeon_cs_int { + /* keep first two in same place */ + uint32_t *packets; + unsigned cdw; + unsigned ndw; + unsigned section_ndw; + unsigned section_cdw; + /* private members */ + struct radeon_cs_manager *csm; + void *relocs; + unsigned crelocs; + unsigned relocs_total_size; + const char *section_file; + const char *section_func; + int section_line; + struct radeon_cs_space_check bos[MAX_SPACE_BOS]; + int bo_count; + void (*space_flush_fn)(void *); + void *space_flush_data; +}; + +/* cs functions */ +struct radeon_cs_funcs { + struct radeon_cs_int *(*cs_create)(struct radeon_cs_manager *csm, + uint32_t ndw); + int (*cs_write_reloc)(struct radeon_cs_int *cs, + struct radeon_bo *bo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t flags); + int (*cs_begin)(struct radeon_cs_int *cs, + uint32_t ndw, + const char *file, + const char *func, + int line); + int (*cs_end)(struct radeon_cs_int *cs, + const char *file, const char *func, + int line); + + + int (*cs_emit)(struct radeon_cs_int *cs); + int (*cs_destroy)(struct radeon_cs_int *cs); + int (*cs_erase)(struct radeon_cs_int *cs); + int (*cs_need_flush)(struct radeon_cs_int *cs); + void (*cs_print)(struct radeon_cs_int *cs, FILE *file); +}; + +struct radeon_cs_manager { + struct radeon_cs_funcs *funcs; + int fd; + int32_t vram_limit, gart_limit; + int32_t vram_write_used, gart_write_used; + int32_t read_used; +}; +#endif diff --git a/radeon/radeon_cs_legacy.c b/radeon/radeon_cs_legacy.c index f1addb2..45b608a 100644 --- a/radeon/radeon_cs_legacy.c +++ b/radeon/radeon_cs_legacy.c @@ -30,10 +30,18 @@ * Jérôme Glisse <glisse@freedesktop.org> */ #include <errno.h> +#include <unistd.h> +#include <stdint.h> +#include "drm.h" +#include "radeon_drm.h" #include "radeon_bocs_wrapper.h" #include "radeon_common.h" - +#ifdef HAVE_LIBDRM_RADEON +#include "radeon_cs_int.h" +#else +#include "radeon_cs_int_drm.h" +#endif struct cs_manager_legacy { struct radeon_cs_manager base; struct radeon_context *ctx; @@ -51,27 +59,27 @@ struct cs_reloc_legacy { }; -static struct radeon_cs *cs_create(struct radeon_cs_manager *csm, - uint32_t ndw) +static struct radeon_cs_int *cs_create(struct radeon_cs_manager *csm, + uint32_t ndw) { - struct radeon_cs *cs; + struct radeon_cs_int *csi; - cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs)); - if (cs == NULL) { + csi = (struct radeon_cs_int*)calloc(1, sizeof(struct radeon_cs_int)); + if (csi == NULL) { return NULL; } - cs->csm = csm; - cs->ndw = (ndw + 0x3FF) & (~0x3FF); - cs->packets = (uint32_t*)malloc(4*cs->ndw); - if (cs->packets == NULL) { - free(cs); + csi->csm = csm; + csi->ndw = (ndw + 0x3FF) & (~0x3FF); + csi->packets = (uint32_t*)malloc(4*csi->ndw); + if (csi->packets == NULL) { + free(csi); return NULL; } - cs->relocs_total_size = 0; - return cs; + csi->relocs_total_size = 0; + return csi; } -static int cs_write_reloc(struct radeon_cs *cs, +static int cs_write_reloc(struct radeon_cs_int *cs, struct radeon_bo *bo, uint32_t read_domain, uint32_t write_domain, @@ -150,20 +158,19 @@ static int cs_write_reloc(struct radeon_cs *cs, return 0; } -static int cs_begin(struct radeon_cs *cs, +static int cs_begin(struct radeon_cs_int *cs, uint32_t ndw, const char *file, const char *func, int line) { - if (cs->section) { + if (cs->section_ndw) { fprintf(stderr, "CS already in a section(%s,%s,%d)\n", cs->section_file, cs->section_func, cs->section_line); fprintf(stderr, "CS can't start section(%s,%s,%d)\n", file, func, line); return -EPIPE; } - cs->section = 1; cs->section_ndw = ndw; cs->section_cdw = 0; cs->section_file = file; @@ -187,18 +194,17 @@ static int cs_begin(struct radeon_cs *cs, return 0; } -static int cs_end(struct radeon_cs *cs, +static int cs_end(struct radeon_cs_int *cs, const char *file, const char *func, int line) { - if (!cs->section) { + if (!cs->section_ndw) { fprintf(stderr, "CS no section to end at (%s,%s,%d)\n", file, func, line); return -EPIPE; } - cs->section = 0; if (cs->section_ndw != cs->section_cdw) { fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n", cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw); @@ -206,10 +212,12 @@ static int cs_end(struct radeon_cs *cs, file, func, line); return -EPIPE; } + cs->section_ndw = 0; + return 0; } -static int cs_process_relocs(struct radeon_cs *cs) +static int cs_process_relocs(struct radeon_cs_int *cs) { struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; struct cs_reloc_legacy *relocs; @@ -254,7 +262,7 @@ restart: return 0; } -static int cs_set_age(struct radeon_cs *cs) +static int cs_set_age(struct radeon_cs_int *cs) { struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; struct cs_reloc_legacy *relocs; @@ -268,7 +276,7 @@ static int cs_set_age(struct radeon_cs *cs) return 0; } -static int cs_emit(struct radeon_cs *cs) +static int cs_emit(struct radeon_cs_int *cs) { struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; drm_radeon_cmd_buffer_t cmd; @@ -276,7 +284,7 @@ static int cs_emit(struct radeon_cs *cs) uint64_t ull; int r; - csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); + csm->ctx->vtbl.emit_cs_header((struct radeon_cs *)cs, csm->ctx); /* append buffer age */ if ( IS_R300_CLASS(csm->ctx->radeonScreen) ) @@ -289,9 +297,9 @@ static int cs_emit(struct radeon_cs *cs) age.scratch.reg = 2; age.scratch.n_bufs = 1; age.scratch.flags = 0; - radeon_cs_write_dword(cs, age.u); - radeon_cs_write_qword(cs, ull); - radeon_cs_write_dword(cs, 0); + radeon_cs_write_dword((struct radeon_cs *)cs, age.u); + radeon_cs_write_qword((struct radeon_cs *)cs, ull); + radeon_cs_write_dword((struct radeon_cs *)cs, 0); } r = cs_process_relocs(cs); @@ -342,7 +350,7 @@ static void inline cs_free_reloc(void *relocs_p, int crelocs) free(relocs[i].indices); } -static int cs_destroy(struct radeon_cs *cs) +static int cs_destroy(struct radeon_cs_int *cs) { cs_free_reloc(cs->relocs, cs->crelocs); free(cs->relocs); @@ -351,7 +359,7 @@ static int cs_destroy(struct radeon_cs *cs) return 0; } -static int cs_erase(struct radeon_cs *cs) +static int cs_erase(struct radeon_cs_int *cs) { cs_free_reloc(cs->relocs, cs->crelocs); free(cs->relocs); @@ -359,18 +367,18 @@ static int cs_erase(struct radeon_cs *cs) cs->relocs = NULL; cs->crelocs = 0; cs->cdw = 0; - cs->section = 0; + cs->section_ndw = 0; return 0; } -static int cs_need_flush(struct radeon_cs *cs) +static int cs_need_flush(struct radeon_cs_int *cs) { /* this function used to flush when the BO usage got to * a certain size, now the higher levels handle this better */ return 0; } -static void cs_print(struct radeon_cs *cs, FILE *file) +static void cs_print(struct radeon_cs_int *cs, FILE *file) { } diff --git a/radeon/radeon_cs_space_drm.c b/radeon/radeon_cs_space_drm.c index 89cbbb5..e22b437 100644 --- a/radeon/radeon_cs_space_drm.c +++ b/radeon/radeon_cs_space_drm.c @@ -29,6 +29,8 @@ #include <errno.h> #include <stdlib.h> #include "radeon_bocs_wrapper.h" +#include "radeon_bo_int_drm.h" +#include "radeon_cs_int_drm.h" struct rad_sizes { int32_t op_read; @@ -39,7 +41,7 @@ struct rad_sizes { static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct rad_sizes *sizes) { uint32_t read_domains, write_domain; - struct radeon_bo *bo; + struct radeon_bo_int *bo; bo = sc->bo; sc->new_accounted = 0; @@ -47,7 +49,7 @@ static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct ra write_domain = sc->write_domain; /* legacy needs a static check */ - if (radeon_bo_is_static(bo)) { + if (radeon_bo_is_static((struct radeon_bo *)sc->bo)) { bo->space_accounted = sc->new_accounted = (read_domains << 16) | write_domain; return 0; } @@ -100,11 +102,11 @@ static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct ra return 0; } -static int radeon_cs_do_space_check(struct radeon_cs *cs, struct radeon_cs_space_check *new_tmp) +static int radeon_cs_do_space_check(struct radeon_cs_int *cs, struct radeon_cs_space_check *new_tmp) { struct radeon_cs_manager *csm = cs->csm; int i; - struct radeon_bo *bo; + struct radeon_bo_int *bo; struct rad_sizes sizes; int ret; @@ -158,25 +160,28 @@ static int radeon_cs_do_space_check(struct radeon_cs *cs, struct radeon_cs_space void radeon_cs_space_add_persistent_bo(struct radeon_cs *cs, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain) { + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; int i; - for (i = 0; i < cs->bo_count; i++) { - if (cs->bos[i].bo == bo && - cs->bos[i].read_domains == read_domains && - cs->bos[i].write_domain == write_domain) + for (i = 0; i < csi->bo_count; i++) { + if (csi->bos[i].bo == boi && + csi->bos[i].read_domains == read_domains && + csi->bos[i].write_domain == write_domain) return; } radeon_bo_ref(bo); - i = cs->bo_count; - cs->bos[i].bo = bo; - cs->bos[i].read_domains = read_domains; - cs->bos[i].write_domain = write_domain; - cs->bos[i].new_accounted = 0; - cs->bo_count++; - - assert(cs->bo_count < MAX_SPACE_BOS); + i = csi->bo_count; + csi->bos[i].bo = boi; + csi->bos[i].read_domains = read_domains; + csi->bos[i].write_domain = write_domain; + csi->bos[i].new_accounted = 0; + csi->bo_count++; + + assert(csi->bo_count < MAX_SPACE_BOS); } -static int radeon_cs_check_space_internal(struct radeon_cs *cs, struct radeon_cs_space_check *tmp_bo) +static int radeon_cs_check_space_internal(struct radeon_cs_int *cs, + struct radeon_cs_space_check *tmp_bo) { int ret; int flushed = 0; @@ -198,37 +203,42 @@ again: int radeon_cs_space_check_with_bo(struct radeon_cs *cs, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain) -{ +{ + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + struct radeon_bo_int *boi = (struct radeon_bo_int *)bo; struct radeon_cs_space_check temp_bo; + int ret = 0; if (bo) { - temp_bo.bo = bo; + temp_bo.bo = boi; temp_bo.read_domains = read_domains; temp_bo.write_domain = write_domain; temp_bo.new_accounted = 0; } - ret = radeon_cs_check_space_internal(cs, bo ? &temp_bo : NULL); + ret = radeon_cs_check_space_internal(csi, bo ? &temp_bo : NULL); return ret; } int radeon_cs_space_check(struct radeon_cs *cs) { - return radeon_cs_check_space_internal(cs, NULL); + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; + return radeon_cs_check_space_internal(csi, NULL); } void radeon_cs_space_reset_bos(struct radeon_cs *cs) { + struct radeon_cs_int *csi = (struct radeon_cs_int *)cs; int i; - for (i = 0; i < cs->bo_count; i++) { - radeon_bo_unref(cs->bos[i].bo); - cs->bos[i].bo = NULL; - cs->bos[i].read_domains = 0; - cs->bos[i].write_domain = 0; - cs->bos[i].new_accounted = 0; + for (i = 0; i < csi->bo_count; i++) { + radeon_bo_unref((struct radeon_bo *)csi->bos[i].bo); + csi->bos[i].bo = NULL; + csi->bos[i].read_domains = 0; + csi->bos[i].write_domain = 0; + csi->bos[i].new_accounted = 0; } - cs->bo_count = 0; + csi->bo_count = 0; } diff --git a/radeon/radeon_dma.c b/radeon/radeon_dma.c index c6edbae..232972d 100644 --- a/radeon/radeon_dma.c +++ b/radeon/radeon_dma.c @@ -306,10 +306,6 @@ static int radeon_bo_is_idle(struct radeon_bo* bo) WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n" "This may cause small performance drop for you.\n"); } - /* Protect against bug in legacy bo handling that causes bos stay - * referenced even after they should be freed */ - if (bo->cref != 1) - return 0; return ret != -EBUSY; } @@ -346,9 +342,7 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa) foreach_s(dma_bo, temp, &rmesa->dma.wait) { if (dma_bo->expire_counter == time) { WARN_ONCE("Leaking dma buffer object!\n"); - /* force free of buffer so we don't realy start - * leaking stuff now*/ - while ((dma_bo->bo = radeon_bo_unref(dma_bo->bo))) {} + radeon_bo_unref(dma_bo->bo); remove_from_list(dma_bo); FREE(dma_bo); continue; diff --git a/radeon/radeon_fbo.c b/radeon/radeon_fbo.c index d83b166..fc21069 100644 --- a/radeon/radeon_fbo.c +++ b/radeon/radeon_fbo.c @@ -33,7 +33,6 @@ #include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/context.h" -#include "main/texformat.h" #include "main/texrender.h" #include "drivers/common/meta.h" @@ -91,11 +90,8 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_R3_G3_B2: case GL_RGB4: case GL_RGB5: - rb->_ActualFormat = GL_RGB5; + rb->Format = _dri_texformat_rgb565; rb->DataType = GL_UNSIGNED_BYTE; - rb->RedBits = 5; - rb->GreenBits = 6; - rb->BlueBits = 5; cpp = 2; break; case GL_RGB: @@ -103,12 +99,8 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_RGB10: case GL_RGB12: case GL_RGB16: - rb->_ActualFormat = GL_RGB8; + rb->Format = _dri_texformat_argb8888; rb->DataType = GL_UNSIGNED_BYTE; - rb->RedBits = 8; - rb->GreenBits = 8; - rb->BlueBits = 8; - rb->AlphaBits = 0; cpp = 4; break; case GL_RGBA: @@ -119,12 +111,8 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_RGB10_A2: case GL_RGBA12: case GL_RGBA16: - rb->_ActualFormat = GL_RGBA8; + rb->Format = _dri_texformat_argb8888; rb->DataType = GL_UNSIGNED_BYTE; - rb->RedBits = 8; - rb->GreenBits = 8; - rb->BlueBits = 8; - rb->AlphaBits = 8; cpp = 4; break; case GL_STENCIL_INDEX: @@ -133,39 +121,36 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_STENCIL_INDEX8_EXT: case GL_STENCIL_INDEX16_EXT: /* alloc a depth+stencil buffer */ - rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->Format = MESA_FORMAT_S8_Z24; rb->DataType = GL_UNSIGNED_INT_24_8_EXT; - rb->StencilBits = 8; cpp = 4; break; case GL_DEPTH_COMPONENT16: - rb->_ActualFormat = GL_DEPTH_COMPONENT16; + rb->Format = MESA_FORMAT_Z16; rb->DataType = GL_UNSIGNED_SHORT; - rb->DepthBits = 16; cpp = 2; break; case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: - rb->_ActualFormat = GL_DEPTH_COMPONENT24; + rb->Format = MESA_FORMAT_X8_Z24; rb->DataType = GL_UNSIGNED_INT; - rb->DepthBits = 24; cpp = 4; break; case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: - rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->Format = MESA_FORMAT_S8_Z24; rb->DataType = GL_UNSIGNED_INT_24_8_EXT; - rb->DepthBits = 24; - rb->StencilBits = 8; cpp = 4; break; default: _mesa_problem(ctx, - "Unexpected format in intel_alloc_renderbuffer_storage"); + "Unexpected format in radeon_alloc_renderbuffer_storage"); return GL_FALSE; } + rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); + if (ctx->Driver.Flush) ctx->Driver.Flush(ctx); /* +r6/r7 */ @@ -213,7 +198,7 @@ radeon_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb, ASSERT(rb->Name == 0); rb->Width = width; rb->Height = height; - rb->_ActualFormat = internalFormat; + rb->InternalFormat = internalFormat; return GL_TRUE; } @@ -255,8 +240,13 @@ radeon_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, return GL_FALSE; } + +/** + * Create a renderbuffer for a window's color, depth and/or stencil buffer. + * Not used for user-created renderbuffers. + */ struct radeon_renderbuffer * -radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv) +radeon_create_renderbuffer(gl_format format, __DRIdrawablePrivate *driDrawPriv) { struct radeon_renderbuffer *rrb; @@ -267,67 +257,64 @@ radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv) _mesa_init_renderbuffer(&rrb->base, 0); rrb->base.ClassID = RADEON_RB_CLASS; - /* XXX format junk */ + rrb->base.Format = format; + switch (format) { - case GL_RGB5: - rrb->base._ActualFormat = GL_RGB5; - rrb->base._BaseFormat = GL_RGBA; - rrb->base.RedBits = 5; - rrb->base.GreenBits = 6; - rrb->base.BlueBits = 5; + case MESA_FORMAT_RGB565: + assert(_mesa_little_endian()); + rrb->base.DataType = GL_UNSIGNED_BYTE; + rrb->base._BaseFormat = GL_RGB; + break; + case MESA_FORMAT_RGB565_REV: + assert(!_mesa_little_endian()); + rrb->base.DataType = GL_UNSIGNED_BYTE; + rrb->base._BaseFormat = GL_RGB; + break; + case MESA_FORMAT_XRGB8888: + assert(_mesa_little_endian()); rrb->base.DataType = GL_UNSIGNED_BYTE; + rrb->base._BaseFormat = GL_RGB; break; - case GL_RGB8: - rrb->base._ActualFormat = GL_RGB8; - rrb->base._BaseFormat = GL_RGB; - rrb->base.RedBits = 8; - rrb->base.GreenBits = 8; - rrb->base.BlueBits = 8; - rrb->base.AlphaBits = 0; + case MESA_FORMAT_XRGB8888_REV: + assert(!_mesa_little_endian()); rrb->base.DataType = GL_UNSIGNED_BYTE; + rrb->base._BaseFormat = GL_RGB; break; - case GL_RGBA8: - rrb->base._ActualFormat = GL_RGBA8; - rrb->base._BaseFormat = GL_RGBA; - rrb->base.RedBits = 8; - rrb->base.GreenBits = 8; - rrb->base.BlueBits = 8; - rrb->base.AlphaBits = 8; + case MESA_FORMAT_ARGB8888: + assert(_mesa_little_endian()); rrb->base.DataType = GL_UNSIGNED_BYTE; + rrb->base._BaseFormat = GL_RGBA; break; - case GL_STENCIL_INDEX8_EXT: - rrb->base._ActualFormat = GL_STENCIL_INDEX8_EXT; - rrb->base._BaseFormat = GL_STENCIL_INDEX; - rrb->base.StencilBits = 8; + case MESA_FORMAT_ARGB8888_REV: + assert(!_mesa_little_endian()); rrb->base.DataType = GL_UNSIGNED_BYTE; + rrb->base._BaseFormat = GL_RGBA; break; - case GL_DEPTH_COMPONENT16: - rrb->base._ActualFormat = GL_DEPTH_COMPONENT16; - rrb->base._BaseFormat = GL_DEPTH_COMPONENT; - rrb->base.DepthBits = 16; + case MESA_FORMAT_S8: + rrb->base.DataType = GL_UNSIGNED_BYTE; + rrb->base._BaseFormat = GL_STENCIL_INDEX; + break; + case MESA_FORMAT_Z16: rrb->base.DataType = GL_UNSIGNED_SHORT; + rrb->base._BaseFormat = GL_DEPTH_COMPONENT; break; - case GL_DEPTH_COMPONENT24: - rrb->base._ActualFormat = GL_DEPTH_COMPONENT24; - rrb->base._BaseFormat = GL_DEPTH_COMPONENT; - rrb->base.DepthBits = 24; + case MESA_FORMAT_X8_Z24: rrb->base.DataType = GL_UNSIGNED_INT; + rrb->base._BaseFormat = GL_DEPTH_COMPONENT; break; - case GL_DEPTH24_STENCIL8_EXT: - rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; - rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT; - rrb->base.DepthBits = 24; - rrb->base.StencilBits = 8; + case MESA_FORMAT_S8_Z24: rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT; + rrb->base._BaseFormat = GL_DEPTH_STENCIL; break; default: - fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format); + fprintf(stderr, "%s: Unknown format %s\n", + __FUNCTION__, _mesa_get_format_name(format)); _mesa_delete_renderbuffer(&rrb->base); return NULL; } rrb->dPriv = driDrawPriv; - rrb->base.InternalFormat = format; + rrb->base.InternalFormat = _mesa_get_format_base_format(format); rrb->base.Delete = radeon_delete_renderbuffer; rrb->base.AllocStorage = radeon_alloc_window_storage; @@ -382,51 +369,41 @@ radeon_framebuffer_renderbuffer(GLcontext * ctx, } +/* TODO: According to EXT_fbo spec internal format of texture image + * once set during glTexImage call, should be preserved when + * attaching image to renderbuffer. When HW doesn't support + * rendering to format of attached image, set framebuffer + * completeness accordingly in radeon_validate_framebuffer (issue #79). + */ static GLboolean radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb, struct gl_texture_image *texImage) { int retry = 0; + gl_format texFormat; + restart: - if (texImage->TexFormat == &_mesa_texformat_argb8888) { - rrb->cpp = 4; - rrb->base._ActualFormat = GL_RGBA8; - rrb->base._BaseFormat = GL_RGBA; + if (texImage->TexFormat == _dri_texformat_argb8888) { rrb->base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGBA8 texture OK\n"); } - else if (texImage->TexFormat == &_mesa_texformat_rgb565) { - rrb->cpp = 2; - rrb->base._ActualFormat = GL_RGB5; - rrb->base._BaseFormat = GL_RGB; + else if (texImage->TexFormat == _dri_texformat_rgb565) { rrb->base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGB5 texture OK\n"); } - else if (texImage->TexFormat == &_mesa_texformat_argb1555) { - rrb->cpp = 2; - rrb->base._ActualFormat = GL_RGB5_A1; - rrb->base._BaseFormat = GL_RGBA; + else if (texImage->TexFormat == _dri_texformat_argb1555) { rrb->base.DataType = GL_UNSIGNED_BYTE; DBG("Render to ARGB1555 texture OK\n"); } - else if (texImage->TexFormat == &_mesa_texformat_argb4444) { - rrb->cpp = 2; - rrb->base._ActualFormat = GL_RGBA4; - rrb->base._BaseFormat = GL_RGBA; + else if (texImage->TexFormat == _dri_texformat_argb4444) { rrb->base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to ARGB1555 texture OK\n"); + DBG("Render to ARGB4444 texture OK\n"); } - else if (texImage->TexFormat == &_mesa_texformat_z16) { - rrb->cpp = 2; - rrb->base._ActualFormat = GL_DEPTH_COMPONENT16; - rrb->base._BaseFormat = GL_DEPTH_COMPONENT; + else if (texImage->TexFormat == MESA_FORMAT_Z16) { rrb->base.DataType = GL_UNSIGNED_SHORT; DBG("Render to DEPTH16 texture OK\n"); } - else if (texImage->TexFormat == &_mesa_texformat_s8_z24) { - rrb->cpp = 4; - rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; - rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT; + else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) { rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT; DBG("Render to DEPTH_STENCIL texture OK\n"); } @@ -434,27 +411,31 @@ restart: /* try redoing the FBO */ if (retry == 1) { DBG("Render to texture BAD FORMAT %d\n", - texImage->TexFormat->MesaFormat); + texImage->TexFormat); return GL_FALSE; } + /* XXX why is the tex format being set here? + * I think this can be removed. + */ texImage->TexFormat = radeonChooseTextureFormat(ctx, texImage->InternalFormat, 0, - texImage->TexFormat->DataType, + _mesa_get_format_datatype(texImage->TexFormat), 1); retry++; goto restart; } + texFormat = texImage->TexFormat; + + rrb->base.Format = texFormat; + + rrb->cpp = _mesa_get_format_bytes(texFormat); rrb->pitch = texImage->Width * rrb->cpp; - rrb->base.InternalFormat = rrb->base._ActualFormat; + rrb->base.InternalFormat = texImage->InternalFormat; + rrb->base._BaseFormat = _mesa_base_fbo_format(ctx, rrb->base.InternalFormat); + rrb->base.Width = texImage->Width; rrb->base.Height = texImage->Height; - rrb->base.RedBits = texImage->TexFormat->RedBits; - rrb->base.GreenBits = texImage->TexFormat->GreenBits; - rrb->base.BlueBits = texImage->TexFormat->BlueBits; - rrb->base.AlphaBits = texImage->TexFormat->AlphaBits; - rrb->base.DepthBits = texImage->TexFormat->DepthBits; - rrb->base.StencilBits = texImage->TexFormat->StencilBits; rrb->base.Delete = radeon_delete_renderbuffer; rrb->base.AllocStorage = radeon_nop_alloc_storage; @@ -555,8 +536,10 @@ radeon_render_texture(GLcontext * ctx, imageOffset += offsets[att->Zoffset]; } - /* store that offset in the region */ + /* store that offset in the region, along with the correct pitch for + * the image we are rendering to */ rrb->draw_offset = imageOffset; + rrb->pitch = radeon_image->mt->levels[att->TextureLevel].rowstride; /* update drawing region, etc */ radeon_draw_buffer(ctx, fb); @@ -583,7 +566,7 @@ void radeon_fbo_init(struct radeon_context *radeon) radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture; radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers; radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer; - radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer; + radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; } diff --git a/radeon/radeon_lock.c b/radeon/radeon_lock.c index 02de8e5..7ad781b 100644 --- a/radeon/radeon_lock.c +++ b/radeon/radeon_lock.c @@ -62,8 +62,6 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) __DRIdrawablePrivate *const readable = radeon_get_readable(rmesa); __DRIscreenPrivate *sPriv = rmesa->dri.screen; - assert(drawable != NULL); - drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags); /* The window might have moved, so we might need to get new clip @@ -74,12 +72,13 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) * Since the hardware state depends on having the latest drawable * clip rects, all state checking must be done _after_ this call. */ - DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable); - if (drawable != readable) { + if (drawable) + DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable); + if (readable && drawable != readable) { DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable); } - if (rmesa->lastStamp != drawable->lastStamp) { + if (drawable && (rmesa->lastStamp != drawable->lastStamp)) { radeon_window_moved(rmesa); rmesa->lastStamp = drawable->lastStamp; } diff --git a/radeon/radeon_mipmap_tree.c b/radeon/radeon_mipmap_tree.c index 38db305..5a346c5 100644 --- a/radeon/radeon_mipmap_tree.c +++ b/radeon/radeon_mipmap_tree.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2009 Maciej Cencora. * Copyright (C) 2008 Nicolai Haehnle. * * All Rights Reserved. @@ -32,51 +33,39 @@ #include "main/simple_list.h" #include "main/texcompress.h" -#include "main/texformat.h" - -static GLuint radeon_compressed_texture_size(GLcontext *ctx, - GLsizei width, GLsizei height, GLsizei depth, - GLuint mesaFormat) +#include "main/teximage.h" +#include "main/texobj.h" +#include "radeon_texture.h" + +static unsigned get_aligned_compressed_row_stride( + gl_format format, + unsigned width, + unsigned minStride) { - GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat); - - if (mesaFormat == MESA_FORMAT_RGB_DXT1 || - mesaFormat == MESA_FORMAT_RGBA_DXT1) { - if (width + 3 < 8) /* width one block */ - size = size * 4; - else if (width + 3 < 16) - size = size * 2; - } else { - /* DXT3/5, 16 bytes per block */ - // WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n"); - if (width + 3 < 8) - size = size * 2; + const unsigned blockSize = _mesa_get_format_bytes(format); + unsigned blockWidth, blockHeight, numXBlocks; + + _mesa_get_format_block_size(format, &blockWidth, &blockHeight); + numXBlocks = (width + blockWidth - 1) / blockWidth; + + while (numXBlocks * blockSize < minStride) + { + ++numXBlocks; } - return size; + return numXBlocks * blockSize; } - -static int radeon_compressed_num_bytes(GLuint mesaFormat) +static unsigned get_compressed_image_size( + gl_format format, + unsigned rowStride, + unsigned height) { - int bytes = 0; - switch(mesaFormat) { - - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: - case MESA_FORMAT_RGB_DXT1: - case MESA_FORMAT_RGBA_DXT1: - bytes = 2; - break; - - case MESA_FORMAT_RGBA_DXT3: - case MESA_FORMAT_RGBA_DXT5: - bytes = 4; - default: - break; - } - - return bytes; + unsigned blockWidth, blockHeight; + + _mesa_get_format_block_size(format, &blockWidth, &blockHeight); + + return rowStride * ((height + blockHeight - 1) / blockHeight); } /** @@ -93,25 +82,22 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree uint32_t row_align; /* Find image size in bytes */ - if (mt->compressed) { - /* TODO: Is this correct? Need test cases for compressed textures! */ - row_align = rmesa->texture_compressed_row_align - 1; - lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; - lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx, - lvl->width, lvl->height, lvl->depth, mt->compressed); + if (_mesa_is_format_compressed(mt->mesaFormat)) { + lvl->rowstride = get_aligned_compressed_row_stride(mt->mesaFormat, lvl->width, rmesa->texture_compressed_row_align); + lvl->size = get_compressed_image_size(mt->mesaFormat, lvl->rowstride, lvl->height); } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) { row_align = rmesa->texture_rect_row_align - 1; - lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; + lvl->rowstride = (_mesa_format_row_stride(mt->mesaFormat, lvl->width) + row_align) & ~row_align; lvl->size = lvl->rowstride * lvl->height; } else if (mt->tilebits & RADEON_TXO_MICRO_TILE) { /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, * though the actual offset may be different (if texture is less than * 32 bytes width) to the untiled case */ - lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31; + lvl->rowstride = (_mesa_format_row_stride(mt->mesaFormat, lvl->width) * 2 + 31) & ~31; lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth; } else { row_align = rmesa->texture_row_align - 1; - lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; + lvl->rowstride = (_mesa_format_row_stride(mt->mesaFormat, lvl->width) + row_align) & ~row_align; lvl->size = lvl->rowstride * lvl->height * lvl->depth; } assert(lvl->size > 0); @@ -138,22 +124,19 @@ static GLuint minify(GLuint size, GLuint levels) static void calculate_miptree_layout_r100(radeonContextPtr rmesa, radeon_mipmap_tree *mt) { - GLuint curOffset; - GLuint numLevels; - GLuint i; - GLuint face; + GLuint curOffset, i, face, level; - numLevels = mt->lastLevel - mt->firstLevel + 1; - assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels); + assert(mt->numLevels <= rmesa->glCtx->Const.MaxTextureLevels); curOffset = 0; for(face = 0; face < mt->faces; face++) { - for(i = 0; i < numLevels; i++) { - mt->levels[i].width = minify(mt->width0, i); - mt->levels[i].height = minify(mt->height0, i); - mt->levels[i].depth = minify(mt->depth0, i); - compute_tex_image_offset(rmesa, mt, face, i, &curOffset); + for(i = 0, level = mt->baseLevel; i < mt->numLevels; i++, level++) { + mt->levels[level].valid = 1; + mt->levels[level].width = minify(mt->width0, i); + mt->levels[level].height = minify(mt->height0, i); + mt->levels[level].depth = minify(mt->depth0, i); + compute_tex_image_offset(rmesa, mt, face, level, &curOffset); } } @@ -163,23 +146,21 @@ static void calculate_miptree_layout_r100(radeonContextPtr rmesa, radeon_mipmap_ static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_tree *mt) { - GLuint curOffset; - GLuint numLevels; - GLuint i; + GLuint curOffset, i, level; - numLevels = mt->lastLevel - mt->firstLevel + 1; - assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels); + assert(mt->numLevels <= rmesa->glCtx->Const.MaxTextureLevels); curOffset = 0; - for(i = 0; i < numLevels; i++) { + for(i = 0, level = mt->baseLevel; i < mt->numLevels; i++, level++) { GLuint face; - mt->levels[i].width = minify(mt->width0, i); - mt->levels[i].height = minify(mt->height0, i); - mt->levels[i].depth = minify(mt->depth0, i); + mt->levels[level].valid = 1; + mt->levels[level].width = minify(mt->width0, i); + mt->levels[level].height = minify(mt->height0, i); + mt->levels[level].depth = minify(mt->depth0, i); for(face = 0; face < mt->faces; face++) - compute_tex_image_offset(rmesa, mt, face, i, &curOffset); + compute_tex_image_offset(rmesa, mt, face, level, &curOffset); } /* Note the required size in memory */ @@ -189,27 +170,22 @@ static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_ /** * Create a new mipmap tree, calculate its layout and allocate memory. */ -radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, - GLenum target, GLenum internal_format, GLuint firstLevel, GLuint lastLevel, - GLuint width0, GLuint height0, GLuint depth0, - GLuint bpp, GLuint tilebits, GLuint compressed) +static radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, + GLenum target, gl_format mesaFormat, GLuint baseLevel, GLuint numLevels, + GLuint width0, GLuint height0, GLuint depth0, GLuint tilebits) { radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree); - mt->radeon = rmesa; - mt->internal_format = internal_format; + mt->mesaFormat = mesaFormat; mt->refcount = 1; - mt->t = t; mt->target = target; mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - mt->firstLevel = firstLevel; - mt->lastLevel = lastLevel; + mt->baseLevel = baseLevel; + mt->numLevels = numLevels; mt->width0 = width0; mt->height0 = height0; mt->depth0 = depth0; - mt->bpp = compressed ? radeon_compressed_num_bytes(compressed) : bpp; mt->tilebits = tilebits; - mt->compressed = compressed; if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R300) calculate_miptree_layout_r300(rmesa, mt); @@ -224,53 +200,43 @@ radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj * return mt; } -void radeon_miptree_reference(radeon_mipmap_tree *mt) +void radeon_miptree_reference(radeon_mipmap_tree *mt, radeon_mipmap_tree **ptr) { + assert(!*ptr); + mt->refcount++; assert(mt->refcount > 0); + + *ptr = mt; } -void radeon_miptree_unreference(radeon_mipmap_tree *mt) +void radeon_miptree_unreference(radeon_mipmap_tree **ptr) { + radeon_mipmap_tree *mt = *ptr; if (!mt) return; assert(mt->refcount > 0); + mt->refcount--; if (!mt->refcount) { radeon_bo_unref(mt->bo); free(mt); } -} + *ptr = 0; +} /** - * Calculate first and last mip levels for the given texture object, - * where the dimensions are taken from the given texture image at - * the given level. - * - * Note: level is the OpenGL level number, which is not necessarily the same - * as the first level that is actually present. - * - * The base level image of the given texture face must be non-null, - * or this will fail. + * Calculate min and max LOD for the given texture object. + * @param[in] tObj texture object whose LOD values to calculate + * @param[out] pminLod minimal LOD + * @param[out] pmaxLod maximal LOD */ -static void calculate_first_last_level(struct gl_texture_object *tObj, - GLuint *pfirstLevel, GLuint *plastLevel, - GLuint face, GLuint level) +static void calculate_min_max_lod(struct gl_texture_object *tObj, + unsigned *pminLod, unsigned *pmaxLod) { - const struct gl_texture_image * const baseImage = - tObj->Image[face][level]; - - assert(baseImage); - - /* These must be signed values. MinLod and MaxLod can be negative numbers, - * and having firstLevel and lastLevel as signed prevents the need for - * extra sign checks. - */ - int firstLevel; - int lastLevel; - + int minLod, maxLod; /* Yes, this looks overly complicated, but it's all needed. */ switch (tObj->Target) { @@ -281,32 +247,30 @@ static void calculate_first_last_level(struct gl_texture_object *tObj, if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. */ - firstLevel = lastLevel = tObj->BaseLevel; + minLod = maxLod = tObj->BaseLevel; } else { - firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5); - firstLevel = MAX2(firstLevel, tObj->BaseLevel); - firstLevel = MIN2(firstLevel, level + baseImage->MaxLog2); - lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5); - lastLevel = MAX2(lastLevel, tObj->BaseLevel); - lastLevel = MIN2(lastLevel, level + baseImage->MaxLog2); - lastLevel = MIN2(lastLevel, tObj->MaxLevel); - lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ + minLod = tObj->BaseLevel + (GLint)(tObj->MinLod); + minLod = MAX2(minLod, tObj->BaseLevel); + minLod = MIN2(minLod, tObj->MaxLevel); + maxLod = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5); + maxLod = MIN2(maxLod, tObj->MaxLevel); + maxLod = MIN2(maxLod, tObj->Image[0][minLod]->MaxLog2 + minLod); + maxLod = MAX2(maxLod, minLod); /* need at least one level */ } break; case GL_TEXTURE_RECTANGLE_NV: case GL_TEXTURE_4D_SGIS: - firstLevel = lastLevel = 0; + minLod = maxLod = 0; break; default: return; } /* save these values */ - *pfirstLevel = firstLevel; - *plastLevel = lastLevel; + *pminLod = minLod; + *pmaxLod = maxLod; } - /** * Checks whether the given miptree can hold the given texture image at the * given face and level. @@ -316,20 +280,15 @@ GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, { radeon_mipmap_level *lvl; - if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel) - return GL_FALSE; - - if (texImage->InternalFormat != mt->internal_format || - texImage->IsCompressed != mt->compressed) + if (face >= mt->faces) return GL_FALSE; - if (!texImage->IsCompressed && - !mt->compressed && - texImage->TexFormat->TexelBytes != mt->bpp) + if (texImage->TexFormat != mt->mesaFormat) return GL_FALSE; - lvl = &mt->levels[level - mt->firstLevel]; - if (lvl->width != texImage->Width || + lvl = &mt->levels[level]; + if (!lvl->valid || + lvl->width != texImage->Width || lvl->height != texImage->Height || lvl->depth != texImage->Depth) return GL_FALSE; @@ -337,59 +296,72 @@ GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, return GL_TRUE; } - /** * Checks whether the given miptree has the right format to store the given texture object. */ -GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj) +static GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj) { struct gl_texture_image *firstImage; - GLuint compressed; - GLuint numfaces = 1; - GLuint firstLevel, lastLevel; - - calculate_first_last_level(texObj, &firstLevel, &lastLevel, 0, texObj->BaseLevel); - if (texObj->Target == GL_TEXTURE_CUBE_MAP) - numfaces = 6; - - firstImage = texObj->Image[0][firstLevel]; - compressed = firstImage->IsCompressed ? firstImage->TexFormat->MesaFormat : 0; - - return (mt->firstLevel == firstLevel && - mt->lastLevel == lastLevel && - mt->width0 == firstImage->Width && - mt->height0 == firstImage->Height && - mt->depth0 == firstImage->Depth && - mt->compressed == compressed && - (!mt->compressed ? (mt->bpp == firstImage->TexFormat->TexelBytes) : 1)); -} + unsigned numLevels; + radeon_mipmap_level *mtBaseLevel; + if (texObj->BaseLevel < mt->baseLevel) + return GL_FALSE; + + mtBaseLevel = &mt->levels[texObj->BaseLevel - mt->baseLevel]; + firstImage = texObj->Image[0][texObj->BaseLevel]; + numLevels = MIN2(texObj->MaxLevel - texObj->BaseLevel + 1, firstImage->MaxLog2 + 1); + + if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "Checking if miptree %p matches texObj %p\n", mt, texObj); + fprintf(stderr, "target %d vs %d\n", mt->target, texObj->Target); + fprintf(stderr, "format %d vs %d\n", mt->mesaFormat, firstImage->TexFormat); + fprintf(stderr, "numLevels %d vs %d\n", mt->numLevels, numLevels); + fprintf(stderr, "width0 %d vs %d\n", mtBaseLevel->width, firstImage->Width); + fprintf(stderr, "height0 %d vs %d\n", mtBaseLevel->height, firstImage->Height); + fprintf(stderr, "depth0 %d vs %d\n", mtBaseLevel->depth, firstImage->Depth); + if (mt->target == texObj->Target && + mt->mesaFormat == firstImage->TexFormat && + mt->numLevels >= numLevels && + mtBaseLevel->width == firstImage->Width && + mtBaseLevel->height == firstImage->Height && + mtBaseLevel->depth == firstImage->Depth) { + fprintf(stderr, "MATCHED\n"); + } else { + fprintf(stderr, "NOT MATCHED\n"); + } + } + + return (mt->target == texObj->Target && + mt->mesaFormat == firstImage->TexFormat && + mt->numLevels >= numLevels && + mtBaseLevel->width == firstImage->Width && + mtBaseLevel->height == firstImage->Height && + mtBaseLevel->depth == firstImage->Depth); +} /** - * Try to allocate a mipmap tree for the given texture that will fit the - * given image in the given position. + * Try to allocate a mipmap tree for the given texture object. + * @param[in] rmesa radeon context + * @param[in] t radeon texture object */ -void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, - radeon_texture_image *image, GLuint face, GLuint level) +void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t) { - GLuint compressed = image->base.IsCompressed ? image->base.TexFormat->MesaFormat : 0; - GLuint numfaces = 1; - GLuint firstLevel, lastLevel; + struct gl_texture_object *texObj = &t->base; + struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel]; + GLuint numLevels; assert(!t->mt); - calculate_first_last_level(&t->base, &firstLevel, &lastLevel, face, level); - if (t->base.Target == GL_TEXTURE_CUBE_MAP) - numfaces = 6; - - if (level != firstLevel || face >= numfaces) + if (!texImg) return; - t->mt = radeon_miptree_create(rmesa, t, t->base.Target, - image->base.InternalFormat, - firstLevel, lastLevel, - image->base.Width, image->base.Height, image->base.Depth, - image->base.TexFormat->TexelBytes, t->tile_bits, compressed); + numLevels = MIN2(texObj->MaxLevel - texObj->BaseLevel + 1, texImg->MaxLog2 + 1); + + t->mt = radeon_miptree_create(rmesa, t->base.Target, + texImg->TexFormat, texObj->BaseLevel, + numLevels, texImg->Width, texImg->Height, + texImg->Depth, t->tile_bits); } /* Although we use the image_offset[] array to store relative offsets @@ -401,21 +373,236 @@ void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, void radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets) { - if (mt->target != GL_TEXTURE_3D || mt->faces == 1) - offsets[0] = 0; - else { - int i; - for (i = 0; i < 6; i++) - offsets[i] = mt->levels[level].faces[i].offset; - } + if (mt->target != GL_TEXTURE_3D || mt->faces == 1) { + offsets[0] = 0; + } else { + int i; + for (i = 0; i < 6; i++) { + offsets[i] = mt->levels[level].faces[i].offset; + } + } } GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt, GLuint face, GLuint level) { - if (mt->target == GL_TEXTURE_CUBE_MAP_ARB) - return (mt->levels[level].faces[face].offset); - else - return mt->levels[level].faces[0].offset; + if (mt->target == GL_TEXTURE_CUBE_MAP_ARB) + return (mt->levels[level].faces[face].offset); + else + return mt->levels[level].faces[0].offset; +} + +/** + * Ensure that the given image is stored in the given miptree from now on. + */ +static void migrate_image_to_miptree(radeon_mipmap_tree *mt, + radeon_texture_image *image, + int face, int level) +{ + radeon_mipmap_level *dstlvl = &mt->levels[level]; + unsigned char *dest; + + assert(image->mt != mt); + assert(dstlvl->valid); + assert(dstlvl->width == image->base.Width); + assert(dstlvl->height == image->base.Height); + assert(dstlvl->depth == image->base.Depth); + + radeon_bo_map(mt->bo, GL_TRUE); + dest = mt->bo->ptr + dstlvl->faces[face].offset; + + if (image->mt) { + /* Format etc. should match, so we really just need a memcpy(). + * In fact, that memcpy() could be done by the hardware in many + * cases, provided that we have a proper memory manager. + */ + assert(mt->mesaFormat == image->base.TexFormat); + + radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel]; + + /* TODO: bring back these assertions once the FBOs are fixed */ +#if 0 + assert(image->mtlevel == level); + assert(srclvl->size == dstlvl->size); + assert(srclvl->rowstride == dstlvl->rowstride); +#endif + + radeon_bo_map(image->mt->bo, GL_FALSE); + + memcpy(dest, + image->mt->bo->ptr + srclvl->faces[face].offset, + dstlvl->size); + radeon_bo_unmap(image->mt->bo); + + radeon_miptree_unreference(&image->mt); + } else if (image->base.Data) { + /* This condition should be removed, it's here to workaround + * a segfault when mapping textures during software fallbacks. + */ + const uint32_t srcrowstride = _mesa_format_row_stride(image->base.TexFormat, image->base.Width); + uint32_t rows = image->base.Height * image->base.Depth; + + if (_mesa_is_format_compressed(image->base.TexFormat)) { + uint32_t blockWidth, blockHeight; + _mesa_get_format_block_size(image->base.TexFormat, &blockWidth, &blockHeight); + rows = (rows + blockHeight - 1) / blockHeight; + } + + copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride, + rows, srcrowstride); + + _mesa_free_texmemory(image->base.Data); + image->base.Data = 0; + } + + radeon_bo_unmap(mt->bo); + + radeon_miptree_reference(mt, &image->mt); + image->mtface = face; + image->mtlevel = level; +} + +/** + * Filter matching miptrees, and select one with the most of data. + * @param[in] texObj radeon texture object + * @param[in] firstLevel first texture level to check + * @param[in] lastLevel last texture level to check + */ +static radeon_mipmap_tree * get_biggest_matching_miptree(radeonTexObj *texObj, + unsigned firstLevel, + unsigned lastLevel) +{ + const unsigned numLevels = lastLevel - firstLevel + 1; + unsigned *mtSizes = calloc(numLevels, sizeof(unsigned)); + radeon_mipmap_tree **mts = calloc(numLevels, sizeof(radeon_mipmap_tree *)); + unsigned mtCount = 0; + unsigned maxMtIndex = 0; + radeon_mipmap_tree *tmp; + unsigned level; + int i; + + for (level = firstLevel; level <= lastLevel; ++level) { + radeon_texture_image *img = get_radeon_texture_image(texObj->base.Image[0][level]); + unsigned found = 0; + // TODO: why this hack?? + if (!img) + break; + + if (!img->mt) + continue; + + for (i = 0; i < mtCount; ++i) { + if (mts[i] == img->mt) { + found = 1; + mtSizes[i] += img->mt->levels[img->mtlevel].size; + break; + } + } + + if (!found && radeon_miptree_matches_texture(img->mt, &texObj->base)) { + mtSizes[mtCount] = img->mt->levels[img->mtlevel].size; + mts[mtCount] = img->mt; + mtCount++; + } + } + + if (mtCount == 0) { + return NULL; + } + + for (i = 1; i < mtCount; ++i) { + if (mtSizes[i] > mtSizes[maxMtIndex]) { + maxMtIndex = i; + } + } + + tmp = mts[maxMtIndex]; + free(mtSizes); + free(mts); + + return tmp; +} + +/** + * Validate texture mipmap tree. + * If individual images are stored in different mipmap trees + * use the mipmap tree that has the most of the correct data. + */ +int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + radeonTexObj *t = radeon_tex_obj(texObj); + + if (t->validated || t->image_override) { + return GL_TRUE; + } + + if (texObj->Image[0][texObj->BaseLevel]->Border > 0) + return GL_FALSE; + + _mesa_test_texobj_completeness(rmesa->glCtx, texObj); + if (!texObj->_Complete) { + return GL_FALSE; + } + + calculate_min_max_lod(&t->base, &t->minLod, &t->maxLod); + + if (RADEON_DEBUG & RADEON_TEXTURE) + fprintf(stderr, "%s: Validating texture %p now, minLod = %d, maxLod = %d\n", + __FUNCTION__, texObj ,t->minLod, t->maxLod); + + radeon_mipmap_tree *dst_miptree; + dst_miptree = get_biggest_matching_miptree(t, t->minLod, t->maxLod); + + if (!dst_miptree) { + radeon_miptree_unreference(&t->mt); + radeon_try_alloc_miptree(rmesa, t); + dst_miptree = t->mt; + if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "%s: No matching miptree found, allocated new one %p\n", __FUNCTION__, t->mt); + } + } else if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "%s: Using miptree %p\n", __FUNCTION__, t->mt); + } + + const unsigned faces = texObj->Target == GL_TEXTURE_CUBE_MAP ? 6 : 1; + unsigned face, level; + radeon_texture_image *img; + /* Validate only the levels that will actually be used during rendering */ + for (face = 0; face < faces; ++face) { + for (level = t->minLod; level <= t->maxLod; ++level) { + img = get_radeon_texture_image(texObj->Image[face][level]); + + if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "Checking image level %d, face %d, mt %p ... ", level, face, img->mt); + } + + if (img->mt != dst_miptree) { + if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "MIGRATING\n"); + } + struct radeon_bo *src_bo = (img->mt) ? img->mt->bo : img->bo; + if (src_bo && radeon_bo_is_referenced_by_cs(src_bo, rmesa->cmdbuf.cs)) { + radeon_firevertices(rmesa); + } + migrate_image_to_miptree(dst_miptree, img, face, level); + } else if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "OK\n"); + } + } + } + + t->validated = GL_TRUE; + + return GL_TRUE; +} + +uint32_t get_base_teximage_offset(radeonTexObj *texObj) +{ + if (!texObj->mt) { + return 0; + } else { + return radeon_miptree_image_offset(texObj->mt, 0, texObj->minLod); + } } diff --git a/radeon/radeon_mipmap_tree.h b/radeon/radeon_mipmap_tree.h index db28252..a10649b 100644 --- a/radeon/radeon_mipmap_tree.h +++ b/radeon/radeon_mipmap_tree.h @@ -44,6 +44,7 @@ struct _radeon_mipmap_level { GLuint depth; GLuint size; /** Size of each image, in bytes */ GLuint rowstride; /** in bytes */ + GLuint valid; radeon_mipmap_image faces[6]; }; @@ -59,43 +60,35 @@ struct _radeon_mipmap_level { * changed. */ struct _radeon_mipmap_tree { - radeonContextPtr radeon; - radeonTexObj *t; struct radeon_bo *bo; GLuint refcount; GLuint totalsize; /** total size of the miptree, in bytes */ GLenum target; /** GL_TEXTURE_xxx */ - GLenum internal_format; + GLenum mesaFormat; /** MESA_FORMAT_xxx */ GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */ - GLuint firstLevel; /** First mip level stored in this mipmap tree */ - GLuint lastLevel; /** Last mip level stored in this mipmap tree */ + GLuint baseLevel; /** gl_texture_object->baseLevel it was created for */ + GLuint numLevels; /** Number of mip levels stored in this mipmap tree */ - GLuint width0; /** Width of firstLevel image */ - GLuint height0; /** Height of firstLevel image */ - GLuint depth0; /** Depth of firstLevel image */ + GLuint width0; /** Width of baseLevel image */ + GLuint height0; /** Height of baseLevel image */ + GLuint depth0; /** Depth of baseLevel image */ - GLuint bpp; /** Bytes per texel */ GLuint tilebits; /** RADEON_TXO_xxx_TILE */ - GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */ radeon_mipmap_level levels[RADEON_MIPTREE_MAX_TEXTURE_LEVELS]; }; -radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, - GLenum target, GLenum internal_format, GLuint firstLevel, GLuint lastLevel, - GLuint width0, GLuint height0, GLuint depth0, - GLuint bpp, GLuint tilebits, GLuint compressed); -void radeon_miptree_reference(radeon_mipmap_tree *mt); -void radeon_miptree_unreference(radeon_mipmap_tree *mt); +void radeon_miptree_reference(radeon_mipmap_tree *mt, radeon_mipmap_tree **ptr); +void radeon_miptree_unreference(radeon_mipmap_tree **ptr); GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, struct gl_texture_image *texImage, GLuint face, GLuint level); -GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj); -void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, - radeon_texture_image *texImage, GLuint face, GLuint level); +void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t); GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt, GLuint face, GLuint level); void radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets); + +uint32_t get_base_teximage_offset(radeonTexObj *texObj); #endif /* __RADEON_MIPMAP_TREE_H_ */ diff --git a/radeon/radeon_queryobj.c b/radeon/radeon_queryobj.c index b79d864..98117cd 100644 --- a/radeon/radeon_queryobj.c +++ b/radeon/radeon_queryobj.c @@ -31,24 +31,11 @@ #include "main/imports.h" #include "main/simple_list.h" -static int radeonQueryIsFlushed(GLcontext *ctx, struct gl_query_object *q) -{ - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - struct radeon_query_object *tmp, *query = (struct radeon_query_object *)q; - - foreach(tmp, &radeon->query.not_flushed_head) { - if (tmp == query) { - return 0; - } - } - - return 1; -} - static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q) { + radeonContextPtr radeon = RADEON_CONTEXT(ctx); struct radeon_query_object *query = (struct radeon_query_object *)q; - uint32_t *result; + uint32_t *result; int i; radeon_print(RADEON_STATE, RADEON_VERBOSE, @@ -56,13 +43,35 @@ static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q) __FUNCTION__, query->Base.Id, (int) query->Base.Result); radeon_bo_map(query->bo, GL_FALSE); - - result = query->bo->ptr; + result = query->bo->ptr; query->Base.Result = 0; - for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) { - query->Base.Result += result[i]; - radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, result[i]); + if (IS_R600_CLASS(radeon->radeonScreen)) { + /* ZPASS EVENT writes alternating qwords + * At query start we set the start offset to 0 and + * hw writes zpass start counts to qwords 0, 2, 4, 6. + * At query end we set the start offset to 8 and + * hw writes zpass end counts to qwords 1, 3, 5, 7. + * then we substract. MSB is the valid bit. + */ + for (i = 0; i < 16; i += 4) { + uint64_t start = (uint64_t)LE32_TO_CPU(result[i]) | + (uint64_t)LE32_TO_CPU(result[i + 1]) << 32; + uint64_t end = (uint64_t)LE32_TO_CPU(result[i + 2]) | + (uint64_t)LE32_TO_CPU(result[i + 3]) << 32; + if ((start & 0x8000000000000000) && (end & 0x8000000000000000)) { + uint64_t query_count = end - start; + query->Base.Result += query_count; + + } + radeon_print(RADEON_STATE, RADEON_TRACE, + "%d start: %lx, end: %lx %ld\n", i, start, end, end - start); + } + } else { + for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) { + query->Base.Result += LE32_TO_CPU(result[i]); + radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, LE32_TO_CPU(result[i])); + } } radeon_bo_unmap(query->bo); @@ -99,10 +108,11 @@ static void radeonDeleteQuery(GLcontext *ctx, struct gl_query_object *q) static void radeonWaitQuery(GLcontext *ctx, struct gl_query_object *q) { + radeonContextPtr radeon = RADEON_CONTEXT(ctx); struct radeon_query_object *query = (struct radeon_query_object *)q; /* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */ - if (!radeonQueryIsFlushed(ctx, q)) + if (radeon_bo_is_referenced_by_cs(query->bo, radeon->cmdbuf.cs)) ctx->Driver.Flush(ctx); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset); @@ -134,8 +144,6 @@ static void radeonBeginQuery(GLcontext *ctx, struct gl_query_object *q) radeon->query.queryobj.dirty = GL_TRUE; radeon->hw.is_dirty = GL_TRUE; - insert_at_tail(&radeon->query.not_flushed_head, query); - } void radeonEmitQueryEnd(GLcontext *ctx) @@ -183,7 +191,7 @@ static void radeonCheckQuery(GLcontext *ctx, struct gl_query_object *q) uint32_t domain; /* Need to perform a flush, as per ARB_occlusion_query spec */ - if (!radeonQueryIsFlushed(ctx, q)) { + if (radeon_bo_is_referenced_by_cs(query->bo, radeon->cmdbuf.cs)) { ctx->Driver.Flush(ctx); } diff --git a/radeon/radeon_screen.c b/radeon/radeon_screen.c index 5ffb55d..be2d836 100644 --- a/radeon/radeon_screen.c +++ b/radeon/radeon_screen.c @@ -48,17 +48,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_screen.h" #include "radeon_common.h" #include "radeon_span.h" -#if !RADEON_COMMON +#if defined(RADEON_R100) #include "radeon_context.h" #include "radeon_tex.h" -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) #include "r200_context.h" #include "r200_ioctl.h" #include "r200_tex.h" -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#elif defined(RADEON_R300) #include "r300_context.h" #include "r300_tex.h" -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) #include "r600_context.h" #include "r700_driconf.h" /* +r6/r7 */ #include "r600_tex.h" /* +r6/r7 */ @@ -82,7 +82,7 @@ DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \ DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \ DRI_CONF_OPT_END -#if !RADEON_COMMON /* R100 */ +#if defined(RADEON_R100) /* R100 */ PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE @@ -109,7 +109,7 @@ DRI_CONF_BEGIN DRI_CONF_END; static const GLuint __driNConfigOptions = 15; -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -141,13 +141,7 @@ DRI_CONF_BEGIN DRI_CONF_END; static const GLuint __driNConfigOptions = 17; -extern const struct dri_extension blend_extensions[]; -extern const struct dri_extension ARB_vp_extension[]; -extern const struct dri_extension NV_vp_extension[]; -extern const struct dri_extension ATI_fs_extension[]; -extern const struct dri_extension point_extensions[]; - -#elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600)) +#elif defined(RADEON_R300) || defined(RADEON_R600) #define DRI_CONF_FP_OPTIMIZATION_SPEED 0 #define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 @@ -218,12 +212,7 @@ DRI_CONF_BEGIN DRI_CONF_END; static const GLuint __driNConfigOptions = 17; -extern const struct dri_extension gl_20_extension[]; - -#endif /* RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) */ - -extern const struct dri_extension card_extensions[]; -extern const struct dri_extension mm_extensions[]; +#endif static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ); @@ -337,7 +326,7 @@ radeonFillInModes( __DRIscreenPrivate *psp, return (const __DRIconfig **) configs; } -#if !RADEON_COMMON +#if defined(RADEON_R100) static const __DRItexOffsetExtension radeonTexOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, radeonSetTexOffset, @@ -350,7 +339,7 @@ static const __DRItexBufferExtension radeonTexBufferExtension = { }; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) static const __DRIallocateExtension r200AllocateExtension = { { __DRI_ALLOCATE, __DRI_ALLOCATE_VERSION }, r200AllocateMemoryMESA, @@ -370,7 +359,7 @@ static const __DRItexBufferExtension r200TexBufferExtension = { }; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#if defined(RADEON_R300) static const __DRItexOffsetExtension r300texOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, r300SetTexOffset, @@ -383,7 +372,7 @@ static const __DRItexBufferExtension r300TexBufferExtension = { }; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) static const __DRItexOffsetExtension r600texOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, r600SetTexOffset, /* +r6/r7 */ @@ -401,12 +390,14 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->device_id = device_id; screen->chip_flags = 0; switch ( device_id ) { + case PCI_CHIP_RN50_515E: + case PCI_CHIP_RN50_5969: + return -1; + case PCI_CHIP_RADEON_LY: case PCI_CHIP_RADEON_LZ: case PCI_CHIP_RADEON_QY: case PCI_CHIP_RADEON_QZ: - case PCI_CHIP_RN50_515E: - case PCI_CHIP_RN50_5969: screen->chip_family = CHIP_FAMILY_RV100; break; @@ -1222,22 +1213,22 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->extensions[i++] = &driMediaStreamCounterExtension.base; } -#if !RADEON_COMMON +#if defined(RADEON_R100) screen->extensions[i++] = &radeonTexOffsetExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) if (IS_R200_CLASS(screen)) screen->extensions[i++] = &r200AllocateExtension.base; screen->extensions[i++] = &r200texOffsetExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#if defined(RADEON_R300) screen->extensions[i++] = &r300texOffsetExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) screen->extensions[i++] = &r600texOffsetExtension.base; #endif @@ -1376,22 +1367,22 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) screen->extensions[i++] = &driMediaStreamCounterExtension.base; } -#if !RADEON_COMMON +#if defined(RADEON_R100) screen->extensions[i++] = &radeonTexBufferExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) if (IS_R200_CLASS(screen)) screen->extensions[i++] = &r200AllocateExtension.base; screen->extensions[i++] = &r200TexBufferExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#if defined(RADEON_R300) screen->extensions[i++] = &r300TexBufferExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) screen->extensions[i++] = &r600TexBufferExtension.base; #endif @@ -1480,7 +1471,7 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, const GLboolean swAccum = mesaVis->accumRedBits > 0; const GLboolean swStencil = mesaVis->stencilBits > 0 && mesaVis->depthBits != 24; - GLenum rgbFormat; + gl_format rgbFormat; struct radeon_framebuffer *rfb; if (isPixmap) @@ -1493,11 +1484,11 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, _mesa_initialize_framebuffer(&rfb->base, mesaVis); if (mesaVis->redBits == 5) - rgbFormat = GL_RGB5; + rgbFormat = _mesa_little_endian() ? MESA_FORMAT_RGB565 : MESA_FORMAT_RGB565_REV; else if (mesaVis->alphaBits == 0) - rgbFormat = GL_RGB8; + rgbFormat = _mesa_little_endian() ? MESA_FORMAT_XRGB8888 : MESA_FORMAT_XRGB8888_REV; else - rgbFormat = GL_RGBA8; + rgbFormat = _mesa_little_endian() ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB8888_REV; /* front color renderbuffer */ rfb->color_rb[0] = radeon_create_renderbuffer(rgbFormat, driDrawPriv); @@ -1513,19 +1504,22 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, if (mesaVis->depthBits == 24) { if (mesaVis->stencilBits == 8) { - struct radeon_renderbuffer *depthStencilRb = radeon_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT, driDrawPriv); + struct radeon_renderbuffer *depthStencilRb = + radeon_create_renderbuffer(MESA_FORMAT_S8_Z24, driDrawPriv); _mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depthStencilRb->base); _mesa_add_renderbuffer(&rfb->base, BUFFER_STENCIL, &depthStencilRb->base); depthStencilRb->has_surface = screen->depthHasSurface; } else { /* depth renderbuffer */ - struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT24, driDrawPriv); + struct radeon_renderbuffer *depth = + radeon_create_renderbuffer(MESA_FORMAT_X8_Z24, driDrawPriv); _mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base); depth->has_surface = screen->depthHasSurface; } } else if (mesaVis->depthBits == 16) { - /* just 16-bit depth buffer, no hw stencil */ - struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT16, driDrawPriv); + /* just 16-bit depth buffer, no hw stencil */ + struct radeon_renderbuffer *depth = + radeon_create_renderbuffer(MESA_FORMAT_Z16, driDrawPriv); _mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base); depth->has_surface = screen->depthHasSurface; } @@ -1589,22 +1583,22 @@ radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) static const __DRIconfig ** radeonInitScreen(__DRIscreenPrivate *psp) { -#if !RADEON_COMMON +#if defined(RADEON_R100) static const char *driver_name = "Radeon"; static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 1, 6, 0 }; -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) static const char *driver_name = "R200"; static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 1, 6, 0 }; -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#elif defined(RADEON_R300) static const char *driver_name = "R300"; static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 1, 24, 0 }; -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) static const char *driver_name = "R600"; static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; @@ -1619,27 +1613,6 @@ radeonInitScreen(__DRIscreenPrivate *psp) return NULL; } - /* Calling driInitExtensions here, with a NULL context pointer, - * does not actually enable the extensions. It just makes sure - * that all the dispatch offsets for all the extensions that - * *might* be enables are known. This is needed because the - * dispatch offsets need to be known when _mesa_context_create - * is called, but we can't enable the extensions until we have a - * context pointer. - * - * Hello chicken. Hello egg. How are you two today? - */ - driInitExtensions( NULL, card_extensions, GL_FALSE ); -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - driInitExtensions( NULL, blend_extensions, GL_FALSE ); - driInitSingleExtension( NULL, ARB_vp_extension ); - driInitSingleExtension( NULL, NV_vp_extension ); - driInitSingleExtension( NULL, ATI_fs_extension ); - driInitExtensions( NULL, point_extensions, GL_FALSE ); -#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600)) - driInitSingleExtension( NULL, gl_20_extension ); -#endif - if (!radeonInitDriver(psp)) return NULL; @@ -1672,28 +1645,6 @@ __DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp) int color; __DRIconfig **configs = NULL; - /* Calling driInitExtensions here, with a NULL context pointer, - * does not actually enable the extensions. It just makes sure - * that all the dispatch offsets for all the extensions that - * *might* be enables are known. This is needed because the - * dispatch offsets need to be known when _mesa_context_create - * is called, but we can't enable the extensions until we have a - * context pointer. - * - * Hello chicken. Hello egg. How are you two today? - */ - driInitExtensions( NULL, card_extensions, GL_FALSE ); - driInitExtensions( NULL, mm_extensions, GL_FALSE ); -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - driInitExtensions( NULL, blend_extensions, GL_FALSE ); - driInitSingleExtension( NULL, ARB_vp_extension ); - driInitSingleExtension( NULL, NV_vp_extension ); - driInitSingleExtension( NULL, ATI_fs_extension ); - driInitExtensions( NULL, point_extensions, GL_FALSE ); -#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600)) - driInitSingleExtension( NULL, gl_20_extension ); -#endif - if (!radeonInitDriver(psp)) { return NULL; } @@ -1772,13 +1723,13 @@ getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) const struct __DriverAPIRec driDriverAPI = { .InitScreen = radeonInitScreen, .DestroyScreen = radeonDestroyScreen, -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) .CreateContext = r200CreateContext, .DestroyContext = r200DestroyContext, -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) .CreateContext = r600CreateContext, .DestroyContext = radeonDestroyContext, -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#elif defined(RADEON_R300) .CreateContext = r300CreateContext, .DestroyContext = radeonDestroyContext, #else diff --git a/radeon/radeon_span.c b/radeon/radeon_span.c index d603f52..665f2b6 100644 --- a/radeon/radeon_span.c +++ b/radeon/radeon_span.c @@ -41,6 +41,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "main/glheader.h" +#include "main/texformat.h" #include "swrast/swrast.h" #include "radeon_common.h" @@ -55,7 +56,7 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); /* r200 depth buffer is always tiled - this is the formula according to the docs unless I typo'ed in it */ -#if defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { @@ -112,7 +113,7 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, * - 2D (akin to macro-tiled/micro-tiled on older asics) * only 1D tiling is implemented below */ -#if defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, GLint x, GLint y, GLint is_depth, GLint is_stencil) { @@ -334,22 +335,6 @@ static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb, #endif -#ifndef COMPILE_R300 -#ifndef COMPILE_R600 -static uint32_t -z24s8_to_s8z24(uint32_t val) -{ - return (val << 24) | (val >> 8); -} - -static uint32_t -s8z24_to_z24s8(uint32_t val) -{ - return (val >> 24) | (val << 8); -} -#endif -#endif - /* * Note that all information needed to access pixels in a renderbuffer * should be obtained through the gl_renderbuffer parameter, not per-context @@ -409,7 +394,19 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_RGB565 #define TAG2(x,y) radeon##x##_RGB565##y -#if defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else +#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5_REV + +#define TAG(x) radeon##x##_RGB565_REV +#define TAG2(x,y) radeon##x##_RGB565_REV##y +#if defined(RADEON_R600) #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) #else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) @@ -423,7 +420,19 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB1555 #define TAG2(x,y) radeon##x##_ARGB1555##y -#if defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else +#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5 + +#define TAG(x) radeon##x##_ARGB1555_REV +#define TAG2(x,y) radeon##x##_ARGB1555_REV##y +#if defined(RADEON_R600) #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) #else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) @@ -437,7 +446,19 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB4444 #define TAG2(x,y) radeon##x##_ARGB4444##y -#if defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else +#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4 + +#define TAG(x) radeon##x##_ARGB4444_REV +#define TAG2(x,y) radeon##x##_ARGB4444_REV##y +#if defined(RADEON_R600) #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) #else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) @@ -451,7 +472,7 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_xRGB8888 #define TAG2(x,y) radeon##x##_xRGB8888##y -#if defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) #define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000)) #define PUT_VALUE(_x, _y, d) { \ GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ @@ -473,7 +494,7 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB8888 #define TAG2(x,y) radeon##x##_ARGB8888##y -#if defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) #define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))) #define PUT_VALUE(_x, _y, d) { \ GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ @@ -488,6 +509,42 @@ s8z24_to_z24s8(uint32_t val) #endif #include "spantmp2.h" +/* 32 bit, BGRx8888 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8 + +#define TAG(x) radeon##x##_BGRx8888 +#define TAG2(x,y) radeon##x##_BGRx8888##y +#if defined(RADEON_R600) +#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0x000000ff)) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) +#else +#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0x000000ff)) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) +#endif +#include "spantmp2.h" + +/* 32 bit, BGRA8888 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8 + +#define TAG(x) radeon##x##_BGRA8888 +#define TAG2(x,y) radeon##x##_BGRA8888##y +#if defined(RADEON_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else +#define GET_PTR(X,Y) radeon_ptr_4byte(rrb, (X) + x_off, (Y) + y_off) +#endif +#include "spantmp2.h" + /* ================================================================ * Depth buffer */ @@ -506,10 +563,10 @@ s8z24_to_z24s8(uint32_t val) */ #define VALUE_TYPE GLushort -#if defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d -#elif defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d #else @@ -517,10 +574,10 @@ s8z24_to_z24s8(uint32_t val) *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d #endif -#if defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) #define READ_DEPTH( d, _x, _y ) \ d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) -#elif defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) #define READ_DEPTH( d, _x, _y ) \ d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) #else @@ -538,16 +595,16 @@ s8z24_to_z24s8(uint32_t val) */ #define VALUE_TYPE GLuint -#if defined(COMPILE_R300) +#if defined(RADEON_R300) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ - GLuint tmp = *_ptr; \ + GLuint tmp = LE32_TO_CPU(*_ptr); \ tmp &= 0x000000ff; \ tmp |= ((d << 8) & 0xffffff00); \ - *_ptr = tmp; \ + *_ptr = CPU_TO_LE32(tmp); \ } while (0) -#elif defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ @@ -556,44 +613,44 @@ do { \ tmp |= ((d) & 0x00ffffff); \ *_ptr = tmp; \ } while (0) -#elif defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ - GLuint tmp = *_ptr; \ + GLuint tmp = LE32_TO_CPU(*_ptr); \ tmp &= 0xff000000; \ tmp |= ((d) & 0x00ffffff); \ - *_ptr = tmp; \ + *_ptr = CPU_TO_LE32(tmp); \ } while (0) #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ - GLuint tmp = *_ptr; \ + GLuint tmp = LE32_TO_CPU(*_ptr); \ tmp &= 0xff000000; \ tmp |= ((d) & 0x00ffffff); \ - *_ptr = tmp; \ + *_ptr = CPU_TO_LE32(tmp); \ } while (0) #endif -#if defined(COMPILE_R300) +#if defined(RADEON_R300) #define READ_DEPTH( d, _x, _y ) \ do { \ - d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \ + d = (LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) & 0xffffff00) >> 8; \ }while(0) -#elif defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) #define READ_DEPTH( d, _x, _y ) \ do { \ d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \ }while(0) -#elif defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) #define READ_DEPTH( d, _x, _y ) \ do { \ - d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \ + d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \ }while(0) #else #define READ_DEPTH( d, _x, _y ) \ - d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; + d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; #endif #define TAG(x) radeon##x##_z24 @@ -607,65 +664,64 @@ do { \ */ #define VALUE_TYPE GLuint -#if defined(COMPILE_R300) +#if defined(RADEON_R300) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ - *_ptr = d; \ + *_ptr = CPU_TO_LE32((((d) & 0xff000000) >> 24) | (((d) & 0x00ffffff) << 8)); \ } while (0) -#elif defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ GLuint tmp = *_ptr; \ tmp &= 0xff000000; \ - tmp |= (((d) >> 8) & 0x00ffffff); \ + tmp |= ((d) & 0x00ffffff); \ *_ptr = tmp; \ _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ tmp = *_ptr; \ tmp &= 0xffffff00; \ - tmp |= (d) & 0xff; \ + tmp |= ((d) >> 24) & 0xff; \ *_ptr = tmp; \ } while (0) -#elif defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ - GLuint tmp = z24s8_to_s8z24(d); \ - *_ptr = tmp; \ + *_ptr = CPU_TO_LE32(d); \ } while (0) #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ - GLuint tmp = z24s8_to_s8z24(d); \ - *_ptr = tmp; \ + *_ptr = CPU_TO_LE32(d); \ } while (0) #endif -#if defined(COMPILE_R300) +#if defined(RADEON_R300) #define READ_DEPTH( d, _x, _y ) \ do { \ - d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ + GLuint tmp = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ + d = LE32_TO_CPU(((tmp & 0x000000ff) << 24) | ((tmp & 0xffffff00) >> 8)); \ }while(0) -#elif defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) #define READ_DEPTH( d, _x, _y ) \ do { \ - d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \ - d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \ + d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \ + d |= ((*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) << 24) & 0xff000000; \ }while(0) -#elif defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) #define READ_DEPTH( d, _x, _y ) \ do { \ - d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \ + d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \ }while(0) #else #define READ_DEPTH( d, _x, _y ) do { \ - d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \ + d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ } while (0) #endif -#define TAG(x) radeon##x##_z24_s8 +#define TAG(x) radeon##x##_s8_z24 #include "depthtmp.h" /* ================================================================ @@ -674,16 +730,16 @@ do { \ /* 24 bit depth, 8 bit stencil depthbuffer functions */ -#ifdef COMPILE_R300 +#ifdef RADEON_R300 #define WRITE_STENCIL( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \ - GLuint tmp = *_ptr; \ + GLuint tmp = LE32_TO_CPU(*_ptr); \ tmp &= 0xffffff00; \ tmp |= (d) & 0xff; \ - *_ptr = tmp; \ + *_ptr = CPU_TO_LE32(tmp); \ } while (0) -#elif defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) #define WRITE_STENCIL( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ @@ -692,57 +748,57 @@ do { \ tmp |= (d) & 0xff; \ *_ptr = tmp; \ } while (0) -#elif defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) #define WRITE_STENCIL( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \ - GLuint tmp = *_ptr; \ + GLuint tmp = LE32_TO_CPU(*_ptr); \ tmp &= 0x00ffffff; \ tmp |= (((d) & 0xff) << 24); \ - *_ptr = tmp; \ + *_ptr = CPU_TO_LE32(tmp); \ } while (0) #else #define WRITE_STENCIL( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \ - GLuint tmp = *_ptr; \ + GLuint tmp = LE32_TO_CPU(*_ptr); \ tmp &= 0x00ffffff; \ tmp |= (((d) & 0xff) << 24); \ - *_ptr = tmp; \ + *_ptr = CPU_TO_LE32(tmp); \ } while (0) #endif -#ifdef COMPILE_R300 +#ifdef RADEON_R300 #define READ_STENCIL( d, _x, _y ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ - GLuint tmp = *_ptr; \ + GLuint tmp = LE32_TO_CPU(*_ptr); \ d = tmp & 0x000000ff; \ } while (0) -#elif defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) #define READ_STENCIL( d, _x, _y ) \ do { \ GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \ GLuint tmp = *_ptr; \ d = tmp & 0x000000ff; \ } while (0) -#elif defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) #define READ_STENCIL( d, _x, _y ) \ do { \ GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ - GLuint tmp = *_ptr; \ + GLuint tmp = LE32_TO_CPU(*_ptr); \ d = (tmp & 0xff000000) >> 24; \ } while (0) #else #define READ_STENCIL( d, _x, _y ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ - GLuint tmp = *_ptr; \ + GLuint tmp = LE32_TO_CPU(*_ptr); \ d = (tmp & 0xff000000) >> 24; \ } while (0) #endif -#define TAG(x) radeon##x##_z24_s8 +#define TAG(x) radeon##x##_s8_z24 #include "stenciltmp.h" @@ -755,8 +811,7 @@ static void map_unmap_rb(struct gl_renderbuffer *rb, int flag) return; if (flag) { - if (rrb->bo->bom->funcs->bo_wait) - radeon_bo_wait(rrb->bo); + radeon_bo_wait(rrb->bo); r = radeon_bo_map(rrb->bo, 1); if (r) { fprintf(stderr, "(%s) error(%d) mapping buffer.\n", @@ -864,25 +919,35 @@ void radeonInitSpanFuncs(GLcontext * ctx) */ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb) { - if (rrb->base._ActualFormat == GL_RGB5) { + if (rrb->base.Format == MESA_FORMAT_RGB565) { radeonInitPointers_RGB565(&rrb->base); - } else if (rrb->base._ActualFormat == GL_RGB8) { + } else if (rrb->base.Format == MESA_FORMAT_RGB565_REV) { + radeonInitPointers_RGB565_REV(&rrb->base); + } else if (rrb->base.Format == MESA_FORMAT_XRGB8888) { radeonInitPointers_xRGB8888(&rrb->base); - } else if (rrb->base._ActualFormat == GL_RGBA8) { + } else if (rrb->base.Format == MESA_FORMAT_XRGB8888_REV) { + radeonInitPointers_BGRx8888(&rrb->base); + } else if (rrb->base.Format == MESA_FORMAT_ARGB8888) { radeonInitPointers_ARGB8888(&rrb->base); - } else if (rrb->base._ActualFormat == GL_RGBA4) { + } else if (rrb->base.Format == MESA_FORMAT_ARGB8888_REV) { + radeonInitPointers_BGRA8888(&rrb->base); + } else if (rrb->base.Format == MESA_FORMAT_ARGB4444) { radeonInitPointers_ARGB4444(&rrb->base); - } else if (rrb->base._ActualFormat == GL_RGB5_A1) { + } else if (rrb->base.Format == MESA_FORMAT_ARGB4444_REV) { + radeonInitPointers_ARGB4444_REV(&rrb->base); + } else if (rrb->base.Format == MESA_FORMAT_ARGB1555) { radeonInitPointers_ARGB1555(&rrb->base); - } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) { + } else if (rrb->base.Format == MESA_FORMAT_ARGB1555_REV) { + radeonInitPointers_ARGB1555_REV(&rrb->base); + } else if (rrb->base.Format == MESA_FORMAT_Z16) { radeonInitDepthPointers_z16(&rrb->base); - } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) { + } else if (rrb->base.Format == MESA_FORMAT_X8_Z24) { radeonInitDepthPointers_z24(&rrb->base); - } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) { - radeonInitDepthPointers_z24_s8(&rrb->base); - } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) { - radeonInitStencilPointers_z24_s8(&rrb->base); + } else if (rrb->base.Format == MESA_FORMAT_S8_Z24) { + radeonInitDepthPointers_s8_z24(&rrb->base); + } else if (rrb->base.Format == MESA_FORMAT_S8) { + radeonInitStencilPointers_s8_z24(&rrb->base); } else { - fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat); + fprintf(stderr, "radeonSetSpanFunctions: bad format: 0x%04X\n", rrb->base.Format); } } diff --git a/radeon/radeon_state.c b/radeon/radeon_state.c index 4d0d35e..f6c733a 100644 --- a/radeon/radeon_state.c +++ b/radeon/radeon_state.c @@ -550,6 +550,31 @@ static void radeonPolygonOffset( GLcontext *ctx, rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32; } +static void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask ) +{ + r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint i; + drm_radeon_stipple_t stipple; + + /* Must flip pattern upside down. + */ + for ( i = 0 ; i < 32 ; i++ ) { + rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i]; + } + + /* TODO: push this into cmd mechanism + */ + radeon_firevertices(&rmesa->radeon); + LOCK_HARDWARE( &rmesa->radeon ); + + /* FIXME: Use window x,y offsets into stipple RAM. + */ + stipple.mask = rmesa->state.stipple.mask; + drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, + &stipple, sizeof(drm_radeon_stipple_t) ); + UNLOCK_HARDWARE( &rmesa->radeon ); +} + static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); diff --git a/radeon/radeon_state_init.c b/radeon/radeon_state_init.c index f3ad0dd..dd82888 100644 --- a/radeon/radeon_state_init.c +++ b/radeon/radeon_state_init.c @@ -440,16 +440,18 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); if (rrb->cpp == 4) atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; - else switch (rrb->base._ActualFormat) { - case GL_RGB5: + else switch (rrb->base.Format) { + case MESA_FORMAT_RGB565: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; break; - case GL_RGBA4: + case MESA_FORMAT_ARGB4444: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444; break; - case GL_RGB5_A1: + case MESA_FORMAT_ARGB1555: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555; break; + default: + _mesa_problem(ctx, "unexpected format in ctx_emit_cs()"); } cbpitch = (rrb->pitch / rrb->cpp); @@ -643,11 +645,11 @@ static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) OUT_BATCH(CP_PACKET0(RADEON_PP_TXOFFSET_0 + (24 * i), 0)); if (t->mt && !t->image_override) { if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) { - lvl = &t->mt->levels[0]; + lvl = &t->mt->levels[t->minLod]; OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); } else { - OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t), RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); } } else { diff --git a/radeon/radeon_tex.c b/radeon/radeon_tex.c index 99865ff..749ab75 100644 --- a/radeon/radeon_tex.c +++ b/radeon/radeon_tex.c @@ -38,7 +38,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/enums.h" #include "main/image.h" #include "main/simple_list.h" -#include "main/texformat.h" #include "main/texstore.h" #include "main/teximage.h" #include "main/texobj.h" @@ -349,17 +348,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, case GL_TEXTURE_MAX_LEVEL: case GL_TEXTURE_MIN_LOD: case GL_TEXTURE_MAX_LOD: - - /* This isn't the most efficient solution but there doesn't appear to - * be a nice alternative. Since there's no LOD clamping, - * we just have to rely on loading the right subset of mipmap levels - * to simulate a clamped LOD. - */ - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - t->validated = GL_FALSE; - } + t->validated = GL_FALSE; break; default: @@ -389,10 +378,8 @@ static void radeonDeleteTexture( GLcontext *ctx, } } - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - } + radeon_miptree_unreference(&t->mt); + /* Free mipmap images and the texture object itself */ _mesa_delete_texture_object(ctx, texObj); } diff --git a/radeon/radeon_texstate.c b/radeon/radeon_texstate.c index 9d252aa..3cbe3b4 100644 --- a/radeon/radeon_texstate.c +++ b/radeon/radeon_texstate.c @@ -38,8 +38,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/colormac.h" #include "main/context.h" #include "main/macros.h" -#include "main/texformat.h" #include "main/teximage.h" +#include "main/texstate.h" #include "main/texobj.h" #include "main/enums.h" @@ -81,8 +81,10 @@ struct tx_table { GLuint format, filter; }; +/* XXX verify this table against MESA_FORMAT_x values */ static const struct tx_table tx_table[] = { + _INVALID(NONE), /* MESA_FORMAT_NONE */ _ALPHA(RGBA8888), _ALPHA_REV(RGBA8888), _ALPHA(ARGB8888), @@ -660,7 +662,7 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_ rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; - texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; + texUnit = _mesa_get_current_tex_unit(radeon->glCtx); texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target); texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0); @@ -697,20 +699,14 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_ radeon_bo_unref(rImage->bo); rImage->bo = NULL; } - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = NULL; - } - if (rImage->mt) { - radeon_miptree_unreference(rImage->mt); - rImage->mt = NULL; - } + + radeon_miptree_unreference(&t->mt); + radeon_miptree_unreference(&rImage->mt); + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, rb->base.Width, rb->base.Height, 1, 0, rb->cpp); texImage->RowStride = rb->pitch / rb->cpp; - texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx, - internalFormat, - type, format, 0); + rImage->bo = rb->bo; radeon_bo_ref(rImage->bo); t->bo = rb->bo; @@ -718,8 +714,6 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_ t->tile_bits = 0; t->image_override = GL_TRUE; t->override_offset = 0; - t->pp_txpitch &= (1 << 13) -1; - pitch_val = rb->pitch; switch (rb->cpp) { case 4: if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) @@ -738,12 +732,17 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_ t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter; break; } - t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT) - | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT); - t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2; - t->pp_txpitch = pitch_val; - t->pp_txpitch -= 32; + t->pp_txpitch &= (1 << 13) -1; + pitch_val = rb->pitch; + + t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT) + | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT); + if (target == GL_TEXTURE_RECTANGLE_NV) { + t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2; + t->pp_txpitch = pitch_val; + t->pp_txpitch -= 32; + } t->validated = GL_TRUE; _mesa_unlock_texture(radeon->glCtx, texObj); return; @@ -833,11 +832,14 @@ static void import_tex_obj_state( r100ContextPtr rmesa, cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK; cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; - if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) { - GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] ); + if (texobj->pp_txformat & RADEON_TXFORMAT_NON_POWER2) { + uint32_t *txr_cmd = &rmesa->hw.txr[unit].cmd[TXR_CMD_0]; txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */ txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */ - RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.txr[unit] ); + RADEON_STATECHANGE( rmesa, txr[unit] ); + } + + if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) { se_coord_fmt |= RADEON_VTX_ST0_NONPARAMETRIC << unit; } else { @@ -1018,7 +1020,7 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int return GL_TRUE; } - firstImage = t->base.Image[0][t->mt->firstLevel]; + firstImage = t->base.Image[0][t->minLod]; if (firstImage->Border > 0) { fprintf(stderr, "%s: border\n", __FUNCTION__); @@ -1028,27 +1030,27 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int log2Width = firstImage->WidthLog2; log2Height = firstImage->HeightLog2; log2Depth = firstImage->DepthLog2; - texelBytes = firstImage->TexFormat->TexelBytes; + texelBytes = _mesa_get_format_bytes(firstImage->TexFormat); if (!t->image_override) { - if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) { + if (VALID_FORMAT(firstImage->TexFormat)) { const struct tx_table *table = tx_table; t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK | RADEON_TXFORMAT_ALPHA_IN_MAP); t->pp_txfilter &= ~RADEON_YUV_TO_RGB; - t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format; - t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter; + t->pp_txformat |= table[ firstImage->TexFormat ].format; + t->pp_txfilter |= table[ firstImage->TexFormat ].filter; } else { _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); return GL_FALSE; } } - + t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK; - t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << RADEON_MAX_MIP_LEVEL_SHIFT; + t->pp_txfilter |= (t->maxLod - t->minLod) << RADEON_MAX_MIP_LEVEL_SHIFT; t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK | RADEON_TXFORMAT_HEIGHT_MASK | @@ -1057,9 +1059,9 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int RADEON_TXFORMAT_F5_HEIGHT_MASK); t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) | (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT)); - + t->tile_bits = 0; - + if (t->base.Target == GL_TEXTURE_CUBE_MAP) { ASSERT(log2Width == log2Height); t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) | @@ -1080,7 +1082,7 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT)); if ( !t->image_override ) { - if (firstImage->IsCompressed) + if (_mesa_is_format_compressed(firstImage->TexFormat)) t->pp_txpitch = (firstImage->Width + 63) & ~(63); else t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63); @@ -1114,7 +1116,6 @@ static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_objec RADEON_STATECHANGE( rmesa, ctx ); rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit; - RADEON_STATECHANGE( rmesa, tcl ); rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit); diff --git a/radeon/radeon_texture.c b/radeon/radeon_texture.c index fad3d1c..0317811 100644 --- a/radeon/radeon_texture.c +++ b/radeon/radeon_texture.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2009 Maciej Cencora. * Copyright (C) 2008 Nicolai Haehnle. * Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. * @@ -34,7 +35,6 @@ #include "main/convolve.h" #include "main/mipmap.h" #include "main/texcompress.h" -#include "main/texformat.h" #include "main/texstore.h" #include "main/teximage.h" #include "main/texobj.h" @@ -47,7 +47,7 @@ #include "radeon_mipmap_tree.h" -static void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride, +void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride, GLuint numrows, GLuint rowsize) { assert(rowsize <= dststride); @@ -82,8 +82,7 @@ void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage) radeon_texture_image* image = get_radeon_texture_image(timage); if (image->mt) { - radeon_miptree_unreference(image->mt); - image->mt = 0; + radeon_miptree_unreference(&image->mt); assert(!image->base.Data); } else { _mesa_free_texture_image_data(ctx, timage); @@ -101,10 +100,15 @@ void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage) /* Set Data pointer and additional data for mapped texture image */ static void teximage_set_map_data(radeon_texture_image *image) { - radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel]; + radeon_mipmap_level *lvl; + + if (!image->mt) + return; + + lvl = &image->mt->levels[image->mtlevel]; image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset; - image->base.RowStride = lvl->rowstride / image->mt->bpp; + image->base.RowStride = lvl->rowstride / _mesa_get_format_bytes(image->base.TexFormat); } @@ -139,7 +143,6 @@ static void map_override(GLcontext *ctx, radeonTexObj *t) radeon_bo_map(t->bo, GL_FALSE); img->base.Data = t->bo->ptr; - _mesa_set_fetch_functions(&img->base, 2); } static void unmap_override(GLcontext *ctx, radeonTexObj *t) @@ -171,7 +174,7 @@ void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj) radeon_bo_map(t->mt->bo, GL_FALSE); for(face = 0; face < t->mt->faces; ++face) { - for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) + for(level = t->minLod; level <= t->maxLod; ++level) teximage_set_map_data(get_radeon_texture_image(texObj->Image[face][level])); } } @@ -188,7 +191,7 @@ void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj) return; for(face = 0; face < t->mt->faces; ++face) { - for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) + for(level = t->minLod; level <= t->maxLod; ++level) texObj->Image[face][level]->Data = 0; } radeon_bo_unmap(t->mt->bo); @@ -237,8 +240,7 @@ static void radeon_generate_mipmap(GLcontext *ctx, GLenum target, image->mtlevel = i; image->mtface = face; - radeon_miptree_unreference(image->mt); - image->mt = NULL; + radeon_miptree_unreference(&image->mt); } } @@ -256,9 +258,9 @@ void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_objec /* try to find a format which will only need a memcopy */ -static const struct gl_texture_format *radeonChoose8888TexFormat(radeonContextPtr rmesa, - GLenum srcFormat, - GLenum srcType, GLboolean fbo) +static gl_format radeonChoose8888TexFormat(radeonContextPtr rmesa, + GLenum srcFormat, + GLenum srcType, GLboolean fbo) { const GLuint ui = 1; const GLubyte littleEndian = *((const GLubyte *)&ui); @@ -271,37 +273,37 @@ static const struct gl_texture_format *radeonChoose8888TexFormat(radeonContextPt (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) { - return &_mesa_texformat_rgba8888; + return MESA_FORMAT_RGBA8888; } else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) { - return &_mesa_texformat_rgba8888_rev; + return MESA_FORMAT_RGBA8888_REV; } else if (IS_R200_CLASS(rmesa->radeonScreen)) { return _dri_texformat_argb8888; } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) || srcType == GL_UNSIGNED_INT_8_8_8_8)) { - return &_mesa_texformat_argb8888_rev; + return MESA_FORMAT_ARGB8888_REV; } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) || srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) { - return &_mesa_texformat_argb8888; + return MESA_FORMAT_ARGB8888; } else return _dri_texformat_argb8888; } -const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx, - GLint internalFormat, - GLenum format, - GLenum type) +gl_format radeonChooseTextureFormat_mesa(GLcontext * ctx, + GLint internalFormat, + GLenum format, + GLenum type) { return radeonChooseTextureFormat(ctx, internalFormat, format, type, 0); } -const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx, - GLint internalFormat, - GLenum format, - GLenum type, GLboolean fbo) +gl_format radeonChooseTextureFormat(GLcontext * ctx, + GLint internalFormat, + GLenum format, + GLenum type, GLboolean fbo) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); const GLboolean do32bpt = @@ -425,58 +427,72 @@ const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx, case GL_YCBCR_MESA: if (type == GL_UNSIGNED_SHORT_8_8_APPLE || type == GL_UNSIGNED_BYTE) - return &_mesa_texformat_ycbcr; + return MESA_FORMAT_YCBCR; else - return &_mesa_texformat_ycbcr_rev; + return MESA_FORMAT_YCBCR_REV; case GL_RGB_S3TC: case GL_RGB4_S3TC: case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; + return MESA_FORMAT_RGB_DXT1; case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return &_mesa_texformat_rgba_dxt1; + return MESA_FORMAT_RGBA_DXT1; case GL_RGBA_S3TC: case GL_RGBA4_S3TC: case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return &_mesa_texformat_rgba_dxt3; + return MESA_FORMAT_RGBA_DXT3; case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return &_mesa_texformat_rgba_dxt5; + return MESA_FORMAT_RGBA_DXT5; case GL_ALPHA16F_ARB: - return &_mesa_texformat_alpha_float16; + return MESA_FORMAT_ALPHA_FLOAT16; case GL_ALPHA32F_ARB: - return &_mesa_texformat_alpha_float32; + return MESA_FORMAT_ALPHA_FLOAT32; case GL_LUMINANCE16F_ARB: - return &_mesa_texformat_luminance_float16; + return MESA_FORMAT_LUMINANCE_FLOAT16; case GL_LUMINANCE32F_ARB: - return &_mesa_texformat_luminance_float32; + return MESA_FORMAT_LUMINANCE_FLOAT32; case GL_LUMINANCE_ALPHA16F_ARB: - return &_mesa_texformat_luminance_alpha_float16; + return MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16; case GL_LUMINANCE_ALPHA32F_ARB: - return &_mesa_texformat_luminance_alpha_float32; + return MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32; case GL_INTENSITY16F_ARB: - return &_mesa_texformat_intensity_float16; + return MESA_FORMAT_INTENSITY_FLOAT16; case GL_INTENSITY32F_ARB: - return &_mesa_texformat_intensity_float32; + return MESA_FORMAT_INTENSITY_FLOAT32; case GL_RGB16F_ARB: - return &_mesa_texformat_rgba_float16; + return MESA_FORMAT_RGBA_FLOAT16; case GL_RGB32F_ARB: - return &_mesa_texformat_rgba_float32; + return MESA_FORMAT_RGBA_FLOAT32; case GL_RGBA16F_ARB: - return &_mesa_texformat_rgba_float16; + return MESA_FORMAT_RGBA_FLOAT16; case GL_RGBA32F_ARB: - return &_mesa_texformat_rgba_float32; + return MESA_FORMAT_RGBA_FLOAT32; +#ifdef RADEON_R300 + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT16: + return MESA_FORMAT_Z16; + case GL_DEPTH_COMPONENT24: + case GL_DEPTH_COMPONENT32: + case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH24_STENCIL8_EXT: + if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_RV515) + return MESA_FORMAT_S8_Z24; + else + return MESA_FORMAT_Z16; +#else case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT16: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: - return &_mesa_texformat_s8_z24; + return MESA_FORMAT_S8_Z24; +#endif /* EXT_texture_sRGB */ case GL_SRGB: @@ -485,26 +501,193 @@ const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx, case GL_SRGB8_ALPHA8: case GL_COMPRESSED_SRGB: case GL_COMPRESSED_SRGB_ALPHA: - return &_mesa_texformat_srgba8; + return MESA_FORMAT_SRGBA8; case GL_SLUMINANCE: case GL_SLUMINANCE8: case GL_COMPRESSED_SLUMINANCE: - return &_mesa_texformat_sl8; + return MESA_FORMAT_SL8; case GL_SLUMINANCE_ALPHA: case GL_SLUMINANCE8_ALPHA8: case GL_COMPRESSED_SLUMINANCE_ALPHA: - return &_mesa_texformat_sla8; + return MESA_FORMAT_SLA8; default: _mesa_problem(ctx, "unexpected internalFormat 0x%x in %s", (int)internalFormat, __func__); + return MESA_FORMAT_NONE; + } + + return MESA_FORMAT_NONE; /* never get here */ +} + +/** Check if given image is valid within current texture object. + */ +static int image_matches_texture_obj(struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + unsigned level) +{ + const struct gl_texture_image *baseImage = texObj->Image[0][texObj->BaseLevel]; + + if (!baseImage) + return 0; + + if (level < texObj->BaseLevel || level > texObj->MaxLevel) + return 0; + + const unsigned levelDiff = level - texObj->BaseLevel; + const unsigned refWidth = MAX2(baseImage->Width >> levelDiff, 1); + const unsigned refHeight = MAX2(baseImage->Height >> levelDiff, 1); + const unsigned refDepth = MAX2(baseImage->Depth >> levelDiff, 1); + + return (texImage->Width == refWidth && + texImage->Height == refHeight && + texImage->Depth == refDepth); +} + +static void teximage_assign_miptree(radeonContextPtr rmesa, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + unsigned face, + unsigned level) +{ + radeonTexObj *t = radeon_tex_obj(texObj); + radeon_texture_image* image = get_radeon_texture_image(texImage); + + /* Since miptree holds only images for levels <BaseLevel..MaxLevel> + * don't allocate the miptree if the teximage won't fit. + */ + if (!image_matches_texture_obj(texObj, texImage, level)) + return; + + /* Try using current miptree, or create new if there isn't any */ + if (!t->mt || !radeon_miptree_matches_image(t->mt, texImage, face, level)) { + radeon_miptree_unreference(&t->mt); + radeon_try_alloc_miptree(rmesa, t); + if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "%s: texObj %p, texImage %p, face %d, level %d, " + "texObj miptree doesn't match, allocated new miptree %p\n", + __FUNCTION__, texObj, texImage, face, level, t->mt); + } + } + + /* Miptree alocation may have failed, + * when there was no image for baselevel specified */ + if (t->mt) { + image->mtface = face; + image->mtlevel = level; + radeon_miptree_reference(t->mt, &image->mt); + } +} + +static GLuint * allocate_image_offsets(GLcontext *ctx, + unsigned alignedWidth, + unsigned height, + unsigned depth) +{ + int i; + GLuint *offsets; + + offsets = _mesa_malloc(depth * sizeof(GLuint)) ; + if (!offsets) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTex[Sub]Image"); return NULL; } - return NULL; /* never get here */ + for (i = 0; i < depth; ++i) { + offsets[i] = alignedWidth * height * i; + } + + return offsets; +} + +/** + * Update a subregion of the given texture image. + */ +static void radeon_store_teximage(GLcontext* ctx, int dims, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLsizei imageSize, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + int compressed) +{ + radeonTexObj *t = radeon_tex_obj(texObj); + radeon_texture_image* image = get_radeon_texture_image(texImage); + + GLuint dstRowStride; + GLuint *dstImageOffsets; + + if (image->mt) { + dstRowStride = image->mt->levels[image->mtlevel].rowstride; + } else if (t->bo) { + /* TFP case */ + /* TODO */ + assert(0); + } else { + dstRowStride = _mesa_format_row_stride(texImage->TexFormat, texImage->Width); + } + + assert(dstRowStride); + + if (dims == 3) { + unsigned alignedWidth = dstRowStride/_mesa_get_format_bytes(texImage->TexFormat); + dstImageOffsets = allocate_image_offsets(ctx, alignedWidth, texImage->Height, texImage->Depth); + if (!dstImageOffsets) { + return; + } + } else { + dstImageOffsets = texImage->ImageOffsets; + } + + radeon_teximage_map(image, GL_TRUE); + + if (compressed) { + uint32_t srcRowStride, bytesPerRow, rows, block_width, block_height; + GLubyte *img_start; + + _mesa_get_format_block_size(texImage->TexFormat, &block_width, &block_height); + + if (!image->mt) { + dstRowStride = _mesa_format_row_stride(texImage->TexFormat, texImage->Width); + img_start = _mesa_compressed_image_address(xoffset, yoffset, 0, + texImage->TexFormat, + texImage->Width, texImage->Data); + } + else { + uint32_t offset; + offset = dstRowStride / _mesa_get_format_bytes(texImage->TexFormat) * yoffset / block_height + xoffset / block_width; + offset *= _mesa_get_format_bytes(texImage->TexFormat); + img_start = texImage->Data + offset; + } + srcRowStride = _mesa_format_row_stride(texImage->TexFormat, width); + bytesPerRow = srcRowStride; + rows = (height + block_height - 1) / block_height; + + copy_rows(img_start, dstRowStride, pixels, srcRowStride, rows, bytesPerRow); + } + else { + if (!_mesa_texstore(ctx, dims, texImage->_BaseFormat, + texImage->TexFormat, texImage->Data, + xoffset, yoffset, zoffset, + dstRowStride, + dstImageOffsets, + width, height, depth, + format, type, pixels, packing)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage"); + } + } + + if (dims == 3) { + _mesa_free(dstImageOffsets); + } + + radeon_teximage_unmap(image); } /** @@ -525,13 +708,22 @@ static void radeon_teximage( radeonContextPtr rmesa = RADEON_CONTEXT(ctx); radeonTexObj* t = radeon_tex_obj(texObj); radeon_texture_image* image = get_radeon_texture_image(texImage); - GLuint dstRowStride; GLint postConvWidth = width; GLint postConvHeight = height; - GLuint texelBytes; GLuint face = radeon_face_for_target(target); - radeon_firevertices(rmesa); + { + struct radeon_bo *bo; + bo = !image->mt ? image->bo : image->mt->bo; + if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { + radeon_firevertices(rmesa); + } + } + + if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "radeon_teximage%dd: texObj %p, texImage %p, face %d, level %d\n", + dims, texObj, texImage, face, level); + } t->validated = GL_FALSE; @@ -540,62 +732,35 @@ static void radeon_teximage( &postConvHeight); } - /* Choose and fill in the texture format for this image */ - texImage->TexFormat = radeonChooseTextureFormat(ctx, internalFormat, format, type, 0); - _mesa_set_fetch_functions(texImage, dims); - - if (texImage->TexFormat->TexelBytes == 0) { - texelBytes = 0; - texImage->IsCompressed = GL_TRUE; - texImage->CompressedSize = - ctx->Driver.CompressedTextureSize(ctx, texImage->Width, - texImage->Height, texImage->Depth, - texImage->TexFormat->MesaFormat); - } else { - texImage->IsCompressed = GL_FALSE; - texImage->CompressedSize = 0; - - texelBytes = texImage->TexFormat->TexelBytes; + if (!_mesa_is_format_compressed(texImage->TexFormat)) { + GLuint texelBytes = _mesa_get_format_bytes(texImage->TexFormat); /* Minimum pitch of 32 bytes */ if (postConvWidth * texelBytes < 32) { - postConvWidth = 32 / texelBytes; - texImage->RowStride = postConvWidth; + postConvWidth = 32 / texelBytes; + texImage->RowStride = postConvWidth; } - if (!image->mt) { + if (!image->mt) { assert(texImage->RowStride == postConvWidth); } } - /* Allocate memory for image */ - radeonFreeTexImageData(ctx, texImage); /* Mesa core only clears texImage->Data but not image->mt */ - - if (t->mt && - t->mt->firstLevel == level && - t->mt->lastLevel == level && - t->mt->target != GL_TEXTURE_CUBE_MAP_ARB && - !radeon_miptree_matches_image(t->mt, texImage, face, level)) { - radeon_miptree_unreference(t->mt); - t->mt = NULL; - } - - if (!t->mt) - radeon_try_alloc_miptree(rmesa, t, image, face, level); - if (t->mt && radeon_miptree_matches_image(t->mt, texImage, face, level)) { - radeon_mipmap_level *lvl; - image->mt = t->mt; - image->mtlevel = level - t->mt->firstLevel; - image->mtface = face; - radeon_miptree_reference(t->mt); - lvl = &image->mt->levels[image->mtlevel]; - dstRowStride = lvl->rowstride; - } else { - int size; - if (texImage->IsCompressed) { - size = texImage->CompressedSize; - } else { - size = texImage->Width * texImage->Height * texImage->Depth * texImage->TexFormat->TexelBytes; + /* Mesa core only clears texImage->Data but not image->mt */ + radeonFreeTexImageData(ctx, texImage); + + if (!t->bo) { + teximage_assign_miptree(rmesa, texObj, texImage, face, level); + if (!image->mt) { + int size = _mesa_format_image_size(texImage->TexFormat, + texImage->Width, + texImage->Height, + texImage->Depth); + texImage->Data = _mesa_alloc_texmemory(size); + if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "radeon_teximage%dd: texObj %p, texImage %p, " + " no miptree assigned, using local memory %p\n", + dims, texObj, texImage, texImage->Data); + } } - texImage->Data = _mesa_alloc_texmemory(size); } /* Upload texture image; note that the spec allows pixels to be NULL */ @@ -609,69 +774,16 @@ static void radeon_teximage( } if (pixels) { - radeon_teximage_map(image, GL_TRUE); - if (compressed) { - if (image->mt) { - uint32_t srcRowStride, bytesPerRow, rows; - srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); - bytesPerRow = srcRowStride; - rows = (height + 3) / 4; - copy_rows(texImage->Data, image->mt->levels[level].rowstride, - pixels, srcRowStride, rows, bytesPerRow); - } else { - memcpy(texImage->Data, pixels, imageSize); - } - } else { - GLuint dstRowStride; - GLuint *dstImageOffsets; - - if (image->mt) { - radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel]; - dstRowStride = lvl->rowstride; - } else { - dstRowStride = texImage->Width * texImage->TexFormat->TexelBytes; - } - - if (dims == 3) { - int i; - - dstImageOffsets = _mesa_malloc(depth * sizeof(GLuint)) ; - if (!dstImageOffsets) - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); - - for (i = 0; i < depth; ++i) { - dstImageOffsets[i] = dstRowStride/texImage->TexFormat->TexelBytes * height * i; - } - } else { - dstImageOffsets = texImage->ImageOffsets; - } - - if (!texImage->TexFormat->StoreImage(ctx, dims, - texImage->_BaseFormat, - texImage->TexFormat, - texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */ - dstRowStride, - dstImageOffsets, - width, height, depth, - format, type, pixels, packing)) - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); - - if (dims == 3) - _mesa_free(dstImageOffsets); - } - - /* SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - radeon_generate_mipmap(ctx, target, texObj); - } + radeon_store_teximage(ctx, dims, + 0, 0, 0, + width, height, depth, + imageSize, format, type, + pixels, packing, + texObj, texImage, + compressed); } _mesa_unmap_teximage_pbo(ctx, packing); - - if (pixels) - radeon_teximage_unmap(image); - - } void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level, @@ -724,7 +836,7 @@ void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level, } /** - * Update a subregion of the given texture image. + * All glTexSubImage calls go through this function. */ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int level, GLint xoffset, GLint yoffset, GLint zoffset, @@ -741,69 +853,39 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve radeonTexObj* t = radeon_tex_obj(texObj); radeon_texture_image* image = get_radeon_texture_image(texImage); - radeon_firevertices(rmesa); + { + struct radeon_bo *bo; + bo = !image->mt ? image->bo : image->mt->bo; + if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { + radeon_firevertices(rmesa); + } + } + + if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "radeon_texsubimage%dd: texObj %p, texImage %p, face %d, level %d\n", + dims, texObj, texImage, radeon_face_for_target(target), level); + } t->validated = GL_FALSE; if (compressed) { pixels = _mesa_validate_pbo_compressed_teximage( - ctx, imageSize, pixels, packing, "glCompressedTexImage"); + ctx, imageSize, pixels, packing, "glCompressedTexSubImage"); } else { pixels = _mesa_validate_pbo_teximage(ctx, dims, - width, height, depth, format, type, pixels, packing, "glTexSubImage1D"); + width, height, depth, format, type, pixels, packing, "glTexSubImage"); } if (pixels) { - GLint dstRowStride; - radeon_teximage_map(image, GL_TRUE); - - if (image->mt) { - radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel]; - dstRowStride = lvl->rowstride; - } else { - dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes; - } - - if (compressed) { - uint32_t srcRowStride, bytesPerRow, rows; - GLubyte *img_start; - if (!image->mt) { - dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width); - img_start = _mesa_compressed_image_address(xoffset, yoffset, 0, - texImage->TexFormat->MesaFormat, - texImage->Width, texImage->Data); - } - else { - uint32_t blocks_x = dstRowStride / (image->mt->bpp * 4); - img_start = texImage->Data + image->mt->bpp * 4 * (blocks_x * (yoffset / 4) + xoffset / 4); - } - srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); - bytesPerRow = srcRowStride; - rows = (height + 3) / 4; - - copy_rows(img_start, dstRowStride, pixels, srcRowStride, rows, bytesPerRow); - - } else { - if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, - texImage->TexFormat, texImage->Data, - xoffset, yoffset, zoffset, - dstRowStride, - texImage->ImageOffsets, - width, height, depth, - format, type, pixels, packing)) - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage"); - } - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - radeon_generate_mipmap(ctx, target, texObj); - } + radeon_store_teximage(ctx, dims, + xoffset, yoffset, zoffset, + width, height, depth, + imageSize, format, type, + pixels, packing, + texObj, texImage, + compressed); } - radeon_teximage_unmap(image); - _mesa_unmap_teximage_pbo(ctx, packing); - - } void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, @@ -859,143 +941,6 @@ void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level, format, type, pixels, packing, texObj, texImage, 0); } - - -/** - * Ensure that the given image is stored in the given miptree from now on. - */ -static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_image *image, int face, int level) -{ - radeon_mipmap_level *dstlvl = &mt->levels[level - mt->firstLevel]; - unsigned char *dest; - - assert(image->mt != mt); - assert(dstlvl->width == image->base.Width); - assert(dstlvl->height == image->base.Height); - assert(dstlvl->depth == image->base.Depth); - - - radeon_bo_map(mt->bo, GL_TRUE); - dest = mt->bo->ptr + dstlvl->faces[face].offset; - - if (image->mt) { - /* Format etc. should match, so we really just need a memcpy(). - * In fact, that memcpy() could be done by the hardware in many - * cases, provided that we have a proper memory manager. - */ - radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel-image->mt->firstLevel]; - - assert(srclvl->size == dstlvl->size); - assert(srclvl->rowstride == dstlvl->rowstride); - - radeon_bo_map(image->mt->bo, GL_FALSE); - - memcpy(dest, - image->mt->bo->ptr + srclvl->faces[face].offset, - dstlvl->size); - radeon_bo_unmap(image->mt->bo); - - radeon_miptree_unreference(image->mt); - } else { - uint32_t srcrowstride; - uint32_t height; - /* need to confirm this value is correct */ - if (mt->compressed) { - height = (image->base.Height + 3) / 4; - srcrowstride = _mesa_compressed_row_stride(image->base.TexFormat->MesaFormat, image->base.Width); - } else { - height = image->base.Height * image->base.Depth; - srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes; - } - -// if (mt->tilebits) -// WARN_ONCE("%s: tiling not supported yet", __FUNCTION__); - - copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride, - height, srcrowstride); - - _mesa_free_texmemory(image->base.Data); - image->base.Data = 0; - } - - radeon_bo_unmap(mt->bo); - - image->mt = mt; - image->mtface = face; - image->mtlevel = level; - radeon_miptree_reference(image->mt); -} - -int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - radeonTexObj *t = radeon_tex_obj(texObj); - radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[0][texObj->BaseLevel]); - int face, level; - - if (t->validated || t->image_override) - return GL_TRUE; - - if (RADEON_DEBUG & RADEON_TEXTURE) - fprintf(stderr, "%s: Validating texture %p now\n", __FUNCTION__, texObj); - - if (baseimage->base.Border > 0) - return GL_FALSE; - - /* Ensure a matching miptree exists. - * - * Differing mipmap trees can result when the app uses TexImage to - * change texture dimensions. - * - * Prefer to use base image's miptree if it - * exists, since that most likely contains more valid data (remember - * that the base level is usually significantly larger than the rest - * of the miptree, so cubemaps are the only possible exception). - */ - if (baseimage->mt && - baseimage->mt != t->mt && - radeon_miptree_matches_texture(baseimage->mt, &t->base)) { - radeon_miptree_unreference(t->mt); - t->mt = baseimage->mt; - radeon_miptree_reference(t->mt); - } else if (t->mt && !radeon_miptree_matches_texture(t->mt, &t->base)) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - } - - if (!t->mt) { - if (RADEON_DEBUG & RADEON_TEXTURE) - fprintf(stderr, " Allocate new miptree\n"); - radeon_try_alloc_miptree(rmesa, t, baseimage, 0, texObj->BaseLevel); - if (!t->mt) { - _mesa_problem(ctx, "radeon_validate_texture failed to alloc miptree"); - return GL_FALSE; - } - } - - /* Ensure all images are stored in the single main miptree */ - for(face = 0; face < t->mt->faces; ++face) { - for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) { - radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]); - if (RADEON_DEBUG & RADEON_TEXTURE) - fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt); - if (t->mt == image->mt) { - if (RADEON_DEBUG & RADEON_TEXTURE) - fprintf(stderr, "OK\n"); - - continue; - } - - if (RADEON_DEBUG & RADEON_TEXTURE) - fprintf(stderr, "migrating\n"); - migrate_image_to_miptree(t->mt, image, face, level); - } - } - - return GL_TRUE; -} - - /** * Need to map texture image into memory before copying image data, * then unmap it. diff --git a/radeon/radeon_texture.h b/radeon/radeon_texture.h index 888a55b..906daf1 100644 --- a/radeon/radeon_texture.h +++ b/radeon/radeon_texture.h @@ -30,6 +30,11 @@ #ifndef RADEON_TEXTURE_H #define RADEON_TEXTURE_H + +#include "main/formats.h" + +void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride, + GLuint numrows, GLuint rowsize); struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx); void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage); @@ -40,14 +45,16 @@ void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj); void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj); int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj); GLuint radeon_face_for_target(GLenum target); -const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx, - GLint internalFormat, - GLenum format, - GLenum type); -const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx, - GLint internalFormat, - GLenum format, - GLenum type, GLboolean fbo); + +gl_format radeonChooseTextureFormat_mesa(GLcontext * ctx, + GLint internalFormat, + GLenum format, + GLenum type); + +gl_format radeonChooseTextureFormat(GLcontext * ctx, + GLint internalFormat, + GLenum format, + GLenum type, GLboolean fbo); void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level, GLint internalFormat, |