diff --git a/progs/demos/Makefile b/progs/demos/Makefile index 5b1d2a0..67bb854 100644 --- a/progs/demos/Makefile +++ b/progs/demos/Makefile @@ -19,6 +19,7 @@ PROGS = \ clearspd \ copypix \ cubemap \ + cubemap_compressed \ dinoshade \ dissolve \ drawpix \ diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 8986191..14f0ba5 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -305,13 +305,20 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache, "def_max_anisotropy"); - if ( sPriv->drm_version.major == 1 - && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) { + if /*( sPriv->drm_version.major == 1 + && */ (driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" )) { + + if (screen->kernel_mm) { + rmesa->using_hyperz = GL_TRUE; + + } else { + if ( sPriv->drm_version.minor < 13 ) fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " "disabling.\n", sPriv->drm_version.minor ); else rmesa->using_hyperz = GL_TRUE; + } } if ( sPriv->drm_version.minor >= 15 ) diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index a1b5057..d71ed32 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -54,6 +54,65 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R200_TIMEOUT 512 #define R200_IDLE_RETRY 16 +static void r200_hyper_clear(GLcontext *ctx, int flags) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + struct radeon_renderbuffer *rrb; + int depthpixperline; + uint32_t clearmask; + uint32_t depthclearvalue; + + rrb = radeon_get_depthbuffer(&r200->radeon); + if (!rrb) + return; + + depthpixperline = rrb->pitch / rrb->cpp; + depthclearvalue = r200->radeon.state.depth.clear; + + clearmask = 0; + + if (flags & RADEON_USE_HIERZ) + clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f; + + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHCLEARVALUE, 0)); + OUT_BATCH(depthclearvalue); + OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZMASKOFFSET, 0)); + OUT_BATCH(0); + OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); + OUT_BATCH(RADEON_RB3D_ZC_FLUSH_ALL); + END_BATCH(); + + if ((r200->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_HIERZ)) { + int tileoffset, nrtilesx, nrtilesy, j; + + tileoffset = 0; + nrtilesx = (rrb->base.Width >> 5); + nrtilesy = (rrb->base.Height >> 3); + for (j = 0; j < nrtilesy; j++) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH(CP_PACKET3(RADEON_CP_PACKET3_3D_CLEAR_ZMASK, 2)); + OUT_BATCH(tileoffset * 16); + OUT_BATCH(nrtilesx + 1); + OUT_BATCH(clearmask); + END_BATCH(); + tileoffset += depthpixperline >> 5; + } + + if (flags & RADEON_USE_HIERZ) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH(CP_PACKET3(RADEON_CP_PACKET3_3D_CLEAR_HIZ, 2)); + OUT_BATCH(0x0); /* First tile */ + OUT_BATCH(0x3cc0); + OUT_BATCH((0xff<<22)|(0xff<<6)| 0x003f003f); /* clearmask */ + END_BATCH(); + } + + } +} + + static void r200KernelClear(GLcontext *ctx, GLuint flags) { r200ContextPtr rmesa = R200_CONTEXT(ctx); @@ -233,21 +292,56 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) if ( !flags ) return; +/* hyper-z experiment */ if (rmesa->using_hyperz) { - flags |= RADEON_USE_COMP_ZBUF; +// flags |= RADEON_USE_COMP_ZBUF; /* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) - flags |= RADEON_USE_HIERZ; */ - if (!((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && - ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) { - flags |= RADEON_CLEAR_FASTZ; - } - } - - if (rmesa->radeon.radeonScreen->kernel_mm) +// flags |= RADEON_USE_HIERZ; */ +// if (!((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && +// ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) { +// flags |= RADEON_CLEAR_FASTZ; +// } + + int hw_stencil = 0; + struct radeon_renderbuffer *rrbStencil + = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) + flags |= RADEON_USE_HIERZ; + + flags |= RADEON_USE_COMP_ZBUF; + hw_stencil = (rrbStencil && rrbStencil->bo); + + /* want a depth clear and if we want a stencil clear + hw stencil + and write mask */ + + if (flags & RADEON_DEPTH) { + if (!hw_stencil) + flags |= RADEON_CLEAR_FASTZ; + else { + if ((flags & RADEON_STENCIL) && + ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK)) + flags |= RADEON_CLEAR_FASTZ; + } + } + + } + + /* if (rmesa->radeon.radeonScreen->kernel_mm) radeonUserClear(ctx, orig_mask); else { r200KernelClear(ctx, flags); - rmesa->radeon.hw.all_dirty = GL_TRUE; + rmesa->radeon.hw.all_dirty = GL_TRUE; */ + if (rmesa->radeon.radeonScreen->kernel_mm) { + if (flags & RADEON_CLEAR_FASTZ) { + orig_mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); + /* clear depth using hyperz if we can */ + r200_hyper_clear(ctx, flags); + } + radeonUserClear(ctx, orig_mask); + } else { + r200KernelClear(ctx, flags); + rmesa->radeon.hw.all_dirty = GL_TRUE; } } diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index e06437b..b7750ba 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -555,6 +555,10 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) drb = radeon_get_depthbuffer(&r200->radeon); if (drb) { zbpitch = (drb->pitch / drb->cpp); +// r200 hyperz experiment + if (r200->using_hyperz) + zbpitch |= RADEON_DEPTH_HYPERZ; +// r200 hyperz exp end if (drb->cpp == 4) depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; else @@ -1266,8 +1270,8 @@ void r200InitState( r200ContextPtr rmesa ) if (rmesa->using_hyperz) { rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE | R200_Z_DECOMPRESSION_ENABLE; -/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) - rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/ +/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE; */ } rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 46a9cd5..c24d0fa 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -446,5 +446,6 @@ enum { #define RADEON_CHIPSET_TCL (1 << 2) /* tcl support - any radeon */ #define RADEON_CHIPSET_BROKEN_STENCIL (1 << 3) /* r100 stencil bug */ #define R200_CHIPSET_YCBCR_BROKEN (1 << 4) /* r200 ycbcr bug */ +#define RADEON_CHIPSET_HIERZ (1 << 5) /* r200 HierZ */ #endif /* _RADEON_CHIPSET_H */ diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index 475e93b..b39b2c5 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -241,11 +241,15 @@ r100CreateContext( const __GLcontextModes *glVisual, "def_max_anisotropy"); if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) { - if ( sPriv->drm_version.minor < 13 ) - fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " - "disabling.\n", sPriv->drm_version.minor ); - else - rmesa->using_hyperz = GL_TRUE; + if (screen->kernel_mm) { + rmesa->using_hyperz = GL_TRUE; + } else { + if ( sPriv->drm_version.minor < 13 ) + fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " + "disabling.\n", sPriv->drm_version.minor ); + else + rmesa->using_hyperz = GL_TRUE; + } } if ( sPriv->drm_version.minor >= 15 ) diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index db0e4f2..cf90c3e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -553,6 +553,51 @@ static void radeonKernelClear(GLcontext *ctx, GLuint flags) UNLOCK_HARDWARE( &rmesa->radeon ); } +static void r100_hyper_clear(GLcontext *ctx, int flags) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + struct radeon_renderbuffer *rrb; + int depthpixperline; + uint32_t clearmask; + uint32_t depthclearvalue; + + rrb = radeon_get_depthbuffer(&r100->radeon); + if (!rrb) + return; + + depthpixperline = rrb->pitch / rrb->cpp; + depthclearvalue = r100->radeon.state.depth.clear; + + clearmask = 0; + + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHCLEARVALUE, 0)); + OUT_BATCH(depthclearvalue); + OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZMASKOFFSET, 0)); + OUT_BATCH(0); + OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); + OUT_BATCH(RADEON_RB3D_ZC_FLUSH_ALL); + END_BATCH(); + + if ((r100->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_HIERZ)) { + int tileoffset, nrtilesx, nrtilesy, j; + + tileoffset = 0; + nrtilesx = (rrb->base.Width & ~63) >> 4; + nrtilesy = (rrb->base.Height >> 3); + for (j = 0; j < nrtilesy; j++) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH(CP_PACKET3(RADEON_CP_PACKET3_3D_CLEAR_ZMASK, 2)); + OUT_BATCH(tileoffset * 8); + OUT_BATCH(nrtilesx + 4); + OUT_BATCH(clearmask); + END_BATCH(); + tileoffset += depthpixperline >> 6; + } + } +} + static void radeonClear( GLcontext *ctx, GLbitfield mask ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); @@ -610,20 +655,38 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) return; if (rmesa->using_hyperz) { - flags |= RADEON_USE_COMP_ZBUF; -/* if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL) - flags |= RADEON_USE_HIERZ; */ - if (((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && - ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) { - flags |= RADEON_CLEAR_FASTZ; - } + int hw_stencil = 0; + struct radeon_renderbuffer *rrbStencil + = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + + flags |= RADEON_USE_COMP_ZBUF; + + hw_stencil = (rrbStencil && rrbStencil->bo); + + /* want a depth clear and if we want a stencil clear + hw stencil + and write mask */ + + if (flags & RADEON_DEPTH) { + if (!hw_stencil) + flags |= RADEON_CLEAR_FASTZ; + else { + if ((flags & RADEON_STENCIL) && + ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK)) + flags |= RADEON_CLEAR_FASTZ; + } + } } - if (rmesa->radeon.radeonScreen->kernel_mm) - radeonUserClear(ctx, orig_mask); - else { - radeonKernelClear(ctx, flags); - rmesa->radeon.hw.all_dirty = GL_TRUE; + if (rmesa->radeon.radeonScreen->kernel_mm) { + if (flags & RADEON_CLEAR_FASTZ) { + orig_mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); + /* clear depth using hyperz if we can */ + r100_hyper_clear(ctx, flags); + } + radeonUserClear(ctx, orig_mask); + } else { + radeonKernelClear(ctx, flags); + rmesa->radeon.hw.all_dirty = GL_TRUE; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 93b6399..c95cfb8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -417,7 +417,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_RADEON_QG: /* all original radeons (7200) presumably have a stencil op bug */ screen->chip_family = CHIP_FAMILY_R100; - screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_BROKEN_STENCIL; + screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_BROKEN_STENCIL | RADEON_CHIPSET_HIERZ; break; case PCI_CHIP_RV200_QW: @@ -425,7 +425,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_RADEON_LW: case PCI_CHIP_RADEON_LX: screen->chip_family = CHIP_FAMILY_RV200; - screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_HIERZ; break; case PCI_CHIP_R200_BB: @@ -434,7 +434,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_R200_QL: case PCI_CHIP_R200_QM: screen->chip_family = CHIP_FAMILY_R200; - screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_HIERZ; break; case PCI_CHIP_RV250_If: @@ -454,7 +454,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_RV280_5C61: case PCI_CHIP_RV280_5C63: screen->chip_family = CHIP_FAMILY_RV280; - screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_HIERZ; break; case PCI_CHIP_RS300_5834: diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index 91718a4..d576f4c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -457,6 +457,8 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) drb = radeon_get_depthbuffer(&r100->radeon); if (drb) { zbpitch = (drb->pitch / drb->cpp); + if (r100->using_hyperz) + zbpitch |= RADEON_DEPTH_HYPERZ; if (drb->cpp == 4) depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; else diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h index 1b33de1..ab80663 100644 --- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h +++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h @@ -1569,12 +1569,19 @@ # define RADEON_COLOR_ENDIAN_WORD_SWAP (1 << 18) # define RADEON_COLOR_ENDIAN_DWORD_SWAP (2 << 18) #define RADEON_RB3D_DEPTHOFFSET 0x1c24 +#define RADEON_RB3D_DEPTHCLEARVALUE 0x3230 #define RADEON_RB3D_DEPTHPITCH 0x1c28 # define RADEON_DEPTHPITCH_MASK 0x00001ff8 # define RADEON_DEPTH_HYPERZ (3 << 16) # define RADEON_DEPTH_ENDIAN_NO_SWAP (0 << 18) # define RADEON_DEPTH_ENDIAN_WORD_SWAP (1 << 18) # define RADEON_DEPTH_ENDIAN_DWORD_SWAP (2 << 18) +#define RADEON_RB3D_ZCACHE_MODE 0x3250 +#define RADEON_RB3D_ZCACHE_CTLSTAT 0x3254 +# define RADEON_RB3D_ZC_FLUSH (1 << 0) +# define RADEON_RB3D_ZC_FREE (1 << 2) +# define RADEON_RB3D_ZC_FLUSH_ALL 0x5 +# define RADEON_RB3D_ZC_BUSY (1 << 31) #define RADEON_RB3D_PLANEMASK 0x1d84 #define RADEON_RB3D_ROPCNTL 0x1d80 # define RADEON_ROP_MASK (15 << 8) @@ -1603,6 +1610,7 @@ # define RADEON_STENCIL_WRITE_MASK (0xff << 24) #define RADEON_RB3D_ZPASS_DATA 0x3290 #define RADEON_RB3D_ZPASS_ADDR 0x3294 +#define RADEON_RB3D_ZMASKOFFSET 0x3234 #define RADEON_RB3D_ZSTENCILCNTL 0x1c2c # define RADEON_DEPTH_FORMAT_MASK (0xf << 0) # define RADEON_DEPTH_FORMAT_16BIT_INT_Z (0 << 0) @@ -2059,6 +2067,8 @@ #define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00 #define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00 #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00 +#define RADEON_CP_PACKET3_3D_CLEAR_ZMASK 0xC0003200 +#define RADEON_CP_PACKET3_3D_CLEAR_HIZ 0xC0003700 #define R200_CP_CMD_3D_DRAW_VBUF_2 0xC0003400 #define R200_CP_CMD_3D_DRAW_IMMD_2 0xC0003500 #define R200_CP_CMD_3D_DRAW_INDX_2 0xC0003600