diff options
author | Michel Dänzer <daenzer@vmware.com> | 2009-10-03 16:33:32 +0200 |
---|---|---|
committer | Michel Dänzer <daenzer@vmware.com> | 2009-10-03 16:33:32 +0200 |
commit | 4b4ce36081ca151c24e028c54b59986f41731a73 (patch) | |
tree | 06b15fc1c2acdaaf48a93254ba51b08a8d1ca63d | |
parent | eade1e5be159c9f2965d611925596d33cab11d6d (diff) |
R3/5xx EXA: Minimise number of draw primitives used for Composite operations.
This should reduce the kernel CS checker overhead, if nothing else.
I'll leave porting this to other chipset families to others who can test it.
-rw-r--r-- | src/radeon.h | 3 | ||||
-rw-r--r-- | src/radeon_exa_render.c | 46 |
2 files changed, 37 insertions, 12 deletions
diff --git a/src/radeon.h b/src/radeon.h
index db7b26b..0322bf0 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -687,6 +687,9 @@ struct radeon_accel_state {
     /* Size of tiles ... set to 65536x65536 if not tiling in that direction */
     Bool src_tile_width;
     Bool src_tile_height;
+    uint32_t *draw_header;
+    unsigned vtx_count;
+    unsigned num_vtx;
 
     Bool vsync;
 
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 6053eef..c266fb7 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -2041,6 +2041,16 @@ static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
     ENTER_DRAW(0);
 
     if (IS_R300_3D || IS_R500_3D) {
+        if (info->accel_state->draw_header) {
+            info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+                                                           info->accel_state->num_vtx *
+                                                           info->accel_state->vtx_count);
+            info->accel_state->draw_header[1] = RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
+                                                RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+                                                (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT);
+            info->accel_state->draw_header = NULL;
+        }
+
         BEGIN_ACCEL(3);
         OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
         OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
@@ -2126,8 +2136,10 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
     /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
        srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
 
-#if defined(ACCEL_CP) && defined(XF86DRM_MODE)
-    if (info->cs && CS_FULL(info->cs)) {
+#if defined(ACCEL_CP)
+    if ((info->cs && CS_FULL(info->cs)) ||
+        (!info->cs && (info->cp->indirectBuffer->used + 4 * 32) >
+         info->cp->indirectBuffer->total)) {
         FUNC_NAME(RadeonDoneComposite)(info->accel_state->dst_pix);
         radeon_cs_flush_indirect(pScrn);
         info->accel_state->exa->PrepareComposite(info->accel_state->composite_op,
@@ -2202,12 +2214,26 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
                  RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
                  (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
     } else if (IS_R300_3D || IS_R500_3D) {
-        BEGIN_RING(4 * vtx_count + 4);
-        OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
-                            4 * vtx_count));
-        OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
-                 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
-                 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+        if (!info->accel_state->draw_header) {
+            BEGIN_RING(2);
+
+            if (info->cs)
+                info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
+            else
+                info->accel_state->draw_header = __head;
+            info->accel_state->num_vtx = 0;
+            info->accel_state->vtx_count = vtx_count;
+
+            OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+                                4 * vtx_count));
+            OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
+                     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+                     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+            ADVANCE_RING();
+        }
+
+        info->accel_state->num_vtx += 4;
+        BEGIN_RING(4 * vtx_count);
     } else {
         BEGIN_RING(3 * vtx_count + 2);
         OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
@@ -2267,10 +2293,6 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
                 xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0], xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
     }
 
-    if (IS_R300_3D || IS_R500_3D)
-        /* flushing is pipelined, free/finish is not */
-        OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
-
 #ifdef ACCEL_CP
     ADVANCE_RING();
 #else