diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-04-02 12:59:34 +0200 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-04-02 12:59:34 +0200 |
commit | a198fe4074e8257c8c3e3a460c15ed108d8d07a8 (patch) | |
tree | cff7871908635eb82b860497c869c1f58143a22d | |
parent | fd026ff56899498375b748a36cc2eaed1158484e (diff) |
-rw-r--r-- | r200/r200_cmdbuf.c | 3 | ||||
-rw-r--r-- | r200/r200_tcl.c | 56 | ||||
-rw-r--r-- | r300/r300_cmdbuf.c | 42 | ||||
-rw-r--r-- | r300/r300_fragprog_common.c | 13 | ||||
-rw-r--r-- | r300/r300_vertprog.c | 30 | ||||
-rw-r--r-- | r600/r700_state.c | 2 | ||||
-rw-r--r-- | radeon/radeon_dma.c | 7 |
7 files changed, 107 insertions, 46 deletions
diff --git a/r200/r200_cmdbuf.c b/r200/r200_cmdbuf.c index 2f2b8d9..ad43a8c 100644 --- a/r200/r200_cmdbuf.c +++ b/r200/r200_cmdbuf.c @@ -189,7 +189,8 @@ void r200FlushElts(GLcontext *ctx) if (R200_ELT_BUF_SZ > elt_used) radeonReturnDmaRegion(&rmesa->radeon, R200_ELT_BUF_SZ - elt_used); - if (radeon_is_debug_enabled(RADEON_SYNC, RADEON_CRITICAL)) { + if (radeon_is_debug_enabled(RADEON_SYNC, RADEON_CRITICAL) + && !rmesa->radeon.radeonScreen->kernel_mm) { radeon_print(RADEON_SYNC, RADEON_NORMAL, "%s: Syncing\n", __FUNCTION__); radeonFinish( rmesa->radeon.glCtx ); } diff --git a/r200/r200_tcl.c b/r200/r200_tcl.c index f3f558b..d43e145 100644 --- a/r200/r200_tcl.c +++ b/r200/r200_tcl.c @@ -404,8 +404,9 @@ static GLuint r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev ) rendering code may decide convert to elts. In that case we have to make pessimistic prediction. and use larger of 2 paths. */ - const GLuint elts = ELTS_BUFSZ(nr_aos); - const GLuint index = INDEX_BUFSZ; + const GLuint elt_count =(VB->Primitive[i].count/GET_MAX_HW_ELTS() + 1); + const GLuint elts = ELTS_BUFSZ(nr_aos) * elt_count; + const GLuint index = INDEX_BUFSZ * elt_count; const GLuint vbuf = VBUF_BUFSZ; if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) || vbuf > index + elts) @@ -687,25 +688,34 @@ static char *getFallbackString(GLuint bit) void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) { - r200ContextPtr rmesa = R200_CONTEXT(ctx); - GLuint oldfallback = rmesa->radeon.TclFallback; - - if (mode) { - rmesa->radeon.TclFallback |= bit; - if (oldfallback == 0) { - if (R200_DEBUG & RADEON_FALLBACKS) - fprintf(stderr, "R200 begin tcl fallback %s\n", - getFallbackString( bit )); - transition_to_swtnl( ctx ); - } - } - else { - rmesa->radeon.TclFallback &= ~bit; - if (oldfallback == bit) { - if (R200_DEBUG & RADEON_FALLBACKS) - fprintf(stderr, "R200 end tcl fallback %s\n", - getFallbackString( bit )); - transition_to_hwtnl( ctx ); - } - } + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint oldfallback = rmesa->radeon.TclFallback; + + if (mode) { + if (oldfallback == 0) { + /* We have to flush before transition */ + if ( rmesa->radeon.dma.flush ) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); + + if (R200_DEBUG & RADEON_FALLBACKS) + fprintf(stderr, "R200 begin tcl fallback %s\n", + getFallbackString( bit )); + rmesa->radeon.TclFallback |= bit; + transition_to_swtnl( ctx ); + } else + rmesa->radeon.TclFallback |= bit; + } else { + if (oldfallback == bit) { + /* We have to flush before transition */ + if ( rmesa->radeon.dma.flush ) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); + + if (R200_DEBUG & RADEON_FALLBACKS) + fprintf(stderr, "R200 end tcl fallback %s\n", + getFallbackString( bit )); + rmesa->radeon.TclFallback &= ~bit; + transition_to_hwtnl( ctx ); + } else + rmesa->radeon.TclFallback &= ~bit; + } } diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c index 6cfa568..788dc2f 100644 --- a/r300/r300_cmdbuf.c +++ b/r300/r300_cmdbuf.c @@ -77,12 +77,29 @@ static int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) cnt = vpu_count(atom->cmd); if (r300->radeon.radeonScreen->kernel_mm) { - extra = 5; + extra = 3; } return cnt ? (cnt * 4) + extra : 0; } +static int check_vpp(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int cnt; + int extra = 1; + + if (r300->radeon.radeonScreen->kernel_mm) { + cnt = r300->selected_vp->code.constants.Count * 4; + extra = 3; + } else { + cnt = vpu_count(atom->cmd); + extra = 1; + } + + return cnt ? (cnt * 4) + extra : 0; +} + void r300_emit_vpu(struct r300_context *r300, uint32_t *data, unsigned len, @@ -101,15 +118,26 @@ static void emit_vpu_state(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); drm_r300_cmd_header_t cmd; - uint32_t addr, ndw; + uint32_t addr; cmd.u = atom->cmd[0]; addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; - ndw = atom->check(ctx, atom); r300_emit_vpu(r300, &atom->cmd[1], vpu_count(atom->cmd) * 4, addr); } +static void emit_vpp_state(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + drm_r300_cmd_header_t cmd; + uint32_t addr; + + cmd.u = atom->cmd[0]; + addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; + + r300_emit_vpu(r300, &atom->cmd[1], r300->selected_vp->code.constants.Count * 4, addr); +} + void r500_emit_fp(struct r300_context *r300, uint32_t *data, unsigned len, @@ -784,11 +812,11 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vpi.emit = emit_vpu_state; if (is_r500) { - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0); r300->hw.vpp.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpp.emit = emit_vpu_state; + r300->hw.vpp.emit = emit_vpp_state; ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); r300->hw.vps.cmd[0] = @@ -805,11 +833,11 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vpucp[i].emit = emit_vpu_state; } } else { - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0); r300->hw.vpp.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpp.emit = emit_vpu_state; + r300->hw.vpp.emit = emit_vpp_state; ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); r300->hw.vps.cmd[0] = diff --git a/r300/r300_fragprog_common.c b/r300/r300_fragprog_common.c index 61ea5e4..0646da4 100644 --- a/r300/r300_fragprog_common.c +++ b/r300/r300_fragprog_common.c @@ -256,6 +256,19 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog fp->InputsRead = compiler.Base.Program.InputsRead; + /* Clear the fog/wpos_attr if code accessing these + * attributes has been removed during compilation + */ + if (fp->fog_attr != FRAG_ATTRIB_MAX) { + if (!(fp->InputsRead & (1 << fp->fog_attr))) + fp->fog_attr = FRAG_ATTRIB_MAX; + } + + if (fp->wpos_attr != FRAG_ATTRIB_MAX) { + if (!(fp->InputsRead & (1 << fp->wpos_attr))) + fp->wpos_attr = FRAG_ATTRIB_MAX; + } + rc_destroy(&compiler.Base); } diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c index 129004f..a1fe378 100644 --- a/r300/r300_vertprog.c +++ b/r300/r300_vertprog.c @@ -263,15 +263,25 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) { - rc_copy_output(&compiler.Base, - VERT_RESULT_HPOS, - vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0); + unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0; + + /* Set empty writemask for instructions writing to vp_wpos_attr + * before moving the wpos attr there. + * Such instructions will be removed by DCE. + */ + rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0); + rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr); } if (vp->key.FogAttr != FRAG_ATTRIB_MAX) { - rc_move_output(&compiler.Base, - VERT_RESULT_FOGC, - vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); + unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0; + + /* Set empty writemask for instructions writing to vp_fog_attr + * before moving the fog attr there. + * Such instructions will be removed by DCE. + */ + rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0); + rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X); } r3xx_compile_vertex_program(&compiler); @@ -382,7 +392,11 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, vap_cntl); R300_STATECHANGE(rmesa, vpp); param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); - bump_vpu_count(rmesa->hw.vpp.cmd, param_count); + if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) { + WARN_ONCE("Too many VP params, expect rendering errors\n"); + } + /* Prevent the overflow (vpu.count is u8) */ + bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count)); param_count /= 4; r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); @@ -395,6 +409,6 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | (inst_count << R300_PVS_LAST_INST_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); } diff --git a/r600/r700_state.c b/r600/r700_state.c index 6f156b5..12eaebb 100644 --- a/r600/r700_state.c +++ b/r600/r700_state.c @@ -614,7 +614,7 @@ static GLuint translate_logicop(GLenum logicop) case GL_XOR: return 0x66; case GL_EQUIV: - return 0xaa; + return 0x99; case GL_AND_REVERSE: return 0x44; case GL_AND_INVERTED: diff --git a/radeon/radeon_dma.c b/radeon/radeon_dma.c index 22499bc..6b7690c 100644 --- a/radeon/radeon_dma.c +++ b/radeon/radeon_dma.c @@ -184,9 +184,6 @@ void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n", __FUNCTION__, size, rmesa->dma.minimum_size); - if (!is_empty_list(&rmesa->dma.reserved)) - radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); - if (is_empty_list(&rmesa->dma.free) || last_elem(&rmesa->dma.free)->bo->size < size) { dma_bo = CALLOC_STRUCT(radeon_dma_bo); @@ -336,9 +333,6 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa) legacy_track_pending(rmesa->radeonScreen->bom, 0); } - if (!is_empty_list(&rmesa->dma.reserved)) - radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); - /* move waiting bos to free list. wait list provides gpu time to handle data before reuse */ foreach_s(dma_bo, temp, &rmesa->dma.wait) { @@ -368,6 +362,7 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa) /* move reserved to wait list */ foreach_s(dma_bo, temp, &rmesa->dma.reserved) { + radeon_bo_unmap(dma_bo->bo); /* free objects that are too small to be used because of large request */ if (dma_bo->bo->size < rmesa->dma.minimum_size) { radeon_bo_unref(dma_bo->bo); |