diff options
author | Corbin Simpson <MostAwesomeDude@gmail.com> | 2010-03-26 05:24:44 -0700 |
---|---|---|
committer | Corbin Simpson <MostAwesomeDude@gmail.com> | 2010-03-26 05:24:44 -0700 |
commit | b6df7aed60189d5f28a139c6fe351022ca2907a4 (patch) | |
tree | 66071ce190cddf714c5fbe5dcaf56a6d92bd462d | |
parent | 38c7a01b6c220ad04c5754602673ad3cf36ad508 (diff) |
r300/compiler: Lower CMP for vertex programs.
I think my maths is right?
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index b5c08aea49..f5b7d57eab 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -506,6 +506,46 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c, inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } +static void transform_r300_vertex_CMP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* There is no decent CMP available, so let's rig one up. + * CMP is defined as dst = src0 < 0.0 ? src1 : src2 + * The following sequence consumes two temps and three extra slots, + * but should be equivalent: + * + * SLT tmp0, src0, 0.0 + * SGE tmp1, src0, 0.0 + * MUL tmp0, tmp0, src1 + * MAD dst, src2, tmp1, tmp0 + * + * Yes, I know, I'm a mad scientist. ~ C. */ + int tempreg0 = rc_find_free_temporary(c); + int tempreg1 = rc_find_free_temporary(c); + + /* SLT tmp0, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstreg(RC_FILE_TEMPORARY, tempreg0), + inst->U.I.SrcReg[0], builtin_zero); + + /* SGE tmp1, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + dstreg(RC_FILE_TEMPORARY, tempreg1), + inst->U.I.SrcReg[0], builtin_zero); + + /* MUL tmp0, tmp0, src1 */ + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + dstreg(RC_FILE_TEMPORARY, tempreg0), + srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1]); + + /* MAD dst, src2, tmp1, tmp0 */ + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + inst->U.I.DstReg, + inst->U.I.SrcReg[2], srcreg(RC_FILE_TEMPORARY, tempreg1), srcreg(RC_FILE_TEMPORARY, tempreg0)); + + rc_remove_instruction(inst); +} + /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. @@ -517,6 +557,7 @@ int r300_transform_vertex_alu( { switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; |