diff options
author | Dave Airlie <airlied@redhat.com> | 2010-08-31 11:42:01 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2010-08-31 11:44:34 +1000 |
commit | be7816f2b7f0b064a47fb3f101477ad5dba74017 (patch) | |
tree | 5fd0161b14ba7469f9c0d7d66b9381e9d7b9fda6 /src/gallium/drivers/r600/r600_shader.c | |
parent | ee0153f891bb75ee14db579e6628d592032d6801 (diff) |
r600g: fixup trig functions when input is a literal
So as the trig functions used up the literal spots for the PI work, if the arg0 was an immediate
we'd hit failure, so copy the literal before starting.
add some tracking of max temp used to avoid trashing temp regs.
5 more piglits, fp1 COS,SCS,SIN tests
Diffstat (limited to 'src/gallium/drivers/r600/r600_shader.c')
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 76 |
1 files changed, 67 insertions, 9 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index be6023268a..ed953d3f1d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -50,6 +50,7 @@ struct r600_shader_ctx { u32 value[4]; u32 *literals; u32 nliterals; + u32 max_driver_temp_used; }; struct r600_shader_tgsi_instruction { @@ -354,6 +355,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) return 0; } +static int r600_get_temp(struct r600_shader_ctx *ctx) +{ + return ctx->temp_reg + ctx->max_driver_temp_used++; +} + int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; @@ -436,6 +442,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = tgsi_is_supported(&ctx); if (r) goto out_err; + ctx.max_driver_temp_used = 0; + /* reserve first tmp for everyone */ + r600_get_temp(&ctx); opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; r = ctx.inst_info->process(&ctx); @@ -647,12 +656,13 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s } for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { + int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; alu.src[0].sel = r600_src[j].sel; alu.src[0].chan = k; - alu.dst.sel = ctx->temp_reg + j; + alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; if (k == 3) @@ -661,13 +671,52 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s if (r) return r; } - r600_src[j].sel = ctx->temp_reg + j; + r600_src[j].sel = treg; j--; } } return 0; } +/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ +static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nliteral, r; + + for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { + nliteral++; + } + } + for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = r600_src[j].sel; + alu.src[0].chan = k; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + r600_src[j].sel = treg; + j++; + } + } + return 0; +} + static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -755,6 +804,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, r = tgsi_split_constant(ctx, r600_src); if (r) return r; + + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; + lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); lit_vals[1] = fui(0.5f); @@ -834,10 +888,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; - - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; + int lasti = 0; r = tgsi_setup_trig(ctx, r600_src); if (r) @@ -858,14 +909,21 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) /* replicate result */ for (i = 0; i < 4; i++) { + if (inst->Dst[0].Register.WriteMask & (1 << i)) + lasti = i; + } + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.src[0].sel = ctx->temp_reg; alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + + alu.src[0].sel = ctx->temp_reg; r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) return r; - alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; - if (i == 3) + if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) |