summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Clark <robclark@freedesktop.org>2013-09-01 11:35:56 -0400
committerRob Clark <robclark@freedesktop.org>2013-09-14 13:31:58 -0400
commit64c134cedb8845f274b017ff9ca3c3ca4d34372b (patch)
treecde826228ec9f372472f691e257597a860a725c8
parentcb9e07aa84ff2dd8492fd8a7064ff2539d7d40f9 (diff)
freedreno/a3xx/compiler: add LIT
Needed by glxgears and etuxracer ;-) Signed-off-by: Rob Clark <robclark@freedesktop.org>
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_compiler.c145
1 files changed, 142 insertions, 3 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 994f71c0d2..be6ecdc41f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -65,10 +65,11 @@ static unsigned regmask_idx(struct ir3_register *reg)
static void regmask_set(regmask_t regmask, struct ir3_register *reg,
unsigned wrmask)
{
+ unsigned ridx = regmask_idx(reg) & ~0x3;
unsigned i;
for (i = 0; i < 4; i++) {
if (wrmask & (1 << i)) {
- unsigned idx = regmask_idx(reg) + i;
+ unsigned idx = ridx + i;
regmask[idx / 8] |= 1 << (idx % 8);
}
}
@@ -179,8 +180,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
ctx->info.file_max[TGSI_FILE_OUTPUT] + 1;
so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
- ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 +
- ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
+ ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
ret = tgsi_parse_init(&ctx->parser, tokens);
if (ret != TGSI_PARSE_OK)
@@ -602,6 +602,144 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
* native instructions:
*/
+/* LIT - Light Coefficients:
+ * dst.x = 1.0
+ * dst.y = max(src.x, 0.0)
+ * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0.0
+ * dst.w = 1.0
+ * Emitted sequence (tmp = internal temporary, {x} = immediate constant x):
+ * max.f tmp.y, src.y, {0.0}
+ * nop
+ * max.f tmp.z, src.w, {-128.0}
+ * cmps.f.ge tmp.w, (neg)src.x, {0.0}
+ * max.f dst.y, src.x, {0.0}
+ * nop
+ * min.f tmp.x, tmp.z, {128.0}
+ * add.s tmp.z, tmp.w, -1
+ * log2 tmp.y, tmp.y
+ * (rpt1)nop
+ * (ss)mul.f tmp.x, tmp.x, tmp.y
+ * mov.f16f16 dst.x, {1.0}
+ * mov.f16f16 dst.w, {1.0}
+ * (rpt3)nop
+ * exp2 tmp.x, tmp.x
+ * (ss)sel.f16 dst.z, {0.0}, tmp.z, tmp.x
+ */
+static void
+trans_lit(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct ir3_instruction *instr;
+ struct ir3_register *r;
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register *tmp_src;
+ struct tgsi_src_register constval0, constval1, constval128;
+ struct tgsi_src_register *src = &inst->Src[0].Register;
+ struct tgsi_dst_register *dst = get_dst(ctx, inst);
+
+ tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
+
+ if (is_const(src))
+ src = get_unconst(ctx, src);
+
+ get_immediate(ctx, &constval0, fui(0.0));
+ get_immediate(ctx, &constval1, fui(1.0));
+ get_immediate(ctx, &constval128, fui(128.0));
+
+ /* max.f tmp.y, src.y, {0.0} -- base of the power, clamped to >= 0.0 */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+ add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Y);
+ add_src_reg(ctx, instr, src, TGSI_SWIZZLE_Y);
+ add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
+
+ /* nop */
+ ir3_instr_create(ctx->ir, 0, OPC_NOP);
+
+ /* max.f tmp.z, src.w, {-128.0} -- lower clamp; -128.0 is the 128.0 immediate with IR3_REG_NEGATE */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+ add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Z);
+ add_src_reg(ctx, instr, src, TGSI_SWIZZLE_W);
+ add_src_reg(ctx, instr, &constval128,
+ constval128.SwizzleX)->flags |= IR3_REG_NEGATE;
+
+ /* cmps.f.ge tmp.w, (neg)src.x, {0.0} -- compares (-src.x) >= 0.0 */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+ add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_W);
+ add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X)->flags |= IR3_REG_NEGATE;
+ add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
+ instr->cat2.condition = IR3_COND_GE;
+
+ /* max.f dst.y, src.x, {0.0} -- final value of dst.y */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+ add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_Y);
+ add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X);
+ add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
+
+ /* nop */
+ ir3_instr_create(ctx->ir, 0, OPC_NOP);
+
+ /* min.f tmp.x, tmp.z, {128.0} -- upper clamp; tmp.x = clamp(src.w, -128.0, 128.0) */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
+ add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
+ add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Z);
+ add_src_reg(ctx, instr, &constval128, constval128.SwizzleX);
+
+ /* add.s tmp.z, tmp.w, -1 -- -1 is an inline integer IR3_REG_IMMED; tmp.z feeds the final sel */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+ add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Z);
+ add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_W);
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+
+ /* log2 tmp.y, tmp.y -- the result register is also recorded in ctx->needs_ss */
+ instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
+ r = add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Y);
+ add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Y);
+ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_Y);
+
+ /* (rpt1)nop */
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1;
+
+ /* (ss)mul.f tmp.x, tmp.x, tmp.y -- exponent * log2(base); (ss) is not set here, presumably derived from needs_ss (TODO confirm) */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+ add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
+ add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
+ add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Y);
+
+ /* mov.f16f16 dst.x, {1.0} -- actual src/dst types are whatever get_ftype(ctx) returns */
+ instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr->cat1.src_type = get_ftype(ctx);
+ instr->cat1.dst_type = get_ftype(ctx);
+ add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X);
+ add_src_reg(ctx, instr, &constval1, constval1.SwizzleX);
+
+ /* mov.f16f16 dst.w, {1.0} -- actual src/dst types are whatever get_ftype(ctx) returns */
+ instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr->cat1.src_type = get_ftype(ctx);
+ instr->cat1.dst_type = get_ftype(ctx);
+ add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_W);
+ add_src_reg(ctx, instr, &constval1, constval1.SwizzleX);
+
+ /* (rpt3)nop */
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 3;
+
+ /* exp2 tmp.x, tmp.x -- 2^(exponent * log2(base)); result register is recorded in ctx->needs_ss */
+ instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
+ r = add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
+ add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
+ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
+
+ /* (ss)sel.f16 dst.z, {0.0}, tmp.z, tmp.x -- OPC_SEL_F32 is emitted instead when !half_precision */
+ instr = ir3_instr_create(ctx->ir, 3,
+ ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_Z);
+ add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
+ add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Z);
+ add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
+
+ put_dst(ctx, inst, dst);
+}
+
static inline void
get_swiz(unsigned *swiz, struct tgsi_src_register *src)
{
@@ -1266,6 +1404,7 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }
INSTR(MOV, instr_cat1),
+ INSTR(LIT, trans_lit),
INSTR(RCP, instr_cat4, .opc = OPC_RCP),
INSTR(RSQ, instr_cat4, .opc = OPC_RSQ),
INSTR(SQRT, instr_cat4, .opc = OPC_SQRT),