summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Clark <robclark@freedesktop.org>2016-04-11 14:47:19 -0400
committerRob Clark <robclark@freedesktop.org>2016-05-04 11:25:55 -0400
commit173871dfb988c3e9fb74a8016d2b024619a5d918 (patch)
treec68e6ebb85a9453d0c9d6b82ee6d6b307d3b90d5
parentb15c7fc268785cc8c960368d287ec799fe9dc502 (diff)
freedreno/ir3: lower immeds to const
Helps reduce register pressure and instruction counts for immediates that would otherwise require a mov into gpr. total instructions in shared programs: 4455332 -> 4369297 (-1.93%) total dwords in shared programs: 8807872 -> 8614432 (-2.20%) total full registers used in shared programs: 263062 -> 250846 (-4.64%) total half registers used in shader programs: 9845 -> 9845 (0.00%) total const registers used in shared programs: 1029735 -> 1466993 (42.46%) half full const instr dwords helped 0 10415 0 17861 5912 hurt 0 1157 21458 947 33 Signed-off-by: Rob Clark <robclark@freedesktop.org>
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c7
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_cp.c70
3 files changed, 80 insertions, 4 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 4470c2ac34..e1d0a4fee8 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -659,8 +659,11 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
ir3_emit_consts(vp, ring, ctx, emit->info, dirty);
if (!emit->key.binning_pass)
ir3_emit_consts(fp, ring, ctx, emit->info, dirty);
- /* mark clean after emitting consts: */
- ctx->prog.dirty = 0;
+ /* mark clean after emitting consts.. a bit ugly, but since binning
+ * pass is emitted first, we want to do this only for main draw:
+ */
+ if (!emit->key.binning_pass)
+ ctx->prog.dirty = 0;
}
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 27614f07de..0144ba492e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -648,8 +648,11 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
ir3_emit_consts(vp, ring, ctx, emit->info, dirty);
if (!emit->key.binning_pass)
ir3_emit_consts(fp, ring, ctx, emit->info, dirty);
- /* mark clean after emitting consts: */
- ctx->prog.dirty = 0;
+ /* mark clean after emitting consts.. a bit ugly, but since binning
+ * pass is emitted first, we want to do this only for main draw:
+ */
+ if (!emit->key.binning_pass)
+ ctx->prog.dirty = 0;
}
if ((dirty & FD_DIRTY_BLEND)) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index 267664c961..60c2830df9 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -29,13 +29,16 @@
#include "freedreno_util.h"
#include "ir3.h"
+#include "ir3_shader.h"
/*
* Copy Propagate:
*/
struct ir3_cp_ctx {
+ struct ir3 *shader;
struct ir3_shader_variant *so;
+ unsigned immediate_idx;
};
/* is it a type preserving mov, with ok flags? */
@@ -233,6 +236,62 @@ static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
*dstflags &= ~IR3_REG_SABS;
}
+static struct ir3_register *
+lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags)
+{
+ unsigned swiz, idx, i;
+
+ reg = ir3_reg_clone(ctx->shader, reg);
+
+ /* in some cases, there are restrictions on (abs)/(neg) plus const..
+ * so just evaluate those and clear the flags:
+ */
+ if (new_flags & IR3_REG_SABS) {
+ reg->iim_val = abs(reg->iim_val);
+ new_flags &= ~IR3_REG_SABS;
+ }
+
+ if (new_flags & IR3_REG_FABS) {
+ reg->fim_val = fabs(reg->fim_val);
+ new_flags &= ~IR3_REG_FABS;
+ }
+
+ if (new_flags & IR3_REG_SNEG) {
+ reg->iim_val = -reg->iim_val;
+ new_flags &= ~IR3_REG_SNEG;
+ }
+
+ if (new_flags & IR3_REG_FNEG) {
+ reg->fim_val = -reg->fim_val;
+ new_flags &= ~IR3_REG_FNEG;
+ }
+
+ for (i = 0; i < ctx->immediate_idx; i++) {
+ swiz = i % 4;
+ idx = i / 4;
+
+ if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) {
+ break;
+ }
+ }
+
+ if (i == ctx->immediate_idx) {
+ /* need to generate a new immediate: */
+ swiz = i % 4;
+ idx = i / 4;
+ ctx->so->immediates[idx].val[swiz] = reg->uim_val;
+ ctx->so->immediates_count = idx + 1;
+ ctx->immediate_idx++;
+ }
+
+ new_flags &= ~IR3_REG_IMMED;
+ new_flags |= IR3_REG_CONST;
+ reg->flags = new_flags;
+ reg->num = i + (4 * ctx->so->first_immediate);
+
+ return reg;
+}
+
/**
* Handle cp for a given src register. This additionally handles
* the cases of collapsing immedate/const (which replace the src
@@ -281,6 +340,13 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
combine_flags(&new_flags, src);
if (!valid_flags(instr, n, new_flags)) {
+ /* See if lowering an immediate to const would help. */
+ if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
+ debug_assert(new_flags & IR3_REG_IMMED);
+ instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags);
+ return;
+ }
+
/* special case for "normal" mad instructions, we can
* try swapping the first two args if that fits better.
*
@@ -378,6 +444,9 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
src_reg->flags = new_flags;
src_reg->iim_val = iim_val;
instr->regs[n+1] = src_reg;
+ } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
+ /* See if lowering an immediate to const would help. */
+ instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags);
}
return;
@@ -484,6 +553,7 @@ void
ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
{
struct ir3_cp_ctx ctx = {
+ .shader = ir,
.so = so,
};