summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>, 2016-09-18 12:33:12 +0200
committer: Samuel Pitoiset <samuel.pitoiset@gmail.com>, 2016-09-18 16:42:39 +0200
commit: 557a29b51fa3324cfbeecff100a54c7c6a6d87cd (patch)
tree: ced4f6588b2248781fe8ac18773b3c5e192d8e85
parent: d8b4f5fcca2ce299b8ef248b6f57896c7b85d18c (diff)
nv50/ir: optimize SUB(a, b) to MOV(a - b)
This helps shaders in UE4 demos, especially with Elemental (+1% perf).
This optimization reduces spilling usage in one shader which explains
the little gain.

GF100/GK104:
total instructions in shared programs :2838551 -> 2838045 (-0.02%)
total gprs used in shared programs    :396706 -> 396684 (-0.01%)
total local used in shared programs   :34432 -> 34416 (-0.05%)

                local        gpr       inst      bytes
    helped           1         19        112        112
      hurt           0          0          0          0

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 10
1 files changed, 10 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index a9172f834b..74a5a854e7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -576,6 +576,16 @@ ConstantFolding::expr(Instruction *i,
return;
}
break;
+ case OP_SUB:
+ switch (i->dType) {
+ case TYPE_F32: res.data.f32 = a->data.f32 - b->data.f32; break;
+ case TYPE_F64: res.data.f64 = a->data.f64 - b->data.f64; break;
+ case TYPE_S32:
+ case TYPE_U32: res.data.u32 = a->data.u32 - b->data.u32; break;
+ default:
+ return;
+ }
+ break;
case OP_POW:
switch (i->dType) {
case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break;