summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2018-08-06 13:05:08 -0700
committerIan Romanick <ian.d.romanick@intel.com>2019-06-28 08:30:48 -0700
commit60155cac4aecf5d3ab83a594c529fc566893cbc6 (patch)
treec5229fda7b715b6b81c0fe5ae5d0a3f24a7cb46d
parentda2d2ca9c65e610ec9bbfc7fb9b616cb4f177841 (diff)
nir/algebraic: Recognize open-coded flrp(a, b, a) c1896cdfefc
No shader-db changes on Ice Lake, Iron Lake, or GM45 as these platforms lack a LRP instruction. v2: Remove flrp@64 cases. Since Gen11 removes flrp@32, it seems unlikely that we'll ever have a flrp@64. Should that occur, the cases can be added back. All Gen6-Gen9 platforms had similar results. (Skylake shown) total instructions in shared programs: 15042938 -> 15042126 (<.01%) instructions in affected programs: 71776 -> 70964 (-1.13%) helped: 312 HURT: 0 helped stats (abs) min: 2 max: 3 x̄: 2.60 x̃: 3 helped stats (rel) min: 0.36% max: 4.55% x̄: 1.75% x̃: 1.28% 95% mean confidence interval for instructions value: -2.66 -2.55 95% mean confidence interval for instructions %-change: -1.89% -1.61% Instructions are helped. total cycles in shared programs: 354320991 -> 354319465 (<.01%) cycles in affected programs: 433742 -> 432216 (-0.35%) helped: 206 HURT: 78 helped stats (abs) min: 2 max: 244 x̄: 21.02 x̃: 8 helped stats (rel) min: 0.06% max: 19.59% x̄: 1.72% x̃: 0.82% HURT stats (abs) min: 1 max: 220 x̄: 35.95 x̃: 10 HURT stats (rel) min: 0.07% max: 30.48% x̄: 2.53% x̃: 0.56% 95% mean confidence interval for cycles value: -10.68 -0.06 95% mean confidence interval for cycles %-change: -0.99% -0.12% Cycles are helped.
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py24
1 files changed, 24 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 33b6934e157..9f47881cfd2 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1237,6 +1237,30 @@ late_optimizations = [
(('~fadd@32', ('fmul(is_used_once)', 2.0, a), -1.0), ('flrp', -1.0, 1.0, a), '!options->lower_flrp32'),
(('~fadd@32', ('fmul(is_used_once)', -2.0, a), 1.0), ('flrp', 1.0, -1.0, a), '!options->lower_flrp32'),
+ # flrp(a, b, a)
+ # a*(1-a) + b*a
+ # a + -a*a + a*b (1)
+ # a + a*(b - a)
+ # Option 1: ffma(a, (b-a), a)
+ #
+ # Alternately, after (1):
+ # a*(1+b) + -a*a
+ # a*((1+b) + -a)
+ #
+ # Let b=1
+ #
+ # Option 2: ffma(a, 2, -(a*a))
+ # Option 3: ffma(a, 2, (-a)*a)
+ # Option 4: ffma(a, -a, (2*a))
+ # Option 5: a * (2 - a)
+ #
+ # There are a lot of other possible combinations.
+ (('~ffma@32', ('fadd', b, ('fneg', a)), a, a), ('flrp', a, b, a), '!options->lower_flrp32'),
+ (('~ffma@32', a, 2.0, ('fneg', ('fmul', a, a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
+ (('~ffma@32', a, 2.0, ('fmul', ('fneg', a), a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
+ (('~ffma@32', a, ('fneg', a), ('fmul', 2.0, a)), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
+ (('~fmul@32', a, ('fadd', 2.0, ('fneg', a))), ('flrp', a, 1.0, a), '!options->lower_flrp32'),
+
# we do these late so that we don't get in the way of creating ffmas
(('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
(('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),