summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2019-07-08 21:53:23 -0700
committerIan Romanick <ian.d.romanick@intel.com>2019-07-16 17:03:51 -0700
commitdd827ce8abed8c7fe59b37037c17af339433cd94 (patch)
treeaac3286cb3cc778211bee15e56ff259ddaaabd02
parent4ad59a81324f0ed1598eab689ee52f1e5d627e88 (diff)
add modificationsdp-narrowing
-rw-r--r--src/intel/compiler/brw_vec4_nir.cpp34
1 files changed, 34 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 32c5bd40406..8fc350fd060 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -1346,6 +1346,40 @@ vec4_visitor::try_VF_for_constants(const float const1[4], src_reg *op1,
num_combined = merge_constants(const2, mask, combined, num_combined,
op2_swiz, no_modification);
+ if (num_combined > 4) {
+ /* Try really, really hard to fit both vectors into a single VF
+ * constant. Try two transformations to find more redundancy.
+ *
+ * - If all the components of one constant have the same sign, that
+ * operand will get either an abs() or -abs() source modifier applied.
+ * The components of that constant don't need the same sign as a
+ * component from the other vector to match. This helps cases like
+ * const1 = { 6.0, 7.0, 4.0, 1.0}, const2 = { -6.0, 1.0, 7.0, -4.0 }
+ * fit.
+ *
+ * - If both constants have mixed signs, try applying a negation to one
+ * of the operands. This helps cases like const1 = { 6.0, -7.0, 12.0,
+ * 21.0 }, const2 = { -6.0, 7.0, -12.0, -21.0 } fit.
+ */
+ m1 = same_sign(const1, mask);
+ if (m1 != no_modification) {
+ num_combined = merge_constants(const2, mask, combined, 0,
+ op2_swiz, no_modification);
+ num_combined = merge_constants(const1, mask, combined, num_combined,
+ op1_swiz, m1);
+ } else {
+ m2 = same_sign(const2, mask);
+
+ if (m2 == no_modification)
+ m2 = neg_modification;
+
+ num_combined = merge_constants(const1, mask, combined, 0,
+ op1_swiz, no_modification);
+ num_combined = merge_constants(const2, mask, combined, num_combined,
+ op2_swiz, m2);
+ }
+ }
+
/* Too many values for a single VF. */
if (num_combined > 4)
return false;