summary refs log tree commit diff
diff options
context:
space:
mode:
author Ian Romanick <ian.d.romanick@intel.com> 2021-02-16 12:36:14 -0800
committer Ian Romanick <ian.d.romanick@intel.com> 2021-04-02 12:56:18 -0700
commit 55fde67d22cf25a07dbefda4f94d73541a3a791a (patch)
tree f16266c74ae3a8701139e88f615d936f03ecbd8c
parent c2bf945d25038bd06d8fe899daa343ed81b925d0 (diff)
WIP: nir/range_analysis: Improve analysis of ffma and flrp
TODO: This should be split into at least 3 separate commits: refactors of analyze_{fadd,fmul,fneg}, changes to ffma analysis, and changes to flrp analysis. There should probably also be a fourth commit, placed first, that restructures the existing fmul analysis to look more like what's in analyze_fmul. Helps 4 shaders (for SIMD8, SIMD16, and SIMD32) in Age of Wonders III by 1 instruction (2 instructions for SIMD32).
-rw-r--r-- src/compiler/nir/nir_range_analysis.c 307
1 files changed, 155 insertions, 152 deletions
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index bbee87afbe6..484ba0ccce4 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -419,6 +419,152 @@ union_ranges(enum ssa_ranges a, enum ssa_ranges b)
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
#endif /* !defined(NDEBUG) */
+static const enum ssa_ranges fneg_table[last_range + 1] = {
+/* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
+ _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero
+};
+
+
+/* ge_zero: ge_zero + ge_zero
+ *
+ * gt_zero: gt_zero + eq_zero
+ * | gt_zero + ge_zero
+ * | eq_zero + gt_zero # Addition is commutative
+ * | ge_zero + gt_zero # Addition is commutative
+ * | gt_zero + gt_zero
+ * ;
+ *
+ * le_zero: le_zero + le_zero
+ *
+ * lt_zero: lt_zero + eq_zero
+ * | lt_zero + le_zero
+ * | eq_zero + lt_zero # Addition is commutative
+ * | le_zero + lt_zero # Addition is commutative
+ * | lt_zero + lt_zero
+ * ;
+ *
+ * ne_zero: eq_zero + ne_zero
+ * | ne_zero + eq_zero # Addition is commutative
+ * ;
+ *
+ * eq_zero: eq_zero + eq_zero
+ * ;
+ *
+ * All other cases are 'unknown'. The seemingly odd entry is (ne_zero,
+ * ne_zero), but that could be (-5, +5), whose sum is zero and so not ne_zero.
+ */
+static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
+ /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
+ /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
+ /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero },
+ /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
+ /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
+ /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
+ /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
+ /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
+};
+
+/* Due to flush-to-zero semantics of floating-point numbers with very
+ * small magnitudes, we can never really be sure a result will be
+ * non-zero.
+ *
+ * ge_zero: ge_zero * ge_zero
+ * | ge_zero * gt_zero
+ * | ge_zero * eq_zero
+ * | le_zero * lt_zero
+ * | lt_zero * le_zero # Multiplication is commutative
+ * | le_zero * le_zero
+ * | gt_zero * ge_zero # Multiplication is commutative
+ * | eq_zero * ge_zero # Multiplication is commutative
+ * | a * a # Left source == right source
+ * | gt_zero * gt_zero
+ * | lt_zero * lt_zero
+ * ;
+ *
+ * le_zero: ge_zero * le_zero
+ * | ge_zero * lt_zero
+ * | lt_zero * ge_zero # Multiplication is commutative
+ * | le_zero * ge_zero # Multiplication is commutative
+ * | le_zero * gt_zero
+ * | lt_zero * gt_zero
+ * | gt_zero * lt_zero # Multiplication is commutative
+ * ;
+ *
+ * eq_zero: eq_zero * <any>
+ * | <any> * eq_zero # Multiplication is commutative
+ *
+ * All other cases are 'unknown'.
+ */
+static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
+ /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
+ /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
+ /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
+ /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
+ /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
+ /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
+ /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
+ /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
+};
+
+static struct ssa_result_range
+analyze_fneg(struct ssa_result_range r)
+{
+ r.range = fneg_table[r.range];
+ return r;
+}
+
+static struct ssa_result_range
+analyze_fadd(struct ssa_result_range left, struct ssa_result_range right)
+{
+ struct ssa_result_range r = {unknown, false, false, false};
+
+ r.is_integral = left.is_integral && right.is_integral;
+ r.range = fadd_table[left.range][right.range];
+
+ /* X + Y is NaN if either operand is NaN or if one operand is +Inf and
+ * the other is -Inf. If neither operand is NaN and at least one of the
+ * operands is finite, then the result cannot be NaN.
+ */
+ r.is_a_number = left.is_a_number && right.is_a_number &&
+ (left.is_finite || right.is_finite);
+
+ return r;
+}
+
+static struct ssa_result_range
+analyze_fmul(struct ssa_result_range left, struct ssa_result_range right,
+ const struct nir_alu_instr *alu)
+{
+ struct ssa_result_range r = {unknown, false, false, false};
+
+ r.is_integral = left.is_integral && right.is_integral;
+ r.range = fmul_table[left.range][right.range];
+
+ if (alu != NULL && r.range != eq_zero) {
+ /* x * x => ge_zero */
+ if (nir_alu_srcs_equal(alu, alu, 0, 1)) {
+ /* Even if x > 0, the result of x*x can be zero when x is, for
+ * example, a subnormal number.
+ */
+ r.range = ge_zero;
+ } else if (nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
+ /* -x * x => le_zero. */
+ r.range = le_zero;
+ }
+ }
+
+ /* Multiplication produces NaN for X * NaN and for 0 * ±Inf. If both
+ * operands are numbers and either both are finite or one is finite and the
+ * other cannot be zero, then the result must be a number. NOTE(review): the
+ * test below uses !is_not_zero(), not is_not_zero() -- confirm intent. */
+ r.is_a_number = (left.is_a_number && right.is_a_number) &&
+ ((left.is_finite && right.is_finite) ||
+ (!is_not_zero(left.range) && right.is_finite) ||
+ (left.is_finite && !is_not_zero(right.range)));
+
+ return r;
+}
+
/**
* Analyze an expression to determine the range of its result
*
@@ -471,100 +617,14 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
struct ssa_result_range r = {unknown, false, false, false};
- /* ge_zero: ge_zero + ge_zero
- *
- * gt_zero: gt_zero + eq_zero
- * | gt_zero + ge_zero
- * | eq_zero + gt_zero # Addition is commutative
- * | ge_zero + gt_zero # Addition is commutative
- * | gt_zero + gt_zero
- * ;
- *
- * le_zero: le_zero + le_zero
- *
- * lt_zero: lt_zero + eq_zero
- * | lt_zero + le_zero
- * | eq_zero + lt_zero # Addition is commutative
- * | le_zero + lt_zero # Addition is commutative
- * | lt_zero + lt_zero
- * ;
- *
- * ne_zero: eq_zero + ne_zero
- * | ne_zero + eq_zero # Addition is commutative
- * ;
- *
- * eq_zero: eq_zero + eq_zero
- * ;
- *
- * All other cases are 'unknown'. The seeming odd entry is (ne_zero,
- * ne_zero), but that could be (-5, +5) which is not ne_zero.
- */
- static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
- /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
- /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
- /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero },
- /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
- /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
- /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
- /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
- /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
- };
-
ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table);
ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table);
- /* Due to flush-to-zero semanatics of floating-point numbers with very
- * small mangnitudes, we can never really be sure a result will be
- * non-zero.
- *
- * ge_zero: ge_zero * ge_zero
- * | ge_zero * gt_zero
- * | ge_zero * eq_zero
- * | le_zero * lt_zero
- * | lt_zero * le_zero # Multiplication is commutative
- * | le_zero * le_zero
- * | gt_zero * ge_zero # Multiplication is commutative
- * | eq_zero * ge_zero # Multiplication is commutative
- * | a * a # Left source == right source
- * | gt_zero * gt_zero
- * | lt_zero * lt_zero
- * ;
- *
- * le_zero: ge_zero * le_zero
- * | ge_zero * lt_zero
- * | lt_zero * ge_zero # Multiplication is commutative
- * | le_zero * ge_zero # Multiplication is commutative
- * | le_zero * gt_zero
- * | lt_zero * gt_zero
- * | gt_zero * lt_zero # Multiplication is commutative
- * ;
- *
- * eq_zero: eq_zero * <any>
- * <any> * eq_zero # Multiplication is commutative
- *
- * All other cases are 'unknown'.
- */
- static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
- /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
- /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
- /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
- /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
- /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
- /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
- /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
- /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
- };
-
ASSERT_TABLE_IS_COMMUTATIVE(fmul_table);
ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table);
ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table);
- static const enum ssa_ranges fneg_table[last_range + 1] = {
- /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
- _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero
- };
-
ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table);
ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table);
@@ -650,15 +710,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
const struct ssa_result_range right =
analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
- r.is_integral = left.is_integral && right.is_integral;
- r.range = fadd_table[left.range][right.range];
-
- /* X + Y is NaN if either operand is NaN or if one operand is +Inf and
- * the other is -Inf. If neither operand is NaN and at least one of the
- * operands is finite, then the result cannot be NaN.
- */
- r.is_a_number = left.is_a_number && right.is_a_number &&
- (left.is_finite || right.is_finite);
+ r = analyze_fadd(left, right);
break;
}
@@ -864,29 +916,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
const struct ssa_result_range right =
analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
- r.is_integral = left.is_integral && right.is_integral;
-
- /* x * x => ge_zero */
- if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
- /* Even if x > 0, the result of x*x can be zero when x is, for
- * example, a subnormal number.
- */
- r.range = ge_zero;
- } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
- /* -x * x => le_zero. */
- r.range = le_zero;
- } else
- r.range = fmul_table[left.range][right.range];
-
- /* Mulitpliation produces NaN for X * NaN and for 0 * ±Inf. If both
- * operands are numbers and either both are finite or one is finite and
- * the other cannot be zero, then the result must be a number.
- */
- r.is_a_number = (left.is_a_number && right.is_a_number) &&
- ((left.is_finite && right.is_finite) ||
- (!is_not_zero(left.range) && right.is_finite) ||
- (left.is_finite && !is_not_zero(right.range)));
-
+ r = analyze_fmul(left, right, alu);
break;
}
@@ -906,7 +936,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
case nir_op_fneg:
r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
- r.range = fneg_table[r.range];
+ r = analyze_fneg(r);
break;
case nir_op_fsat: {
@@ -1117,26 +1147,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
const struct ssa_result_range third =
analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
- r.is_integral = first.is_integral && second.is_integral &&
- third.is_integral;
-
- /* Various cases can result in NaN, so assume the worst. */
- r.is_a_number = false;
-
- enum ssa_ranges fmul_range;
-
- if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
- /* See handling of nir_op_fmul for explanation of why ge_zero is the
- * range.
- */
- fmul_range = ge_zero;
- } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
- /* -x * x => le_zero */
- fmul_range = le_zero;
- } else
- fmul_range = fmul_table[first.range][second.range];
-
- r.range = fadd_table[fmul_range][third.range];
+ r = analyze_fadd(analyze_fmul(first, second, alu), third);
break;
}
@@ -1148,20 +1159,12 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
const struct ssa_result_range third =
analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
- r.is_integral = first.is_integral && second.is_integral &&
- third.is_integral;
-
- /* Various cases can result in NaN, so assume the worst. */
- r.is_a_number = false;
-
/* Decompose the flrp to first + third * (second + -first) */
- const enum ssa_ranges inner_fadd_range =
- fadd_table[second.range][fneg_table[first.range]];
-
- const enum ssa_ranges fmul_range =
- fmul_table[third.range][inner_fadd_range];
-
- r.range = fadd_table[first.range][fmul_range];
+ r = analyze_fadd(first,
+ analyze_fmul(third,
+ analyze_fadd(second,
+ analyze_fneg(first)),
+ NULL));
break;
}