diff options
author | Ian Romanick <ian.d.romanick@intel.com> | 2021-02-16 12:36:14 -0800 |
---|---|---|
committer | Ian Romanick <ian.d.romanick@intel.com> | 2021-04-02 12:56:18 -0700 |
commit | 55fde67d22cf25a07dbefda4f94d73541a3a791a (patch) | |
tree | f16266c74ae3a8701139e88f615d936f03ecbd8c | |
parent | c2bf945d25038bd06d8fe899daa343ed81b925d0 (diff) |
WIP: nir/range_analysis: Improve analysis of ffma and flrp
TODO: This should be split into at least 3 separate commits: refactors
of analyze_{fadd,fmul,fneg}, changes to ffma analysis, and changes to
flrp analysis. There should probably also be a fourth commit, placed
first in the series, that changes the existing fmul analysis to be
structured more like what's in analyze_fmul.
Helps 4 shaders (for SIMD8, SIMD16, and SIMD32) in Age of Wonders III by
1 instruction (2 instructions for SIMD32).
-rw-r--r-- | src/compiler/nir/nir_range_analysis.c | 307 |
1 files changed, 155 insertions, 152 deletions
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c index bbee87afbe6..484ba0ccce4 100644 --- a/src/compiler/nir/nir_range_analysis.c +++ b/src/compiler/nir/nir_range_analysis.c @@ -419,6 +419,152 @@ union_ranges(enum ssa_ranges a, enum ssa_ranges b) #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t) #endif /* !defined(NDEBUG) */ +static const enum ssa_ranges fneg_table[last_range + 1] = { +/* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ + _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero +}; + + +/* ge_zero: ge_zero + ge_zero + * + * gt_zero: gt_zero + eq_zero + * | gt_zero + ge_zero + * | eq_zero + gt_zero # Addition is commutative + * | ge_zero + gt_zero # Addition is commutative + * | gt_zero + gt_zero + * ; + * + * le_zero: le_zero + le_zero + * + * lt_zero: lt_zero + eq_zero + * | lt_zero + le_zero + * | eq_zero + lt_zero # Addition is commutative + * | le_zero + lt_zero # Addition is commutative + * | lt_zero + lt_zero + * ; + * + * ne_zero: eq_zero + ne_zero + * | ne_zero + eq_zero # Addition is commutative + * ; + * + * eq_zero: eq_zero + eq_zero + * ; + * + * All other cases are 'unknown'. The seeming odd entry is (ne_zero, + * ne_zero), but that could be (-5, +5) which is not ne_zero. 
+ */ +static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = { + /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ + /* unknown */ { _______, _______, _______, _______, _______, _______, _______ }, + /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero }, + /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero }, + /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero }, + /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero }, + /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero }, + /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero }, +}; + +/* Due to flush-to-zero semanatics of floating-point numbers with very + * small mangnitudes, we can never really be sure a result will be + * non-zero. + * + * ge_zero: ge_zero * ge_zero + * | ge_zero * gt_zero + * | ge_zero * eq_zero + * | le_zero * lt_zero + * | lt_zero * le_zero # Multiplication is commutative + * | le_zero * le_zero + * | gt_zero * ge_zero # Multiplication is commutative + * | eq_zero * ge_zero # Multiplication is commutative + * | a * a # Left source == right source + * | gt_zero * gt_zero + * | lt_zero * lt_zero + * ; + * + * le_zero: ge_zero * le_zero + * | ge_zero * lt_zero + * | lt_zero * ge_zero # Multiplication is commutative + * | le_zero * ge_zero # Multiplication is commutative + * | le_zero * gt_zero + * | lt_zero * gt_zero + * | gt_zero * lt_zero # Multiplication is commutative + * ; + * + * eq_zero: eq_zero * <any> + * <any> * eq_zero # Multiplication is commutative + * + * All other cases are 'unknown'. 
+ */ +static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = { + /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ + /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero }, + /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero }, + /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero }, + /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero }, + /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero }, + /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero }, + /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero } +}; + +static struct ssa_result_range +analyze_fneg(struct ssa_result_range r) +{ + r.range = fneg_table[r.range]; + return r; +} + +static struct ssa_result_range +analyze_fadd(struct ssa_result_range left, struct ssa_result_range right) +{ + struct ssa_result_range r = {unknown, false, false, false}; + + r.is_integral = left.is_integral && right.is_integral; + r.range = fadd_table[left.range][right.range]; + + /* X + Y is NaN if either operand is NaN or if one operand is +Inf and + * the other is -Inf. If neither operand is NaN and at least one of the + * operands is finite, then the result cannot be NaN. 
+ */ + r.is_a_number = left.is_a_number && right.is_a_number && + (left.is_finite || right.is_finite); + + return r; +} + +static struct ssa_result_range +analyze_fmul(struct ssa_result_range left, struct ssa_result_range right, + const struct nir_alu_instr *alu) +{ + struct ssa_result_range r = {unknown, false, false, false}; + + r.is_integral = left.is_integral && right.is_integral; + r.range = fmul_table[left.range][right.range]; + + if (alu != NULL && r.range != eq_zero) { + /* x * x => ge_zero */ + if (nir_alu_srcs_equal(alu, alu, 0, 1)) { + /* Even if x > 0, the result of x*x can be zero when x is, for + * example, a subnormal number. + */ + r.range = ge_zero; + } else if (nir_alu_srcs_negative_equal(alu, alu, 0, 1)) { + /* -x * x => le_zero. */ + r.range = le_zero; + } + } + + /* Mulitpliation produces NaN for X * NaN and for 0 * ±Inf. If both + * operands are numbers and either both are finite or one is finite and the + * other cannot be zero, then the result must be a number. + */ + r.is_a_number = (left.is_a_number && right.is_a_number) && + ((left.is_finite && right.is_finite) || + (!is_not_zero(left.range) && right.is_finite) || + (left.is_finite && !is_not_zero(right.range))); + + return r; +} + /** * Analyze an expression to determine the range of its result * @@ -471,100 +617,14 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, struct ssa_result_range r = {unknown, false, false, false}; - /* ge_zero: ge_zero + ge_zero - * - * gt_zero: gt_zero + eq_zero - * | gt_zero + ge_zero - * | eq_zero + gt_zero # Addition is commutative - * | ge_zero + gt_zero # Addition is commutative - * | gt_zero + gt_zero - * ; - * - * le_zero: le_zero + le_zero - * - * lt_zero: lt_zero + eq_zero - * | lt_zero + le_zero - * | eq_zero + lt_zero # Addition is commutative - * | le_zero + lt_zero # Addition is commutative - * | lt_zero + lt_zero - * ; - * - * ne_zero: eq_zero + ne_zero - * | ne_zero + eq_zero # Addition is commutative - * ; - * - * eq_zero: eq_zero 
+ eq_zero - * ; - * - * All other cases are 'unknown'. The seeming odd entry is (ne_zero, - * ne_zero), but that could be (-5, +5) which is not ne_zero. - */ - static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = { - /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ - /* unknown */ { _______, _______, _______, _______, _______, _______, _______ }, - /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero }, - /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero }, - /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero }, - /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero }, - /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero }, - /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero }, - }; - ASSERT_TABLE_IS_COMMUTATIVE(fadd_table); ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table); ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table); - /* Due to flush-to-zero semanatics of floating-point numbers with very - * small mangnitudes, we can never really be sure a result will be - * non-zero. 
- * - * ge_zero: ge_zero * ge_zero - * | ge_zero * gt_zero - * | ge_zero * eq_zero - * | le_zero * lt_zero - * | lt_zero * le_zero # Multiplication is commutative - * | le_zero * le_zero - * | gt_zero * ge_zero # Multiplication is commutative - * | eq_zero * ge_zero # Multiplication is commutative - * | a * a # Left source == right source - * | gt_zero * gt_zero - * | lt_zero * lt_zero - * ; - * - * le_zero: ge_zero * le_zero - * | ge_zero * lt_zero - * | lt_zero * ge_zero # Multiplication is commutative - * | le_zero * ge_zero # Multiplication is commutative - * | le_zero * gt_zero - * | lt_zero * gt_zero - * | gt_zero * lt_zero # Multiplication is commutative - * ; - * - * eq_zero: eq_zero * <any> - * <any> * eq_zero # Multiplication is commutative - * - * All other cases are 'unknown'. - */ - static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = { - /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ - /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero }, - /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero }, - /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero }, - /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero }, - /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero }, - /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero }, - /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero } - }; - ASSERT_TABLE_IS_COMMUTATIVE(fmul_table); ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table); ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table); - static const enum ssa_ranges fneg_table[last_range + 1] = { - /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ - _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero - }; - ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table); 
ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table); @@ -650,15 +710,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, const struct ssa_result_range right = analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1)); - r.is_integral = left.is_integral && right.is_integral; - r.range = fadd_table[left.range][right.range]; - - /* X + Y is NaN if either operand is NaN or if one operand is +Inf and - * the other is -Inf. If neither operand is NaN and at least one of the - * operands is finite, then the result cannot be NaN. - */ - r.is_a_number = left.is_a_number && right.is_a_number && - (left.is_finite || right.is_finite); + r = analyze_fadd(left, right); break; } @@ -864,29 +916,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, const struct ssa_result_range right = analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1)); - r.is_integral = left.is_integral && right.is_integral; - - /* x * x => ge_zero */ - if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) { - /* Even if x > 0, the result of x*x can be zero when x is, for - * example, a subnormal number. - */ - r.range = ge_zero; - } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) { - /* -x * x => le_zero. */ - r.range = le_zero; - } else - r.range = fmul_table[left.range][right.range]; - - /* Mulitpliation produces NaN for X * NaN and for 0 * ±Inf. If both - * operands are numbers and either both are finite or one is finite and - * the other cannot be zero, then the result must be a number. 
- */ - r.is_a_number = (left.is_a_number && right.is_a_number) && - ((left.is_finite && right.is_finite) || - (!is_not_zero(left.range) && right.is_finite) || - (left.is_finite && !is_not_zero(right.range))); - + r = analyze_fmul(left, right, alu); break; } @@ -906,7 +936,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, case nir_op_fneg: r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); - r.range = fneg_table[r.range]; + r = analyze_fneg(r); break; case nir_op_fsat: { @@ -1117,26 +1147,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, const struct ssa_result_range third = analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2)); - r.is_integral = first.is_integral && second.is_integral && - third.is_integral; - - /* Various cases can result in NaN, so assume the worst. */ - r.is_a_number = false; - - enum ssa_ranges fmul_range; - - if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) { - /* See handling of nir_op_fmul for explanation of why ge_zero is the - * range. - */ - fmul_range = ge_zero; - } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) { - /* -x * x => le_zero */ - fmul_range = le_zero; - } else - fmul_range = fmul_table[first.range][second.range]; - - r.range = fadd_table[fmul_range][third.range]; + r = analyze_fadd(analyze_fmul(first, second, alu), third); break; } @@ -1148,20 +1159,12 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, const struct ssa_result_range third = analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2)); - r.is_integral = first.is_integral && second.is_integral && - third.is_integral; - - /* Various cases can result in NaN, so assume the worst. 
*/ - r.is_a_number = false; - /* Decompose the flrp to first + third * (second + -first) */ - const enum ssa_ranges inner_fadd_range = - fadd_table[second.range][fneg_table[first.range]]; - - const enum ssa_ranges fmul_range = - fmul_table[third.range][inner_fadd_range]; - - r.range = fadd_table[first.range][fmul_range]; + r = analyze_fadd(first, + analyze_fmul(third, + analyze_fadd(second, + analyze_fneg(first)), + NULL)); break; } |