From 1d5f62adbcf5c015bd7f5363ee79b017a4575d79 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 5 Jul 2020 15:03:25 -0700
Subject: nir/range-analysis: Improve "is a number" range analysis for fexp2
 and fpow

---
 src/compiler/nir/nir_range_analysis.c | 112 ++++++++++++++++++++++++++++++++--
 1 file changed, 106 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 8dd2264732d..cf2bc1f25bb 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -744,17 +744,22 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
          ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
       };
 
-      r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
+      const struct ssa_result_range right =
+         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
 
       ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table);
       ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table);
 
-      r.is_integral = r.is_integral && is_not_negative(r.range);
-      r.range = table[r.range];
+      r.is_integral = right.is_integral && is_not_negative(right.range);
+      r.range = table[right.range];
 
-      /* Various cases can result in NaN, so assume the worst. */
+      /* See the nir_op_fpow case for the explanation.  Note that "left" from
+       * fpow is 2.0, so that simplifies things a lot.
+       */
+      r.is_a_number = right.is_a_number && right.range == gt_zero;
+
+      /* A lot more information is needed to prove that the result is finite. */
       r.is_finite = false;
-      r.is_a_number = false;
       break;
    }
 
@@ -1244,8 +1249,103 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
       r.range = table[left.range][right.range];
 
       /* Various cases can result in NaN, so assume the worst. */
-      r.is_a_number = false;
 
+      /* Section 8.2 (Exponential Functions) of the GLSL 4.60 spec says (for
+       * pow):
+       *
+       *    Results are undefined if x < 0.
+       *
+       *    Results are undefined if x = 0 and y <= 0.
+       *
+       * POSIX (per IEEE 754) has a somewhat tighter requirement for pow.  From
+       * https://pubs.opengroup.org/onlinepubs/9699919799.2016edition/functions/pow.html
+       *
+       *    For finite values of x < 0, and finite non-integer values of y, a
+       *    domain error shall occur and either a NaN (if representable), or
+       *    an implementation-defined value shall be returned.
+       *
+       *    If the correct value would cause overflow, a range error shall
+       *    occur and pow(), powf(), and powl() shall return ±HUGE_VAL,
+       *    ±HUGE_VALF, and ±HUGE_VALL, respectively, with the same sign as
+       *    the correct value of the function.
+       *
+       *    If the correct value would cause underflow, and is not
+       *    representable, a range error may occur, and pow(), powf(), and
+       *    powl() shall return 0.0, or (if IEC 60559 Floating-Point is not
+       *    supported) an implementation-defined value no greater in magnitude
+       *    than DBL_MIN, FLT_MIN, and LDBL_MIN, respectively.
+       *
+       *    For y < 0, if x is zero, a pole error may occur and pow(), powf(),
+       *    and powl() shall return ±HUGE_VAL, ±HUGE_VALF, and ±HUGE_VALL,
+       *    respectively. On systems that support the IEC 60559
+       *    Floating-Point option, if x is ±0, a pole error shall occur and
+       *    pow(), powf(), and powl() shall return ±HUGE_VAL, ±HUGE_VALF, and
+       *    ±HUGE_VALL, respectively if y is an odd integer, or HUGE_VAL,
+       *    HUGE_VALF, and HUGE_VALL, respectively if y is not an odd
+       *    integer.
+       *
+       *    If x or y is a NaN, a NaN shall be returned (unless specified
+       *    elsewhere in this description).
+       *
+       *    For any value of y (including NaN), if x is +1, 1.0 shall be
+       *    returned.
+       *
+       *    For any value of x (including NaN), if y is ±0, 1.0 shall be
+       *    returned.
+       *
+       *    For any odd integer value of y > 0, if x is ±0, ±0 shall be
+       *    returned.
+       *
+       *    For y > 0 and not an odd integer, if x is ±0, +0 shall be
+       *    returned.
+       *
+       *    If x is -1, and y is ±Inf, 1.0 shall be returned.
+       *
+       *    For |x| < 1, if y is -Inf, +Inf shall be returned.
+       *
+       *    For |x| > 1, if y is -Inf, +0 shall be returned.
+       *
+       *    For |x| < 1, if y is +Inf, +0 shall be returned.
+       *
+       *    For |x| > 1, if y is +Inf, +Inf shall be returned.
+       *
+       *    For y an odd integer < 0, if x is -Inf, -0 shall be returned.
+       *
+       *    For y < 0 and not an odd integer, if x is -Inf, +0 shall be
+       *    returned.
+       *
+       *    For y an odd integer > 0, if x is -Inf, -Inf shall be returned.
+       *
+       *    For y > 0 and not an odd integer, if x is -Inf, +Inf shall be
+       *    returned.
+       *
+       *    For y < 0, if x is +Inf, +0 shall be returned.
+       *
+       *    For y > 0, if x is +Inf, +Inf shall be returned.
+       *
+       *    If the correct value would cause underflow, and is representable,
+       *    a range error may occur and the correct value shall be returned.
+       *
+       * HLSL documentation (see
+       * https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-pow)
+       * says pow(0, 0) can be 0, 1, or NaN depending on the GPU.  HLSL also
+       * deviates from IEEE in that, for x < 0, pow(x, 1.0) is NaN.
+       *
+       * Assume that algebraic optimizations will handle the pow(1.0, ...)
+       * and pow(..., 0.0) cases.  The remaining sets of rules can be
+       * condensed to:
+       *
+       *    - Both x and y must be numbers.
+       *
+       *    - Either x > 0, or x is not negative and y > 0.
+       *
+       * FINISHME: HLSL also says that for y > 0, pow(0, y) = 0, and for y <
+       * 0, pow(0, y) = Inf.  Can we relax the rule to, "x > 0 or x is not
+       * negative and y is not zero?"
+       */
+      r.is_a_number = (left.is_a_number && right.is_a_number) &&
+         (left.range == gt_zero ||
+          (is_not_negative(left.range) && right.range == gt_zero));
       break;
    }
 
-- 
cgit v1.2.3