summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ian Romanick <ian.d.romanick@intel.com> 2020-07-05 15:03:25 -0700
committer Ian Romanick <ian.d.romanick@intel.com> 2021-04-20 15:57:00 -0700
commit 1d5f62adbcf5c015bd7f5363ee79b017a4575d79 (patch)
tree b02d702e351c730688452294dfff43ec2a1a5aa3
parent b29347177d094f165f425cd6a752c7947817d8b4 (diff)
nir/range-analysis: Improve "is a number" range analysis for fexp2 and fpow [wip/improve-range-analysis]
-rw-r--r-- src/compiler/nir/nir_range_analysis.c 112
1 files changed, 106 insertions, 6 deletions
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 8dd2264732d..cf2bc1f25bb 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -744,17 +744,22 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
};
- r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
+ const struct ssa_result_range right =
+ analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table);
ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table);
- r.is_integral = r.is_integral && is_not_negative(r.range);
- r.range = table[r.range];
+ r.is_integral = right.is_integral && is_not_negative(right.range);
+ r.range = table[right.range];
- /* Various cases can result in NaN, so assume the worst. */
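+ /* See the nir_op_fpow case for the explanation. Note that "left" from
+ * fpow is 2.0, so that simplifies things a lot.
+ *
+ * NOTE(review): since 2.0 > 0, the fpow rule would seem to reduce to just
+ * right.is_a_number; the additional right.range == gt_zero test below
+ * looks more conservative than that explanation requires. Confirm whether
+ * it is intentional.
+ */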
+ r.is_a_number = right.is_a_number && right.range == gt_zero;
+
+ /* A lot more information is needed to prove that the result is finite. */
r.is_finite = false;
- r.is_a_number = false;
break;
}
@@ -1244,8 +1249,103 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
r.range = table[left.range][right.range];
/* Various cases can result in NaN, so assume the worst. */
- r.is_a_number = false;
+ /* Section 8.2 (Exponential Functions) of the GLSL 4.60 spec says (for
+ * pow):
+ *
+ * Results are undefined if x < 0.
+ *
+ * Results are undefined if x = 0 and y <= 0.
+ *
+ * POSIX, which follows IEC 60559 (IEEE 754) where that option is
+ * supported, has a somewhat tighter requirement for pow. From
+ * https://pubs.opengroup.org/onlinepubs/9699919799.2016edition/functions/pow.html
+ *
+ * For finite values of x < 0, and finite non-integer values of y, a
+ * domain error shall occur and either a NaN (if representable), or
+ * an implementation-defined value shall be returned.
+ *
+ * If the correct value would cause overflow, a range error shall
+ * occur and pow(), powf(), and powl() shall return ±HUGE_VAL,
+ * ±HUGE_VALF, and ±HUGE_VALL, respectively, with the same sign as
+ * the correct value of the function.
+ *
+ * If the correct value would cause underflow, and is not
+ * representable, a range error may occur, and pow(), powf(), and
+ * powl() shall return 0.0, or (if IEC 60559 Floating-Point is not
+ * supported) an implementation-defined value no greater in magnitude
+ * than DBL_MIN, FLT_MIN, and LDBL_MIN, respectively.
+ *
+ * For y < 0, if x is zero, a pole error may occur and pow(), powf(),
+ * and powl() shall return ±HUGE_VAL, ±HUGE_VALF, and ±HUGE_VALL,
+ * respectively. On systems that support the IEC 60559
+ * Floating-Point option, if x is ±0, a pole error shall occur and
+ * pow(), powf(), and powl() shall return ±HUGE_VAL, ±HUGE_VALF, and
+ * ±HUGE_VALL, respectively if y is an odd integer, or HUGE_VAL,
+ * HUGE_VALF, and HUGE_VALL, respectively if y is not an odd
+ * integer.
+ *
+ * If x or y is a NaN, a NaN shall be returned (unless specified
+ * elsewhere in this description).
+ *
+ * For any value of y (including NaN), if x is +1, 1.0 shall be
+ * returned.
+ *
+ * For any value of x (including NaN), if y is ±0, 1.0 shall be
+ * returned.
+ *
+ * For any odd integer value of y > 0, if x is ±0, ±0 shall be
+ * returned.
+ *
+ * For y > 0 and not an odd integer, if x is ±0, +0 shall be
+ * returned.
+ *
+ * If x is -1, and y is ±Inf, 1.0 shall be returned.
+ *
+ * For |x| < 1, if y is -Inf, +Inf shall be returned.
+ *
+ * For |x| > 1, if y is -Inf, +0 shall be returned.
+ *
+ * For |x| < 1, if y is +Inf, +0 shall be returned.
+ *
+ * For |x| > 1, if y is +Inf, +Inf shall be returned.
+ *
+ * For y an odd integer < 0, if x is -Inf, -0 shall be returned.
+ *
+ * For y < 0 and not an odd integer, if x is -Inf, +0 shall be
+ * returned.
+ *
+ * For y an odd integer > 0, if x is -Inf, -Inf shall be returned.
+ *
+ * For y > 0 and not an odd integer, if x is -Inf, +Inf shall be
+ * returned.
+ *
+ * For y < 0, if x is +Inf, +0 shall be returned.
+ *
+ * For y > 0, if x is +Inf, +Inf shall be returned.
+ *
+ * If the correct value would cause underflow, and is representable,
+ * a range error may occur and the correct value shall be returned.
+ *
+ * HLSL documentation (see
+ * https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-pow)
+ * says pow(0, 0) can be 0, 1, or NaN depending on the GPU. HLSL also
+ * deviates from IEEE in that, for x < 0, pow(x, 1.0) is NaN.
+ *
+ * Assume that algebraic optimizations will handle the pow(1.0, ...)
+ * and pow(..., 0.0) cases. The remaining sets of rules can be
+ * condensed to:
+ *
+ * - Both x and y must be numbers.
+ *
+ * - Either x > 0, or x is not negative and y > 0.
+ *
+ * FINISHME: HLSL also says that for y > 0, pow(0, y) = 0, and for y <
+ * 0, pow(0, y) = Inf. Can we relax the rule to, "x > 0 or x is not
+ * negative and y is not zero?"
+ */
+ r.is_a_number = (left.is_a_number && right.is_a_number) &&
+ (left.range == gt_zero ||
+ (is_not_negative(left.range) && right.range == gt_zero));
break;
}