From 1d5f62adbcf5c015bd7f5363ee79b017a4575d79 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sun, 5 Jul 2020 15:03:25 -0700 Subject: nir/range-analysis: Improve "is a number" range analysis for fexp2 and fpow --- src/compiler/nir/nir_range_analysis.c | 112 ++++++++++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 6 deletions(-) diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c index 8dd2264732d..cf2bc1f25bb 100644 --- a/src/compiler/nir/nir_range_analysis.c +++ b/src/compiler/nir/nir_range_analysis.c @@ -744,17 +744,22 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero }; - r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); + const struct ssa_result_range right = + analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table); ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table); - r.is_integral = r.is_integral && is_not_negative(r.range); - r.range = table[r.range]; + r.is_integral = right.is_integral && is_not_negative(right.range); + r.range = table[right.range]; - /* Various cases can result in NaN, so assume the worst. */ + /* See the nir_op_fpow case for the explanation. Note that "left" from + * fpow is 2.0, so that simplifies things a lot. + */ + r.is_a_number = right.is_a_number && right.range == gt_zero; + + /* A lot more information is needed to prove that the result is finite. */ r.is_finite = false; - r.is_a_number = false; break; } @@ -1244,8 +1249,103 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, r.range = table[left.range][right.range]; /* Various cases can result in NaN, so assume the worst. */ - r.is_a_number = false; + /* Section 8.2 (Exponential Functions) of the GLSL 4.60 spec says (for + * pow): + * + * Results are undefined if x < 0. + * + * Results are undefined if x = 0 and y <= 0. 
+ * + * IEEE 754 has a somewhat tighter requirement for pow. From + * https://pubs.opengroup.org/onlinepubs/9699919799.2016edition/functions/pow.html + * + * For finite values of x < 0, and finite non-integer values of y, a + * domain error shall occur and either a NaN (if representable), or + * an implementation-defined value shall be returned. + * + * If the correct value would cause overflow, a range error shall + * occur and pow(), powf(), and powl() shall return ±HUGE_VAL, + * ±HUGE_VALF, and ±HUGE_VALL, respectively, with the same sign as + * the correct value of the function. + * + * If the correct value would cause underflow, and is not + * representable, a range error may occur, and pow(), powf(), and + * powl() shall return 0.0, or (if IEC 60559 Floating-Point is not + * supported) an implementation-defined value no greater in magnitude + * than DBL_MIN, FLT_MIN, and LDBL_MIN, respectively. + * + * For y < 0, if x is zero, a pole error may occur and pow(), powf(), + * and powl() shall return ±HUGE_VAL, ±HUGE_VALF, and ±HUGE_VALL, + * respectively. On systems that support the IEC 60559 + * Floating-Point option, if x is ±0, a pole error shall occur and + * pow(), powf(), and powl() shall return ±HUGE_VAL, ±HUGE_VALF, and + * ±HUGE_VALL, respectively if y is an odd integer, or HUGE_VAL, + * HUGE_VALF, and HUGE_VALL, respectively if y is not an odd + * integer. + * + * If x or y is a NaN, a NaN shall be returned (unless specified + * elsewhere in this description). + * + * For any value of y (including NaN), if x is +1, 1.0 shall be + * returned. + * + * For any value of x (including NaN), if y is ±0, 1.0 shall be + * returned. + * + * For any odd integer value of y > 0, if x is ±0, ±0 shall be + * returned. + * + * For y > 0 and not an odd integer, if x is ±0, +0 shall be + * returned. + * + * If x is -1, and y is ±Inf, 1.0 shall be returned. + * + * For |x| < 1, if y is -Inf, +Inf shall be returned. + * + * For |x| > 1, if y is -Inf, +0 shall be returned. 
+ * + * For |x| < 1, if y is +Inf, +0 shall be returned. + * + * For |x| > 1, if y is +Inf, +Inf shall be returned. + * + * For y an odd integer < 0, if x is -Inf, -0 shall be returned. + * + * For y < 0 and not an odd integer, if x is -Inf, +0 shall be + * returned. + * + * For y an odd integer > 0, if x is -Inf, -Inf shall be returned. + * + * For y > 0 and not an odd integer, if x is -Inf, +Inf shall be + * returned. + * + * For y < 0, if x is +Inf, +0 shall be returned. + * + * For y > 0, if x is +Inf, +Inf shall be returned. + * + * If the correct value would cause underflow, and is representable, + * a range error may occur and the correct value shall be returned. + * + * HLSL documentation (see + * https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-pow) + * says pow(0, 0) can be 0, 1, or NaN depending on the GPU. HLSL also + * deviates from IEEE in that, for x < 0, pow(x, 1.0) is NaN. + * + * Assume that algebraic optimizations will handle the pow(1.0, ...) + * and pow(..., 0.0) cases. The remaining sets of rules can be + * condensed to: + * + * - Both x and y must be numbers. + * + * - x > 0, or (x is not negative and y > 0). + * + * FINISHME: HLSL also says that for y > 0, pow(0, y) = 0, and for y < + * 0, pow(0, y) = Inf. Can we relax the rule to, "x > 0 or x is not + * negative and y is not zero?" + */ + r.is_a_number = (left.is_a_number && right.is_a_number) && + (left.range == gt_zero || + (is_not_negative(left.range) && right.range == gt_zero)); break; } -- cgit v1.2.3