diff options
author | rander.wang <rander.wang@intel.com> | 2017-05-15 16:26:08 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-05-17 18:11:12 +0800 |
commit | 448f8f7a2a2901aa8807a212760539889bc3ebb8 (patch) | |
tree | ae64b2b7684b37608210fb62416983d7cf8304a2 | |
parent | 733e9685ff6d8655d32f496a3bfad454a94b8c92 (diff) |
backend: refine asin function
refine the algorithm to remove unnecessary operations
Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 28 |
1 file changed, 7 insertions, 21 deletions
```diff
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index 166ee9cc..9d4100e3 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -1160,28 +1160,14 @@ INLINE float __gen_ocl_asin_util(float x) {
 }
 
 OVERLOADABLE float __gen_ocl_internal_asin(float x) {
-  uint ix;
-  union { uint i; float f; } u;
-  u.f = x;
-  ix = u.i & 0x7fffffff;
-  if(ix == 0x3f800000) {
-    return x * M_PI_2_F; /* asin(|1|)=+-pi/2 with inexact */
-  }
-  if(ix > 0x3f800000) { /* |x|>= 1 */
-    return NAN; /* asin(|x|>1) is NaN */
-  }
-
-  if(ix < 0x32000000) { /* if |x| < 2**-27 */
-    if(HUGE_VALF + x > FLT_ONE) return x; /* return x with inexact if x!=0*/
-  }
+  float asinX2 =__gen_ocl_asin_util(x);
+  float absX = fabs(x);
+  float asinX1 = mad(2.0f , __gen_ocl_asin_util(native_sqrt(mad(-0.5f, absX, 0.5f))) , -M_PI_2_F);
 
-  if(x < -0.5) {
-    return 2 * __gen_ocl_asin_util(native_sqrt((1+x) / 2)) - M_PI_2_F;
-  } else if(x > 0.5) {
-    return M_PI_2_F - 2 * __gen_ocl_asin_util(native_sqrt((1-x) / 2));
-  } else {
-    return __gen_ocl_asin_util(x);
-  }
+  float retVal = (x < 0.0f)?asinX1:-asinX1;
+  retVal = (absX > 0.5f)?retVal:asinX2;
+  retVal = (absX > 1.0f)?NAN:retVal;
+  return retVal;
 }
 OVERLOADABLE float __gen_ocl_internal_asinpi(float x) {
   return __gen_ocl_internal_asin(x) / M_PI_F;
```