diff options
author | rander.wang <rander.wang@intel.com> | 2017-05-15 16:12:24 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-05-17 18:11:00 +0800 |
commit | 4e1b94f00ed0a09f62b94b5f6889a96c92355c46 (patch) | |
tree | 5ed32f1dd1d6fd482652451602e7a17c9a5b1774 | |
parent | cbe117343fa14351f4886cb8c9102f0ebbf8b340 (diff) |
backend: refine acos
Refine the acos algorithm to remove the branch: the if/else is replaced with conditional (?:) expressions.
Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 13 |
1 files changed, 9 insertions, 4 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl index 576033e9..fb6e412c 100644 --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl @@ -1187,10 +1187,15 @@ OVERLOADABLE float __gen_ocl_internal_asinpi(float x) { return __gen_ocl_internal_asin(x) / M_PI_F; } OVERLOADABLE float __gen_ocl_internal_acos(float x) { - if(x > 0.5) - return 2 * __gen_ocl_asin_util(native_sqrt((1-x)/2)); - else - return M_PI_2_F - __gen_ocl_internal_asin(x); + float absX = fabs(x); + float asinX2 =__gen_ocl_asin_util(x); + float tmp = __gen_ocl_asin_util(native_sqrt(mad(-0.5f, absX, 0.5f))); + float asinX1 = mad(2.0f ,tmp, -M_PI_2_F); + + float retVal = (x < 0.0f)?asinX1:-asinX1; + retVal = (absX > 0.5f)?retVal:asinX2; + retVal = (x <= 0.5f) ? M_PI_2_F - retVal:2.0f*tmp; + return retVal; } OVERLOADABLE float __gen_ocl_internal_acospi(float x) { return __gen_ocl_internal_acos(x) / M_PI_F; |