diff options
author | rander.wang <rander.wang@intel.com> | 2017-05-15 14:51:17 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-05-17 18:10:22 +0800 |
commit | 48f5c2024ad2d7752c2f8adc452167346d163e58 (patch) | |
tree | c8e7f03ca40f045d4db5c7f971e0a148ea2296b6 | |
parent | c56b5076fcccc2718ffa63c8d461614cbbc82e67 (diff) |
backend: refine sin function
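(1) refine the NaN check: compute x - x and fold it into the result instead of testing the bit pattern and returning early
(2) use sqrt(1 - s*s) to get cos from sin instead of calling __kernel_cosf
(3) remove the small-range (x <= pi/4) shortcut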
Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 42 |
1 file changed, 22 insertions, 20 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl index 7c449423..b3c61234 100644 --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl @@ -489,29 +489,31 @@ OVERLOADABLE float sin(float x) if (__ocl_math_fastpath_flag) return __gen_ocl_internal_fastpath_sin(x); - const float pio4 = 7.8539812565e-01; /* 0x3f490fda */ - float y,z=0.0; - int n, ix; - + float y; + float na ; + uint n, ix; float negative = x < 0.0f? -1.0f : 1.0f; x = fabs(x); - GEN_OCL_GET_FLOAT_WORD(ix,x); - ix &= 0x7fffffff; - - /* sin(Inf or NaN) is NaN */ - if (ix >= 0x7f800000) return x-x; - - if(x <= pio4) - return negative * __kernel_sinf(x); - /* argument reduction needed */ - else { - n = __ieee754_rem_pio2f(x,&y); - float s = __kernel_sinf(y); - float c = __kernel_cosf(y,0.0f); - float ret = (n&1) ? negative*c : negative*s; - return (n&3)> 1? -1.0f*ret : ret; - } + /* cos(Inf or NaN) is NaN */ + na = x -x; + + uint n0, n1; + float v; + n = __ieee754_rem_pio2f(x,&y); + float s = __kernel_sinf(y); + float c = sqrt(mad(-s, s, 1.0f)); + n0 = (n&0x1); + n1 = (n&0x2); + v = (n0)?c:s; + v = (n1)?-v:v; + /* n&3 return + 0 sin(y) + 1 cos(y) + 2 -sin(y) + 3 -cos(y) + */ + return mad(v, negative, na); } OVERLOADABLE float cos(float x) |