From 511071cd6e27437e0d2c96f73b64d4aa21dc452f Mon Sep 17 00:00:00 2001 From: "rander.wang" Date: Mon, 15 May 2017 14:55:42 +0800 Subject: backend: refine cos function; do it like the sin function Signed-off-by: rander.wang Tested-by: Yang Rong --- backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 51 ++++++++++++------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl index b3c61234..edb170fa 100644 --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl @@ -521,36 +521,35 @@ OVERLOADABLE float cos(float x) if (__ocl_math_fastpath_flag) return __gen_ocl_internal_fastpath_cos(x); - const float pio4 = 7.8539812565e-01; /* 0x3f490fda */ - float y,z=0.0; - int n, ix; + float y; + float na ; + + uint n, ix; x = __gen_ocl_fabs(x); - GEN_OCL_GET_FLOAT_WORD(ix,x); - ix &= 0x7fffffff; + /* cos(Inf or NaN) is NaN */ + na = x -x; - /* cos(Inf or NaN) is NaN */ - if (ix >= 0x7f800000) return x-x; + uint n0, n1; + float v; - if(x <= pio4) - return __kernel_cosf(x, 0.f); - /* argument reduction needed */ - else { - n = __ieee754_rem_pio2f(x,&y); - n &= 3; - float c = __kernel_cosf(y, 0.0f); - float s = __kernel_sinf(y); - float v = (n&1) ? s : c; - /* n&3 return - 0 cos(y) - 1 -sin(y) - 2 -cos(y) - 3 sin(y) - */ - int mask = (n>>1) ^ n; - float sign = (mask&1) ? -1.0f : 1.0f; - return sign * v; - } + n = __ieee754_rem_pio2f(x,&y); + float s = __kernel_sinf(y); + float c = sqrt(fabs(mad(s, s, -1.0f))); + + n0 = (n&0x1); + n1 = (n&0x2); + + float ss = n1 - 1.0f; + v = (n0)?s:-c; + + /* n&3 return + 0 cos(y) + 1 -sin(y) + 2 -cos(y) + 3 sin(y) + */ + return mad(v, ss, na); } float __kernel_tanf(float x, float y, int iy) -- cgit v1.2.3