summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: rander.wang <rander.wang@intel.com> 2017-05-15 14:55:42 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2017-05-17 18:10:35 +0800
commit: 511071cd6e27437e0d2c96f73b64d4aa21dc452f (patch)
tree: db5b2284f882f15e305799c33b050e280bf6b998
parent: 48f5c2024ad2d7752c2f8adc452167346d163e58 (diff)
backend: refine cos function
Do it like the sin function.

Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 51
1 file changed, 25 insertions, 26 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index b3c61234..edb170fa 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -521,36 +521,35 @@ OVERLOADABLE float cos(float x)
if (__ocl_math_fastpath_flag)
return __gen_ocl_internal_fastpath_cos(x);
- const float pio4 = 7.8539812565e-01; /* 0x3f490fda */
- float y,z=0.0;
- int n, ix;
+ float y;
+ float na ;
+
+ uint n, ix;
x = __gen_ocl_fabs(x);
- GEN_OCL_GET_FLOAT_WORD(ix,x);
- ix &= 0x7fffffff;
+ /* cos(Inf or NaN) is NaN */
+ na = x -x;
- /* cos(Inf or NaN) is NaN */
- if (ix >= 0x7f800000) return x-x;
+ uint n0, n1;
+ float v;
- if(x <= pio4)
- return __kernel_cosf(x, 0.f);
- /* argument reduction needed */
- else {
- n = __ieee754_rem_pio2f(x,&y);
- n &= 3;
- float c = __kernel_cosf(y, 0.0f);
- float s = __kernel_sinf(y);
- float v = (n&1) ? s : c;
- /* n&3 return
- 0 cos(y)
- 1 -sin(y)
- 2 -cos(y)
- 3 sin(y)
- */
- int mask = (n>>1) ^ n;
- float sign = (mask&1) ? -1.0f : 1.0f;
- return sign * v;
- }
+ n = __ieee754_rem_pio2f(x,&y);
+ float s = __kernel_sinf(y);
+ float c = sqrt(fabs(mad(s, s, -1.0f)));
+
+ n0 = (n&0x1);
+ n1 = (n&0x2);
+
+ float ss = n1 - 1.0f;
+ v = (n0)?s:-c;
+
+ /* n&3 return
+ 0 cos(y)
+ 1 -sin(y)
+ 2 -cos(y)
+ 3 sin(y)
+ */
+ return mad(v, ss, na);
}
float __kernel_tanf(float x, float y, int iy)