summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@gmail.com>2014-05-30 08:59:13 +0800
committerZhigang Gong <zhigang.gong@gmail.com>2014-05-30 08:59:13 +0800
commitd696d67f8430e91179564fe6fb7a4c85c0373371 (patch)
treec47795a1354bdc6a7591f8dba7210f13e1fe77c6
parent568d34a1370cf2054b7776e7b2b7d47b775a5440 (diff)
use double to compute sin. (ref: double)
Signed-off-by: Zhigang Gong <zhigang.gong@gmail.com>
-rwxr-xr-xbackend/src/ocl_stdlib.tmpl.h28
1 files changed, 28 insertions, 0 deletions
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 01bb3370..debd01f0 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -197,6 +197,14 @@ INLINE_OVERLOADABLE int __ocl_finitef (float x){
#define M_2_SQRTPI_F 1.1283791670955126F
#define M_SQRT2_F 1.4142135623730951F
#define M_SQRT1_2_F 0.7071067811865476F
+#if 1 /* double-precision pi constants; this guard is always enabled */
+#define M_PI_D 3.141592653589793 /* pi */
+#define M_PI_2_D 1.5707963267948966 /* pi / 2 */
+#define M_PI_4_D 0.7853981633974483 /* pi / 4 */
+#define M_RCP_PI_D 0.31830988618379069 /* 1 / pi */
+#define M_RCP_PI_2_D 0.63661977236758138 /* 2 / pi, i.e. 1 / (pi / 2) */
+#define M_RCP_PI_4_D 1.2732395447351628 /* 4 / pi, i.e. 1 / (pi / 4) */
+#endif
/////////////////////////////////////////////////////////////////////////////
// OpenCL integer built-in macros
/////////////////////////////////////////////////////////////////////////////
@@ -1157,6 +1165,25 @@ __constant const int two_over_pi[22*9] = {
INLINE int __ieee754_rem_pio2f(float x, float *y) {
/* copied from fdlibm */
+#if 1
+ /* Reduce x in double precision so that x ~= n * (pi/2) + rem. */
+ double norm = (double)x * M_RCP_PI_2_D;
+ float n = __gen_ocl_internal_floor((float)norm);
+ double rem = (double)x - n * M_PI_2_D;
+ /* Shifting rem by pi/2 must move n the opposite way, otherwise
+  * n * (pi/2) + rem no longer equals x and the quadrant is off by two. */
+ if (rem < -M_PI_4_D) {
+   rem += M_PI_2_D;
+   n -= 1;
+ } else if (rem > M_PI_4_D) {
+   rem -= M_PI_2_D;
+   n += 1;
+ }
+ /* y[0] is rem rounded to float; y[1] is what that rounding dropped. */
+ y[0] = rem;
+ y[1] = rem - (double)y[0];
+ return n;
+#else
float z,w,t,r,fn;
float tx[3];
@@ -1255,6 +1282,7 @@ const float pio2_3t = 6.1232342629e-17;
n = __kernel_rem_pio2f(tx,y,e0,nx,2,two_over_pi);
if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
return n;
+#endif
}
INLINE_OVERLOADABLE float __kernel_sinf(float x, float y, int iy)