libocl: Improve precision of exp()

This patch reverts most of the logic in 500843d36ab6631d71570130c0c08048f9b8f3fe. It seems native_exp loses some precision, which can make the result fail to satisfy the OpenCL spec. These kinds of cases often come from other functions that invoke internal_exp(), like sinh/cosh. Signed-off-by: Ruiling Song <ruiling.song@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
author: Ruiling Song <ruiling.song@intel.com> 2014-12-22 10:00:39 +0800
committer: Zhigang Gong <zhigang.gong@intel.com> 2014-12-22 12:19:31 +0800
commit: c661cd104e007fc71f06badca7d99a29ae3ccc59 (patch)
tree: c80ca28cdd18e5398022a435b5b5c0b8c07822ae
parent: 336e6711ab4e1c3da2ee921cb779713c504d85f3 (diff)
1 files changed, 27 insertions, 9 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 4d05c07b..c7be47e7 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -1940,8 +1940,15 @@ OVERLOADABLE float __gen_ocl_internal_exp(float x) {
   float o_threshold = 8.8721679688e+01,  /* 0x42b17180 */
   u_threshold = -1.0397208405e+02,  /* 0xc2cff1b5 */
   twom100 = 7.8886090522e-31, 	 /* 2**-100=0x0d800000 */
-  ivln2	 =	1.4426950216e+00; /* 0x3fb8aa3b =1/ln2 */
-  float y,hi=0.0,lo=0.0,t;
+  ivln2	 =	1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */
+  one = 1.0,
+  huge = 1.0e+30,
+  P1 = 1.6666667163e-01, /* 0x3e2aaaab */
+  P2 = -2.7777778450e-03, /* 0xbb360b61 */
+  P3 = 6.6137559770e-05, /* 0x388ab355 */
+  P4 = -1.6533901999e-06, /* 0xb5ddea0e */
+  P5 =	4.1381369442e-08; /* 0x3331bb4c */
+  float y,hi=0.0,lo=0.0,c,t;
   int k=0,xsb;
   unsigned hx;
   float ln2HI_0 = 6.9313812256e-01;	/* 0x3f317180 */
@@ -1957,16 +1964,17 @@ OVERLOADABLE float __gen_ocl_internal_exp(float x) {
 
   /* filter out non-finite argument */
   if(hx >= 0x42b17218) {			/* if |x|>=88.721... */
-    // native_exp already handled this
-    return native_exp(x);
+    if(hx>0x7f800000)
+      return x+x;			/* NaN */
+    if(hx==0x7f800000)
+      return (xsb==0)? x:0.0; 	/* exp(+-inf)={inf,0} */
+    if(x > o_threshold) return huge*huge; /* overflow */
+    if(x < u_threshold) return twom100*twom100; /* underflow */
   }
-
   /* argument reduction */
   if(hx > 0x3eb17218) {		/* if  |x| > 0.5 ln2 */
     if(hx < 0x3F851592) {	/* and |x| < 1.5 ln2 */
-      hi = x-(xsb ==1 ? ln2HI_1 : ln2HI_0);
-      lo= xsb == 1? ln2LO_1 : ln2LO_0;
-      k = 1-xsb-xsb;
+      hi = x-(xsb ==1 ? ln2HI_1 : ln2HI_0); lo= xsb == 1? ln2LO_1 : ln2LO_0; k = 1-xsb-xsb;
     } else {
       float tmp = xsb == 1 ? half_1 : half_0;
       k  = ivln2*x+tmp;
@@ -1976,8 +1984,18 @@ OVERLOADABLE float __gen_ocl_internal_exp(float x) {
     }
     x  = hi - lo;
   }
+  else if(hx < 0x31800000)  { /* when |x|<2**-28 */
+    if(huge+x>one) return one+x;/* trigger inexact */
+  }
+  else k = 0;
 
-  y = native_exp(x);
+  /* x is now in primary range */
+  t  = x*x;
+  c  = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
+  if(k==0)
+    return one-((x*c)/(c-(float)2.0)-x);
+  else
+    y = one-((lo-(x*c)/((float)2.0-c))-hi);
   if(k >= -125) {
     unsigned hy;
     GEN_OCL_GET_FLOAT_WORD(hy,y);
author	Ruiling Song <ruiling.song@intel.com>	2014-12-22 10:00:39 +0800
committer	Zhigang Gong <zhigang.gong@intel.com>	2014-12-22 12:19:31 +0800
commit	c661cd104e007fc71f06badca7d99a29ae3ccc59 (patch)
tree	c80ca28cdd18e5398022a435b5b5c0b8c07822ae
parent	336e6711ab4e1c3da2ee921cb779713c504d85f3 (diff)