backend: refine remquo to pass cft

do the thing according to spec Signed-off-by: rander.wang <rander.wang@intel.com> Tested-by: Yang Rong <rong.r.yang@intel.com>
author: rander <rander.wang@intel.com> 2017-04-12 16:43:14 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2017-05-04 19:08:31 +0800
commit: e04f8daf110215b9b4de0f0d9baf76245cfef626 (patch)
tree: 852b6cb9d49686a177d0e5dee562d86007fe5216
parent: 2f77bfe64c164579e9baa07d78d4d2f9c63f2e95 (diff)
2 files changed, 396 insertions, 65 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index bd2a264a..14414af6 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -4528,10 +4528,146 @@ OVERLOADABLE double modf(double x, private double *i)
 	return x - retVal;
 }
 
+double __fmod (double x, double y, int* ik)
+{
+	const double one = 1.0, Zero[] = {0.0, -0.0,};
+	int n,hx,hy,hz,ix,iy,sx,i;
+	uint lx,ly,lz;
+	int ifactor = 0;
+
+	hx = __HI(x);
+	lx = __LO(x);
+	hy = __HI(y);
+	ly = __LO(y);
+	sx = hx&0x80000000; 	/* sign of x */
+	hx ^=sx;		/* |x| */
+	hy &= 0x7fffffff;	/* |y| */
+
+	/* purge off exception values */
+	if((hy|ly)==0||(hx>=0x7ff00000)||	/* y=0,or x not finite */
+		((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */
+		return (x*y)/(x*y);
+	if(hx<=hy) {
+		if((hx<hy)||(lx<ly)) return x;	/* |x|<|y| return x */
+		if(lx==ly)
+		{
+			*ik = 2;
+			return Zero[(uint)sx>>31];	/* |x|=|y| return x*0*/
+		}
+	}
+
+	/* determine ix = ilogb(x) */
+	if(hx<0x00100000) { /* subnormal x */
+		if(hx==0) {
+		for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
+		} else {
+		for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+		}
+	} else ix = (hx>>20)-1023;
+
+	/* determine iy = ilogb(y) */
+	if(hy<0x00100000) { /* subnormal y */
+		if(hy==0) {
+		for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
+		} else {
+		for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+		}
+	} else iy = (hy>>20)-1023;
+
+	/* set up {hx,lx}, {hy,ly} and align y to x */
+	if(ix >= -1022) 
+		hx = 0x00100000|(0x000fffff&hx);
+	else {		/* subnormal x, shift x to normal */
+		n = -1022-ix;
+		if(n<=31) {
+			hx = (hx<<n)|(lx>>(32-n));
+			lx <<= n;
+		} else {
+		hx = lx<<(n-32);
+		lx = 0;
+		}
+	}
+	if(iy >= -1022) 
+		hy = 0x00100000|(0x000fffff&hy);
+	else {		/* subnormal y, shift y to normal */
+		n = -1022-iy;
+		if(n<=31) {
+			hy = (hy<<n)|(ly>>(32-n));
+			ly <<= n;
+		} else {
+		hy = ly<<(n-32);
+		ly = 0;
+		}
+	}
+
+	/* fix point fmod */
+	n = ix - iy;
+	while(n--)
+	{
+		hz=hx-hy;lz=lx-ly;
+		if(lx<ly) hz -= 1;
+
+		ifactor <<= 1;
+		if(hz<0)
+		{
+			hx = hx+hx+(lx>>31);
+			lx = lx+lx;
+		}
+		else
+		{
+			if(n<16)ifactor |= 1;
+
+			if((hz|lz)==0)		/* return sign(x)*0 */
+			{
+				*ik = (ifactor << (n + 2));
+				return Zero[(uint)sx>>31];
+			}
+
+			hx = hz+hz+(lz>>31); lx = lz+lz;
+		}
+	}
+
+	hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+	if(hz>=0){hx=hz;lx=lz;ifactor <<= 1; ifactor |= 1;}
+	else ifactor = (ifactor << 1);
+	
+	ifactor = (ifactor << 1);
+	*ik = ifactor;
+	/* convert back to floating value and restore the sign */
+	if((hx|lx)==0)			/* return sign(x)*0 */
+		return Zero[(uint)sx>>31];	
+	while(hx<0x00100000) {		/* normalize x */
+		hx = hx+hx+(lx>>31); lx = lx+lx;
+		iy -= 1;
+	}
+	if(iy>= -1022) {	/* normalize output */
+		hx = ((hx-0x00100000)|((iy+1023)<<20));
+		__setHigh(&x,hx|sx);
+		__setLow(&x, lx);
+	} else {		/* subnormal output */
+		n = -1022 - iy;
+		if(n<=20) {
+		lx = (lx>>n)|((uint)hx<<(32-n));
+		hx >>= n;
+		} else if (n<=31) {
+		lx = (hx<<(32-n))|(lx>>n); hx = sx;
+		} else {
+		lx = hx>>(n-32); hx = sx;
+		}
+	__setHigh(&x,hx|sx);
+	__setLow(&x, lx);
+
+		x *= one;		/* create necessary signal */
+	}
+	return x;		/* exact output */
+
+}
+
+
 OVERLOADABLE double remquo(double x, double p, global int *quo)
 {
-	int hx,hp;
-	unsigned sx,lx,lp;
+	int hx,hp, ik = 0;
+	unsigned sx,sy, lx,lp;
 	double p_half, zero = 0.0;
 	double xx = x;
 
@@ -4540,140 +4676,185 @@ OVERLOADABLE double remquo(double x, double p, global int *quo)
 	hp = __HI(p);		/* high word of p */
 	lp = __LO(p);		/* low  word of p */
 	sx = hx&0x80000000;
+	sy = hp&0x80000000;
 	hp &= 0x7fffffff;
 	hx &= 0x7fffffff;
 
 	/* purge off exception values */
-	if((hp|lp)==0) return (x*p)/(x*p); 	/* p = 0 */
+	if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);} 	/* p = 0 */
 	if((hx>=0x7ff00000)||			/* x not finite */
 	  ((hp>=0x7ff00000)&&			/* p is NaN */
 	  (((hp-0x7ff00000)|lp)!=0)))
-		return (x*p)/(x*p);
+		{*quo = 0; return (x*p)/(x*p);} 	
 
 
-	if (hp<=0x7fdfffff) x = fmod(x,p+p);	/* now x < 2p */
-	if (((hx-hp)|(lx-lp))==0) return zero*x;
+	if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik);	/* now x < 2p */
+
 	x  = fabs(x);
 	p  = fabs(p);
 	if (hp<0x00200000)
+	{
+		if(x+x>p)
 		{
-		if(x+x>p) {
-		x-=p;
-		if(x+x>=p) x -= p;
+			x-=p;
+			ik += 1;
+			if(x+x>=p)
+			{
+				x -= p;
+				ik += 1;
+			}
 		}
 	}
-		else
-		{
+	else
+	{
 		p_half = 0.5*p;
 		if(x>p_half)
+		{
+			x-=p;
+			ik += 1;
+			if(x>=p_half)
 			{
-		x-=p;
-		if(x>=p_half) x -= p;
+				ik += 1;
+				x -= p;
+			}
 		}
 	}
 	__setHigh(&x,  __HI(x) ^sx);
-		*quo = (xx -x)/p;
+
+	ik &= 0x7f;
+	if(sx != sy) ik = -ik;
+	*quo = ik;
 	return x;
 }
 
 OVERLOADABLE double remquo(double x, double p, local int *quo)
 {
-	int hx,hp;
-	unsigned sx,lx,lp;
+	int hx,hp, ik = 0;
+	unsigned sx,sy, lx,lp;
 	double p_half, zero = 0.0;
 	double xx = x;
 
 	hx = __HI(x);		/* high word of x */
-	lx = __LO(x);		/* low  word of x */
+	lx = __LO(x);		/* low	word of x */
 	hp = __HI(p);		/* high word of p */
-	lp = __LO(p);		/* low  word of p */
+	lp = __LO(p);		/* low	word of p */
 	sx = hx&0x80000000;
+	sy = hp&0x80000000;
 	hp &= 0x7fffffff;
 	hx &= 0x7fffffff;
 
 	/* purge off exception values */
-	if((hp|lp)==0) return (x*p)/(x*p); 	/* p = 0 */
+	if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);}	/* p = 0 */
 	if((hx>=0x7ff00000)||			/* x not finite */
 	  ((hp>=0x7ff00000)&&			/* p is NaN */
 	  (((hp-0x7ff00000)|lp)!=0)))
-		return (x*p)/(x*p);
+		{*quo = 0; return (x*p)/(x*p);} 	
 
 
-	if (hp<=0x7fdfffff) x = fmod(x,p+p);	/* now x < 2p */
-	if (((hx-hp)|(lx-lp))==0) return zero*x;
+	if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */
+
 	x  = fabs(x);
 	p  = fabs(p);
 	if (hp<0x00200000)
+	{
+		if(x+x>p)
 		{
-		if(x+x>p) {
-		x-=p;
-		if(x+x>=p) x -= p;
+			x-=p;
+			ik += 1;
+			if(x+x>=p)
+			{
+				x -= p;
+				ik += 1;
+			}
 		}
 	}
-		else
-		{
+	else
+	{
 		p_half = 0.5*p;
 		if(x>p_half)
+		{
+			x-=p;
+			ik += 1;
+			if(x>=p_half)
 			{
-		x-=p;
-		if(x>=p_half) x -= p;
+				ik += 1;
+				x -= p;
+			}
 		}
 	}
 	__setHigh(&x,  __HI(x) ^sx);
-		*quo = (xx -x)/p;
-	return x;
 
+	ik &= 0x7f;
+	if(sx != sy) ik = -ik;
+	*quo = ik;
+	return x;
 }
 
+
 OVERLOADABLE double remquo(double x, double p, private int *quo)
 {
-	int hx,hp;
-	unsigned sx,lx,lp;
+	int hx,hp, ik = 0;
+	unsigned sx,sy, lx,lp;
 	double p_half, zero = 0.0;
 	double xx = x;
 
 	hx = __HI(x);		/* high word of x */
-	lx = __LO(x);		/* low  word of x */
+	lx = __LO(x);		/* low	word of x */
 	hp = __HI(p);		/* high word of p */
-	lp = __LO(p);		/* low  word of p */
+	lp = __LO(p);		/* low	word of p */
 	sx = hx&0x80000000;
+	sy = hp&0x80000000;
 	hp &= 0x7fffffff;
 	hx &= 0x7fffffff;
 
 	/* purge off exception values */
-	if((hp|lp)==0) return (x*p)/(x*p); 	/* p = 0 */
+	if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);}	/* p = 0 */
 	if((hx>=0x7ff00000)||			/* x not finite */
 	  ((hp>=0x7ff00000)&&			/* p is NaN */
 	  (((hp-0x7ff00000)|lp)!=0)))
-		return (x*p)/(x*p);
+		{*quo = 0; return (x*p)/(x*p);} 	
+
 
+	if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */
 
-	if (hp<=0x7fdfffff) x = fmod(x,p+p);	/* now x < 2p */
-	if (((hx-hp)|(lx-lp))==0) return zero*x;
 	x  = fabs(x);
 	p  = fabs(p);
 	if (hp<0x00200000)
+	{
+		if(x+x>p)
 		{
-		if(x+x>p) {
-		x-=p;
-		if(x+x>=p) x -= p;
+			x-=p;
+			ik += 1;
+			if(x+x>=p)
+			{
+				x -= p;
+				ik += 1;
+			}
 		}
 	}
-		else
-		{
+	else
+	{
 		p_half = 0.5*p;
 		if(x>p_half)
+		{
+			x-=p;
+			ik += 1;
+			if(x>=p_half)
 			{
-		x-=p;
-		if(x>=p_half) x -= p;
+				ik += 1;
+				x -= p;
+			}
 		}
 	}
 	__setHigh(&x,  __HI(x) ^sx);
-		*quo = (xx -x)/p;
-	return x;
 
+	ik &= 0x7f;
+	if(sx != sy) ik = -ik;
+	*quo = ik;
+	return x;
 }
 
+
 OVERLOADABLE double sincos(double x, global double *cosval)
 {
 	*cosval = cos(x);
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
index f7a91c83..e01401b3 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
@@ -4041,55 +4041,205 @@ OVERLOADABLE double modf(double x, double *i)
 	return x - retVal;
 }
 
+double __fmod (double x, double y, int* ik)
+{
+	const double one = 1.0, Zero[] = {0.0, -0.0,};
+	int n,hx,hy,hz,ix,iy,sx,i;
+	uint lx,ly,lz;
+	int ifactor = 0;
+
+	hx = __HI(x);
+	lx = __LO(x);
+	hy = __HI(y);
+	ly = __LO(y);
+	sx = hx&0x80000000; 	/* sign of x */
+	hx ^=sx;		/* |x| */
+	hy &= 0x7fffffff;	/* |y| */
+
+	/* purge off exception values */
+	if((hy|ly)==0||(hx>=0x7ff00000)||	/* y=0,or x not finite */
+		((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */
+		return (x*y)/(x*y);
+	if(hx<=hy) {
+		if((hx<hy)||(lx<ly)) return x;	/* |x|<|y| return x */
+		if(lx==ly)
+		{
+			*ik = 2;
+			return Zero[(uint)sx>>31];	/* |x|=|y| return x*0*/
+		}
+	}
+
+	/* determine ix = ilogb(x) */
+	if(hx<0x00100000) { /* subnormal x */
+		if(hx==0) {
+		for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
+		} else {
+		for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+		}
+	} else ix = (hx>>20)-1023;
+
+	/* determine iy = ilogb(y) */
+	if(hy<0x00100000) { /* subnormal y */
+		if(hy==0) {
+		for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
+		} else {
+		for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+		}
+	} else iy = (hy>>20)-1023;
+
+	/* set up {hx,lx}, {hy,ly} and align y to x */
+	if(ix >= -1022) 
+		hx = 0x00100000|(0x000fffff&hx);
+	else {		/* subnormal x, shift x to normal */
+		n = -1022-ix;
+		if(n<=31) {
+			hx = (hx<<n)|(lx>>(32-n));
+			lx <<= n;
+		} else {
+		hx = lx<<(n-32);
+		lx = 0;
+		}
+	}
+	if(iy >= -1022) 
+		hy = 0x00100000|(0x000fffff&hy);
+	else {		/* subnormal y, shift y to normal */
+		n = -1022-iy;
+		if(n<=31) {
+			hy = (hy<<n)|(ly>>(32-n));
+			ly <<= n;
+		} else {
+		hy = ly<<(n-32);
+		ly = 0;
+		}
+	}
+
+	/* fix point fmod */
+	n = ix - iy;
+	while(n--)
+	{
+		hz=hx-hy;lz=lx-ly;
+		if(lx<ly) hz -= 1;
+
+		ifactor <<= 1;
+		if(hz<0)
+		{
+			hx = hx+hx+(lx>>31);
+			lx = lx+lx;
+		}
+		else
+		{
+			if(n<16)ifactor |= 1;
+
+			if((hz|lz)==0)		/* return sign(x)*0 */
+			{
+				*ik = (ifactor << (n + 2));
+				return Zero[(uint)sx>>31];
+			}
+
+			hx = hz+hz+(lz>>31); lx = lz+lz;
+		}
+	}
+
+	hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+	if(hz>=0){hx=hz;lx=lz;ifactor <<= 1; ifactor |= 1;}
+	else ifactor = (ifactor << 1);
+	
+	ifactor = (ifactor << 1);
+	*ik = ifactor;
+	/* convert back to floating value and restore the sign */
+	if((hx|lx)==0)			/* return sign(x)*0 */
+		return Zero[(uint)sx>>31];	
+	while(hx<0x00100000) {		/* normalize x */
+		hx = hx+hx+(lx>>31); lx = lx+lx;
+		iy -= 1;
+	}
+	if(iy>= -1022) {	/* normalize output */
+		hx = ((hx-0x00100000)|((iy+1023)<<20));
+		__setHigh(&x,hx|sx);
+		__setLow(&x, lx);
+	} else {		/* subnormal output */
+		n = -1022 - iy;
+		if(n<=20) {
+		lx = (lx>>n)|((uint)hx<<(32-n));
+		hx >>= n;
+		} else if (n<=31) {
+		lx = (hx<<(32-n))|(lx>>n); hx = sx;
+		} else {
+		lx = hx>>(n-32); hx = sx;
+		}
+	__setHigh(&x,hx|sx);
+	__setLow(&x, lx);
+
+		x *= one;		/* create necessary signal */
+	}
+	return x;		/* exact output */
+
+}
+
 OVERLOADABLE double remquo(double x, double p, int *quo)
 {
-	int hx,hp;
-	unsigned sx,lx,lp;
+	int hx,hp, ik = 0;
+	unsigned sx,sy, lx,lp;
 	double p_half, zero = 0.0;
 	double xx = x;
 
 	hx = __HI(x);		/* high word of x */
-	lx = __LO(x);		/* low  word of x */
+	lx = __LO(x);		/* low	word of x */
 	hp = __HI(p);		/* high word of p */
-	lp = __LO(p);		/* low  word of p */
+	lp = __LO(p);		/* low	word of p */
 	sx = hx&0x80000000;
+	sy = hp&0x80000000;
 	hp &= 0x7fffffff;
 	hx &= 0x7fffffff;
 
 	/* purge off exception values */
-	if((hp|lp)==0) return (x*p)/(x*p); 	/* p = 0 */
+	if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);}	/* p = 0 */
 	if((hx>=0x7ff00000)||			/* x not finite */
 	  ((hp>=0x7ff00000)&&			/* p is NaN */
 	  (((hp-0x7ff00000)|lp)!=0)))
-		return (x*p)/(x*p);
+		{*quo = 0; return (x*p)/(x*p);} 	
+
 
+	if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */
 
-	if (hp<=0x7fdfffff) x = fmod(x,p+p);	/* now x < 2p */
-	if (((hx-hp)|(lx-lp))==0) return zero*x;
 	x  = fabs(x);
 	p  = fabs(p);
 	if (hp<0x00200000)
+	{
+		if(x+x>p)
 		{
-		if(x+x>p) {
-		x-=p;
-		if(x+x>=p) x -= p;
+			x-=p;
+			ik += 1;
+			if(x+x>=p)
+			{
+				x -= p;
+				ik += 1;
+			}
 		}
 	}
-		else
-		{
+	else
+	{
 		p_half = 0.5*p;
 		if(x>p_half)
+		{
+			x-=p;
+			ik += 1;
+			if(x>=p_half)
 			{
-		x-=p;
-		if(x>=p_half) x -= p;
+				ik += 1;
+				x -= p;
+			}
 		}
 	}
 	__setHigh(&x,  __HI(x) ^sx);
-		*quo = (xx -x)/p;
-	return x;
 
+	ik &= 0x7f;
+	if(sx != sy) ik = -ik;
+	*quo = ik;
+	return x;
 }
 
+
 OVERLOADABLE double sincos(double x, double *cosval)
 {
 	*cosval = cos(x);
author	rander <rander.wang@intel.com>	2017-04-12 16:43:14 +0800
committer	Yang Rong <rong.r.yang@intel.com>	2017-05-04 19:08:31 +0800
commit	e04f8daf110215b9b4de0f0d9baf76245cfef626 (patch)
tree	852b6cb9d49686a177d0e5dee562d86007fe5216
parent	2f77bfe64c164579e9baa07d78d4d2f9c63f2e95 (diff)