diff options
author | rander <rander.wang@intel.com> | 2017-04-12 16:43:14 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-05-04 19:08:31 +0800 |
commit | e04f8daf110215b9b4de0f0d9baf76245cfef626 (patch) | |
tree | 852b6cb9d49686a177d0e5dee562d86007fe5216 | |
parent | 2f77bfe64c164579e9baa07d78d4d2f9c63f2e95 (diff) |
backend: refine remquo to pass cft
do the thing according to spec
Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math.tmpl.cl | 277 | ||||
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_20.tmpl.cl | 184 |
2 files changed, 396 insertions, 65 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl index bd2a264a..14414af6 100644 --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl @@ -4528,10 +4528,146 @@ OVERLOADABLE double modf(double x, private double *i) return x - retVal; } +double __fmod (double x, double y, int* ik) +{ + const double one = 1.0, Zero[] = {0.0, -0.0,}; + int n,hx,hy,hz,ix,iy,sx,i; + uint lx,ly,lz; + int ifactor = 0; + + hx = __HI(x); + lx = __LO(x); + hy = __HI(y); + ly = __LO(y); + sx = hx&0x80000000; /* sign of x */ + hx ^=sx; /* |x| */ + hy &= 0x7fffffff; /* |y| */ + + /* purge off exception values */ + if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */ + ((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */ + return (x*y)/(x*y); + if(hx<=hy) { + if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */ + if(lx==ly) + { + *ik = 2; + return Zero[(uint)sx>>31]; /* |x|=|y| return x*0*/ + } + } + + /* determine ix = ilogb(x) */ + if(hx<0x00100000) { /* subnormal x */ + if(hx==0) { + for (ix = -1043, i=lx; i>0; i<<=1) ix -=1; + } else { + for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1; + } + } else ix = (hx>>20)-1023; + + /* determine iy = ilogb(y) */ + if(hy<0x00100000) { /* subnormal y */ + if(hy==0) { + for (iy = -1043, i=ly; i>0; i<<=1) iy -=1; + } else { + for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1; + } + } else iy = (hy>>20)-1023; + + /* set up {hx,lx}, {hy,ly} and align y to x */ + if(ix >= -1022) + hx = 0x00100000|(0x000fffff&hx); + else { /* subnormal x, shift x to normal */ + n = -1022-ix; + if(n<=31) { + hx = (hx<<n)|(lx>>(32-n)); + lx <<= n; + } else { + hx = lx<<(n-32); + lx = 0; + } + } + if(iy >= -1022) + hy = 0x00100000|(0x000fffff&hy); + else { /* subnormal y, shift y to normal */ + n = -1022-iy; + if(n<=31) { + hy = (hy<<n)|(ly>>(32-n)); + ly <<= n; + } else { + hy = ly<<(n-32); + ly = 0; + } + } + + /* fix point fmod */ + n = ix - iy; + while(n--) + { + hz=hx-hy;lz=lx-ly; + if(lx<ly) hz -= 1; + + ifactor <<= 1; + if(hz<0) + { + hx = hx+hx+(lx>>31); + lx = lx+lx; + } + else + { + if(n<16)ifactor |= 1; + + if((hz|lz)==0) /* return sign(x)*0 */ + { + *ik = (ifactor << (n + 2)); + return Zero[(uint)sx>>31]; + } + + hx = hz+hz+(lz>>31); lx = lz+lz; + } + } + + hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1; + if(hz>=0){hx=hz;lx=lz;ifactor <<= 1; ifactor |= 1;} + else ifactor = (ifactor << 1); + + ifactor = (ifactor << 1); + *ik = ifactor; + /* convert back to floating value and restore the sign */ + if((hx|lx)==0) /* return sign(x)*0 */ + return Zero[(uint)sx>>31]; + while(hx<0x00100000) { /* normalize x */ + hx = hx+hx+(lx>>31); lx = lx+lx; + iy -= 1; + } + if(iy>= -1022) { /* normalize output */ + hx = ((hx-0x00100000)|((iy+1023)<<20)); + __setHigh(&x,hx|sx); + __setLow(&x, lx); + } else { /* subnormal output */ + n = -1022 - iy; + if(n<=20) { + lx = (lx>>n)|((uint)hx<<(32-n)); + hx >>= n; + } else if (n<=31) { + lx = (hx<<(32-n))|(lx>>n); hx = sx; + } else { + lx = hx>>(n-32); hx = sx; + } + __setHigh(&x,hx|sx); + __setLow(&x, lx); + + x *= one; /* create necessary signal */ + } + return x; /* exact output */ + +} + + OVERLOADABLE double remquo(double x, double p, global int *quo) { - int hx,hp; - unsigned sx,lx,lp; + int hx,hp, ik = 0; + unsigned sx,sy, lx,lp; double p_half, zero = 0.0; double xx = x; @@ -4540,140 +4676,185 @@ OVERLOADABLE double remquo(double x, double p, global int *quo) hp = __HI(p); /* high word of p */ lp = __LO(p); /* low word of p */ sx = hx&0x80000000; + sy = hp&0x80000000; hp &= 0x7fffffff; hx &= 0x7fffffff; /* purge off exception values */ - if((hp|lp)==0) return (x*p)/(x*p); /* p = 0 */ + if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);} /* p = 0 */ if((hx>=0x7ff00000)|| /* x not finite */ ((hp>=0x7ff00000)&& /* p is NaN */ (((hp-0x7ff00000)|lp)!=0))) - return (x*p)/(x*p); + {*quo = 0; return (x*p)/(x*p);} - if (hp<=0x7fdfffff) x = fmod(x,p+p); /* now x < 2p */ - if (((hx-hp)|(lx-lp))==0) return zero*x; + if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */ + x = fabs(x); p = fabs(p); if (hp<0x00200000) + { + if(x+x>p) { - if(x+x>p) { - x-=p; - if(x+x>=p) x -= p; + x-=p; + ik += 1; + if(x+x>=p) + { + x -= p; + ik += 1; + } } } - else - { + else + { p_half = 0.5*p; if(x>p_half) + { + x-=p; + ik += 1; + if(x>=p_half) { - x-=p; - if(x>=p_half) x -= p; + ik += 1; + x -= p; + } } } __setHigh(&x, __HI(x) ^sx); - *quo = (xx -x)/p; + + ik &= 0x7f; + if(sx != sy) ik = -ik; + *quo = ik; return x; } OVERLOADABLE double remquo(double x, double p, local int *quo) { - int hx,hp; - unsigned sx,lx,lp; + int hx,hp, ik = 0; + unsigned sx,sy, lx,lp; double p_half, zero = 0.0; double xx = x; hx = __HI(x); /* high word of x */ - lx = __LO(x); /* low word of x */ + lx = __LO(x); /* low word of x */ hp = __HI(p); /* high word of p */ - lp = __LO(p); /* low word of p */ + lp = __LO(p); /* low word of p */ sx = hx&0x80000000; + sy = hp&0x80000000; hp &= 0x7fffffff; hx &= 0x7fffffff; /* purge off exception values */ - if((hp|lp)==0) return (x*p)/(x*p); /* p = 0 */ + if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);} /* p = 0 */ if((hx>=0x7ff00000)|| /* x not finite */ ((hp>=0x7ff00000)&& /* p is NaN */ (((hp-0x7ff00000)|lp)!=0))) - return (x*p)/(x*p); + {*quo = 0; return (x*p)/(x*p);} - if (hp<=0x7fdfffff) x = fmod(x,p+p); /* now x < 2p */ - if (((hx-hp)|(lx-lp))==0) return zero*x; + if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */ + x = fabs(x); p = fabs(p); if (hp<0x00200000) + { + if(x+x>p) { - if(x+x>p) { - x-=p; - if(x+x>=p) x -= p; + x-=p; + ik += 1; + if(x+x>=p) + { + x -= p; + ik += 1; + } } } - else - { + else + { p_half = 0.5*p; if(x>p_half) + { + x-=p; + ik += 1; + if(x>=p_half) { - x-=p; - if(x>=p_half) x -= p; + ik += 1; + x -= p; + } } } __setHigh(&x, __HI(x) ^sx); - *quo = (xx -x)/p; - return x; + ik &= 0x7f; + if(sx != sy) ik = -ik; + *quo = ik; + return x; } + OVERLOADABLE double remquo(double x, double p, private int *quo) { - int hx,hp; - unsigned sx,lx,lp; + int hx,hp, ik = 0; + unsigned sx,sy, lx,lp; double p_half, zero = 0.0; double xx = x; hx = __HI(x); /* high word of x */ - lx = __LO(x); /* low word of x */ + lx = __LO(x); /* low word of x */ hp = __HI(p); /* high word of p */ - lp = __LO(p); /* low word of p */ + lp = __LO(p); /* low word of p */ sx = hx&0x80000000; + sy = hp&0x80000000; hp &= 0x7fffffff; hx &= 0x7fffffff; /* purge off exception values */ - if((hp|lp)==0) return (x*p)/(x*p); /* p = 0 */ + if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);} /* p = 0 */ if((hx>=0x7ff00000)|| /* x not finite */ ((hp>=0x7ff00000)&& /* p is NaN */ (((hp-0x7ff00000)|lp)!=0))) - return (x*p)/(x*p); + {*quo = 0; return (x*p)/(x*p);} + + if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */ - if (hp<=0x7fdfffff) x = fmod(x,p+p); /* now x < 2p */ - if (((hx-hp)|(lx-lp))==0) return zero*x; x = fabs(x); p = fabs(p); if (hp<0x00200000) + { + if(x+x>p) { - if(x+x>p) { - x-=p; - if(x+x>=p) x -= p; + x-=p; + ik += 1; + if(x+x>=p) + { + x -= p; + ik += 1; + } } } - else - { + else + { p_half = 0.5*p; if(x>p_half) + { + x-=p; + ik += 1; + if(x>=p_half) { - x-=p; - if(x>=p_half) x -= p; + ik += 1; + x -= p; + } } } __setHigh(&x, __HI(x) ^sx); - *quo = (xx -x)/p; - return x; + ik &= 0x7f; + if(sx != sy) ik = -ik; + *quo = ik; + return x; } + OVERLOADABLE double sincos(double x, global double *cosval) { *cosval = cos(x); diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl index f7a91c83..e01401b3 100644 --- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl @@ -4041,55 +4041,205 @@ OVERLOADABLE double modf(double x, double *i) return x - retVal; } +double __fmod (double x, double y, int* ik) +{ + const double one = 1.0, Zero[] = {0.0, -0.0,}; + int n,hx,hy,hz,ix,iy,sx,i; + uint lx,ly,lz; + int ifactor = 0; + + hx = __HI(x); + lx = __LO(x); + hy = __HI(y); + ly = __LO(y); + sx = hx&0x80000000; /* sign of x */ + hx ^=sx; /* |x| */ + hy &= 0x7fffffff; /* |y| */ + + /* purge off exception values */ + if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */ + ((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */ + return (x*y)/(x*y); + if(hx<=hy) { + if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */ + if(lx==ly) + { + *ik = 2; + return Zero[(uint)sx>>31]; /* |x|=|y| return x*0*/ + } + } + + /* determine ix = ilogb(x) */ + if(hx<0x00100000) { /* subnormal x */ + if(hx==0) { + for (ix = -1043, i=lx; i>0; i<<=1) ix -=1; + } else { + for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1; + } + } else ix = (hx>>20)-1023; + + /* determine iy = ilogb(y) */ + if(hy<0x00100000) { /* subnormal y */ + if(hy==0) { + for (iy = -1043, i=ly; i>0; i<<=1) iy -=1; + } else { + for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1; + } + } else iy = (hy>>20)-1023; + + /* set up {hx,lx}, {hy,ly} and align y to x */ + if(ix >= -1022) + hx = 0x00100000|(0x000fffff&hx); + else { /* subnormal x, shift x to normal */ + n = -1022-ix; + if(n<=31) { + hx = (hx<<n)|(lx>>(32-n)); + lx <<= n; + } else { + hx = lx<<(n-32); + lx = 0; + } + } + if(iy >= -1022) + hy = 0x00100000|(0x000fffff&hy); + else { /* subnormal y, shift y to normal */ + n = -1022-iy; + if(n<=31) { + hy = (hy<<n)|(ly>>(32-n)); + ly <<= n; + } else { + hy = ly<<(n-32); + ly = 0; + } + } + + /* fix point fmod */ + n = ix - iy; + while(n--) + { + hz=hx-hy;lz=lx-ly; + if(lx<ly) hz -= 1; + + ifactor <<= 1; + if(hz<0) + { + hx = hx+hx+(lx>>31); + lx = lx+lx; + } + else + { + if(n<16)ifactor |= 1; + + if((hz|lz)==0) /* return sign(x)*0 */ + { + *ik = (ifactor << (n + 2)); + return Zero[(uint)sx>>31]; + } + + hx = hz+hz+(lz>>31); lx = lz+lz; + } + } + + hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1; + if(hz>=0){hx=hz;lx=lz;ifactor <<= 1; ifactor |= 1;} + else ifactor = (ifactor << 1); + + ifactor = (ifactor << 1); + *ik = ifactor; + /* convert back to floating value and restore the sign */ + if((hx|lx)==0) /* return sign(x)*0 */ + return Zero[(uint)sx>>31]; + while(hx<0x00100000) { /* normalize x */ + hx = hx+hx+(lx>>31); lx = lx+lx; + iy -= 1; + } + if(iy>= -1022) { /* normalize output */ + hx = ((hx-0x00100000)|((iy+1023)<<20)); + __setHigh(&x,hx|sx); + __setLow(&x, lx); + } else { /* subnormal output */ + n = -1022 - iy; + if(n<=20) { + lx = (lx>>n)|((uint)hx<<(32-n)); + hx >>= n; + } else if (n<=31) { + lx = (hx<<(32-n))|(lx>>n); hx = sx; + } else { + lx = hx>>(n-32); hx = sx; + } + __setHigh(&x,hx|sx); + __setLow(&x, lx); + + x *= one; /* create necessary signal */ + } + return x; /* exact output */ + +} + OVERLOADABLE double remquo(double x, double p, int *quo) { - int hx,hp; - unsigned sx,lx,lp; + int hx,hp, ik = 0; + unsigned sx,sy, lx,lp; double p_half, zero = 0.0; double xx = x; hx = __HI(x); /* high word of x */ - lx = __LO(x); /* low word of x */ + lx = __LO(x); /* low word of x */ hp = __HI(p); /* high word of p */ - lp = __LO(p); /* low word of p */ + lp = __LO(p); /* low word of p */ sx = hx&0x80000000; + sy = hp&0x80000000; hp &= 0x7fffffff; hx &= 0x7fffffff; /* purge off exception values */ - if((hp|lp)==0) return (x*p)/(x*p); /* p = 0 */ + if((hp|lp)==0){*quo = 0; return (x*p)/(x*p);} /* p = 0 */ if((hx>=0x7ff00000)|| /* x not finite */ ((hp>=0x7ff00000)&& /* p is NaN */ (((hp-0x7ff00000)|lp)!=0))) - return (x*p)/(x*p); + {*quo = 0; return (x*p)/(x*p);} + + if (hp<=0x7fdfffff) x = __fmod(x,p+p, &ik); /* now x < 2p */ - if (hp<=0x7fdfffff) x = fmod(x,p+p); /* now x < 2p */ - if (((hx-hp)|(lx-lp))==0) return zero*x; x = fabs(x); p = fabs(p); if (hp<0x00200000) + { + if(x+x>p) { - if(x+x>p) { - x-=p; - if(x+x>=p) x -= p; + x-=p; + ik += 1; + if(x+x>=p) + { + x -= p; + ik += 1; + } } } - else - { + else + { p_half = 0.5*p; if(x>p_half) + { + x-=p; + ik += 1; + if(x>=p_half) { - x-=p; - if(x>=p_half) x -= p; + ik += 1; + x -= p; + } } } __setHigh(&x, __HI(x) ^sx); - *quo = (xx -x)/p; - return x; + ik &= 0x7f; + if(sx != sy) ik = -ik; + *quo = ik; + return x; } + OVERLOADABLE double sincos(double x, double *cosval) { *cosval = cos(x); |