diff options
author | rander <rander.wang@intel.com> | 2017-04-12 16:29:33 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-05-04 19:08:31 +0800 |
commit | 2f77bfe64c164579e9baa07d78d4d2f9c63f2e95 (patch) | |
tree | 2a9a5d8f7af478cafdcfe8169ea6f67cb0676943 | |
parent | 2b80b5fb3a5ae54f23b1a0bb1bf208bd3a99f55d (diff) |
backend: refine fma to pass cft
port from openlibm and refine it to round to even
Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 202 |
1 files changed, 201 insertions, 1 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl index b020ef1a..f7828946 100644 --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl @@ -1519,9 +1519,209 @@ OVERLOADABLE double floor(double x) } } +/*- + * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +struct dd { + double hi; + double lo; +}; + +static inline struct dd +dd_add(double a, double b) +{ + struct dd ret; + double s; + + ret.hi = a + b; + s = ret.hi - a; + ret.lo = (a - (ret.hi - s)) + (b - s); + return (ret); +} + +static inline double +add_adjusted(double a, double b) +{ + struct dd sum; + long hibits, lobits; + + sum = dd_add(a, b); + if (sum.lo != 0) { + hibits = as_long(sum.hi); + if ((hibits & 1) == 0) { + /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ + lobits = as_long(sum.lo); + hibits += 1 - ((hibits ^ lobits) >> 62); + sum.hi = as_double(hibits); + } + } + return (sum.hi); +} + +static inline double +add_and_denormalize(double a, double b, int scale) +{ + struct dd sum; + long hibits, lobits; + int bits_lost; + + sum = dd_add(a, b); + + if (sum.lo != 0) { + hibits = as_long(sum.hi); + bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1; + if ((bits_lost != 1) ^ (int)(hibits & 1)) { + /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ + lobits = as_long(sum.lo); + hibits += 1 - (((hibits ^ lobits) >> 62) & 2); + sum.hi = as_double(hibits); + } + } + return (ldexp(sum.hi, scale)); +} + +static inline struct dd +dd_mul(double a, double b) +{ + const double split = 0x1p27 + 1.0; + struct dd ret; + double ha, hb, la, lb, p, q; + + p = a * split; + ha = a - p; + ha += p; + la = a - ha; + + p = b * split; + hb = b - p; + hb += p; + lb = b - hb; + + p = ha * hb; + q = ha * lb + la * hb; + + ret.hi = p + q; + ret.lo = p - ret.hi + q + la * lb; + return (ret); +} + +double dd_add_rte(double a, double b) +{ + struct dd ret; + double s; + + ret.hi = a + b; + s = ret.hi - a; + ret.lo = (a - (ret.hi - s)) + (b - s); + ulong hi = as_ulong(ret.hi); + ulong lo = as_ulong(ret.lo); + int hexp = (int)(((hi & 0x7FF0000000000000) >> 52) - 1023); + int lexp = (int)(((lo & 0x7FF0000000000000) >> 52) - 1023); + ulong hman = ((hi&0xFFFFFFFFFFFFF) |0x10000000000000); + ulong lman = ((lo&0xFFFFFFFFFFFFF) | 0x10000000000000); + long hsign = (hi & 0x8000000000000000); + long lsign = (lo & 0x8000000000000000); + + if((hsign == lsign) && lo && (hexp -lexp == 54)) + { + if((hi & 0x1) && lman) + hi += 1; + return as_double(hi); + } + + return ret.hi; +} + +double __frexp(double x, int *exp) +{ + double two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */ + int hx, ix, lx; + hx = __HI(x); + ix = 0x7fffffff&hx; + lx = __LO(x); + *exp = 0; + if(ix>=0x7ff00000||((ix|lx)==0)) return x; /* 0,inf,nan */ + if (ix<0x00100000) { /* subnormal */ + x *= two54; + hx = __HI(x); + ix = hx&0x7fffffff; + *exp = -54; + } + *exp += (ix>>20)-1022; + hx = (hx&0x800fffff)|0x3fe00000; + __setHigh(&x, hx); + return x; +} + OVERLOADABLE double fma(double x, double y, double z) { - return mad(x, y, z); + double xs, ys, zs, adj; + struct dd xy, r; + int oround; + int ex, ey, ez; + int spread; + + if (x == 0.0 || y == 0.0) + return (x * y + z); + if (z == 0.0) + return (x * y); + if (!isfinite(x) || !isfinite(y)) + return (x * y + z); + if (!isfinite(z)) + return (z); + + xs = __frexp(x, &ex); + ys = __frexp(y, &ey); + zs = __frexp(z, &ez); + spread = ex + ey - ez; + + if (spread < -53) { + return (z); + } + + if (spread <= 53 * 2) + zs = ldexp(zs, -spread); + else + zs = copysign(2.225073858507201383090e-308, zs); + + + xy = dd_mul(xs, ys); + r = dd_add(xy.hi, zs); + + spread = ex + ey; + + if (r.hi == 0.0) { + return (xy.hi + zs + ldexp(xy.lo, spread)); + } + + adj = add_adjusted(r.lo, xy.lo); + double base = dd_add_rte(r.hi, adj); + if (spread + ilogb(r.hi) > -1023) + return (ldexp(base, spread)); + else + return (add_and_denormalize(r.hi, adj, spread)); } OVERLOADABLE double hypot(double x, double y) |