summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--backend/src/libocl/tmpl/ocl_math_common.tmpl.cl202
1 files changed, 201 insertions, 1 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index b020ef1a..f7828946 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -1519,9 +1519,209 @@ OVERLOADABLE double floor(double x)
}
}
+/*-
+ * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+struct dd {
+ double hi;
+ double lo;
+};
+
+static inline struct dd
+dd_add(double a, double b)
+{
+ struct dd ret;
+ double s;
+
+ ret.hi = a + b;
+ s = ret.hi - a;
+ ret.lo = (a - (ret.hi - s)) + (b - s);
+ return (ret);
+}
+
+static inline double
+add_adjusted(double a, double b)
+{
+ struct dd sum;
+ long hibits, lobits;
+
+ sum = dd_add(a, b);
+ if (sum.lo != 0) {
+ hibits = as_long(sum.hi);
+ if ((hibits & 1) == 0) {
+ /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
+ lobits = as_long(sum.lo);
+ hibits += 1 - ((hibits ^ lobits) >> 62);
+ sum.hi = as_double(hibits);
+ }
+ }
+ return (sum.hi);
+}
+
+static inline double
+add_and_denormalize(double a, double b, int scale)
+{
+ struct dd sum;
+ long hibits, lobits;
+ int bits_lost;
+
+ sum = dd_add(a, b);
+
+ if (sum.lo != 0) {
+ hibits = as_long(sum.hi);
+ bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
+ if ((bits_lost != 1) ^ (int)(hibits & 1)) {
+ /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
+ lobits = as_long(sum.lo);
+ hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
+ sum.hi = as_double(hibits);
+ }
+ }
+ return (ldexp(sum.hi, scale));
+}
+
+static inline struct dd
+dd_mul(double a, double b)
+{
+ const double split = 0x1p27 + 1.0;
+ struct dd ret;
+ double ha, hb, la, lb, p, q;
+
+ p = a * split;
+ ha = a - p;
+ ha += p;
+ la = a - ha;
+
+ p = b * split;
+ hb = b - p;
+ hb += p;
+ lb = b - hb;
+
+ p = ha * hb;
+ q = ha * lb + la * hb;
+
+ ret.hi = p + q;
+ ret.lo = p - ret.hi + q + la * lb;
+ return (ret);
+}
+
+double dd_add_rte(double a, double b)
+{
+ struct dd ret;
+ double s;
+
+ ret.hi = a + b;
+ s = ret.hi - a;
+ ret.lo = (a - (ret.hi - s)) + (b - s);
+ ulong hi = as_ulong(ret.hi);
+ ulong lo = as_ulong(ret.lo);
+ int hexp = (int)(((hi & 0x7FF0000000000000) >> 52) - 1023);
+ int lexp = (int)(((lo & 0x7FF0000000000000) >> 52) - 1023);
+ ulong hman = ((hi&0xFFFFFFFFFFFFF) |0x10000000000000);
+ ulong lman = ((lo&0xFFFFFFFFFFFFF) | 0x10000000000000);
+ long hsign = (hi & 0x8000000000000000);
+ long lsign = (lo & 0x8000000000000000);
+
+ if((hsign == lsign) && lo && (hexp -lexp == 54))
+ {
+ if((hi & 0x1) && lman)
+ hi += 1;
+ return as_double(hi);
+ }
+
+ return ret.hi;
+}
+
+double __frexp(double x, int *exp)
+{
+ double two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
+ int hx, ix, lx;
+ hx = __HI(x);
+ ix = 0x7fffffff&hx;
+ lx = __LO(x);
+ *exp = 0;
+ if(ix>=0x7ff00000||((ix|lx)==0)) return x; /* 0,inf,nan */
+ if (ix<0x00100000) { /* subnormal */
+ x *= two54;
+ hx = __HI(x);
+ ix = hx&0x7fffffff;
+ *exp = -54;
+ }
+ *exp += (ix>>20)-1022;
+ hx = (hx&0x800fffff)|0x3fe00000;
+ __setHigh(&x, hx);
+ return x;
+}
+
OVERLOADABLE double fma(double x, double y, double z)
{
- return mad(x, y, z);
+ double xs, ys, zs, adj;
+ struct dd xy, r;
+ int oround;
+ int ex, ey, ez;
+ int spread;
+
+ if (x == 0.0 || y == 0.0)
+ return (x * y + z);
+ if (z == 0.0)
+ return (x * y);
+ if (!isfinite(x) || !isfinite(y))
+ return (x * y + z);
+ if (!isfinite(z))
+ return (z);
+
+ xs = __frexp(x, &ex);
+ ys = __frexp(y, &ey);
+ zs = __frexp(z, &ez);
+ spread = ex + ey - ez;
+
+ if (spread < -53) {
+ return (z);
+ }
+
+ if (spread <= 53 * 2)
+ zs = ldexp(zs, -spread);
+ else
+ zs = copysign(2.225073858507201383090e-308, zs);
+
+
+ xy = dd_mul(xs, ys);
+ r = dd_add(xy.hi, zs);
+
+ spread = ex + ey;
+
+ if (r.hi == 0.0) {
+ return (xy.hi + zs + ldexp(xy.lo, spread));
+ }
+
+ adj = add_adjusted(r.lo, xy.lo);
+ double base = dd_add_rte(r.hi, adj);
+ if (spread + ilogb(r.hi) > -1023)
+ return (ldexp(base, spread));
+ else
+ return (add_and_denormalize(r.hi, adj, spread));
}
OVERLOADABLE double hypot(double x, double y)