/* * Copyright (C) 2016 Marcin Koƛcielnicki * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "nvhw/fp.h" uint32_t fp32_sat(uint32_t x, bool fnz) { if (FP32_ISNAN(x)) return fnz ? 0 : FP32_CNAN; if (FP32_SIGN(x)) return 0; else if (x >= 0x3f800000) return 0x3f800000; else return x; } uint32_t fp32_rint(uint32_t x, enum fp_rm rm) { bool sx; int ex; uint32_t fx; if (FP32_ISNAN(x)) return FP32_CNAN; if (FP32_ISINF(x)) return x; fp32_parsefin(x, &sx, &ex, &fx, true); /* For ex larger than that, result is already an integer, and our shift would overflow. */ if (ex < FP32_MIDE + 23) { int shift = (FP32_MIDE + 23) - ex; fx = shr32(fx, shift, fp_adjust_rm(rm, sx)); if (fx == 0) { ex = 0; } else { ex = FP32_MIDE + 23; fx = norm32(fx, &ex, 23); } } return fp32_mkfin(sx, ex, fx, rm, true); } uint32_t fp32_minmax(uint32_t a, uint32_t b, bool min) { if (FP32_ISNAN(a)) { if (FP32_ISNAN(b)) return FP32_CNAN; a = b; } else if (FP32_ISNAN(b)) { b = a; } bool sa, sb; int ea, eb; uint32_t fa, fb; /* Flush denormals. */ fp32_parsefin(a, &sa, &ea, &fa, true); a = fp32_mkfin(sa, ea, fa, FP_RN, true); fp32_parsefin(b, &sb, &eb, &fb, true); b = fp32_mkfin(sb, eb, fb, FP_RN, true); bool flip = min; if (sa != sb) { /* Different signs, pick the positive one */ if (sa) flip = !flip; return flip ? b : a; } else { /* Same signs, compare exp & fract */ if (sa) flip = !flip; if (a < b) flip = !flip; return flip ? b : a; } } enum fp_cmp fp32_cmp(uint32_t a, uint32_t b, bool ftz) { if (FP32_ISNAN(a) || FP32_ISNAN(b)) return FP_UN; bool sa, sb; int ea, eb; uint32_t fa, fb; fp32_parsefin(a, &sa, &ea, &fa, ftz); fp32_parsefin(b, &sb, &eb, &fb, ftz); if (fa == 0 && fb == 0) { /* Zeros - equal regardless of sign. */ return FP_EQ; } else if (sa != sb) { /* Different signs, both finite or infinity. */ return sa ? FP_LT : FP_GT; } else if (ea == eb && fa == fb) { /* Get the equality case out of the way. */ return FP_EQ; } else { /* Same signs, both finite or infinity. */ if ((a < b) ^ sa) return FP_LT; else return FP_GT; } } uint32_t fp32_add(uint32_t a, uint32_t b, enum fp_rm rm) { if (FP32_ISNAN(a) || FP32_ISNAN(b)) return FP32_CNAN; if (FP32_ISINF(a)) { if (FP32_ISINF(b) && a != b) { /* Inf-Inf */ return FP32_CNAN; } return a; } if (FP32_ISINF(b)) return b; /* Two honest real numbers involved. */ bool sa, sb, sr; int ea, eb, er; uint32_t fa, fb; fp32_parsefin(a, &sa, &ea, &fa, true); fp32_parsefin(b, &sb, &eb, &fb, true); er = (ea > eb ? ea : eb) + 1; int32_t res = 0; fa = shr32(fa, er - ea - 4, FP_RT); fb = shr32(fb, er - eb - 4, FP_RT); res += sa ? -fa : fa; res += sb ? -fb : fb; if (res == 0) { /* Got a proper 0. */ er = 0; /* Make a -0 if both inputs were negative (ie. both -0). */ sr = sa && sb; } else { /* Compute sign, make sure accumulator is positive. */ sr = res < 0; if (sr) res = -res; /* 23 bits of input +1 bit of implicit 0 +3 bits for proper rounding +1 bit because we add two numbers --- 28 bit accumulator. Make sure bit 27 is actually set - normalize the number. */ res = norm32(res, &er, 27); /* Round it. */ res = shr32(res, 4, fp_adjust_rm(rm, sr)); } return fp32_mkfin(sr, er, res, rm, true); } uint32_t fp32_mul(uint32_t a, uint32_t b, enum fp_rm rm, bool zero_wins) { bool sa, sb, sr; int ea, eb, er; uint32_t fa, fb; fp32_parsefin(a, &sa, &ea, &fa, true); fp32_parsefin(b, &sb, &eb, &fb, true); if (zero_wins && (fa == 0 || fb == 0)) return 0; if (FP32_ISNAN(a) || FP32_ISNAN(b)) return FP32_CNAN; sr = sa ^ sb; if (FP32_ISINF(a) || FP32_ISINF(b)) { if (fa == 0 || fb == 0) { /* Inf*0 is NaN */ return FP32_CNAN; } else { /* Inf*finite and Inf*Inf are +-Inf */ return FP32_INF(sr); } } er = ea + eb - FP32_MIDE + 1; uint64_t res = (uint64_t)fa * fb; if (res == 0) { er = 0; } else { /* We multiplied two 24-bit numbers starting with 1s. The first 1 has to be at either 46th or 47th position. Make it 47. */ res = norm64(res, &er, 47); res = shr64(res, 24, fp_adjust_rm(rm, sr)); } return fp32_mkfin(sr, er, res, rm, true); } uint32_t fp32_mad(uint32_t a, uint32_t b, uint32_t c, bool zero_wins) { bool sa, sb, sc, ss, sr; int ea, eb, ec, es, er; uint32_t fa, fb, fc; fp32_parsefin(a, &sa, &ea, &fa, true); fp32_parsefin(b, &sb, &eb, &fb, true); fp32_parsefin(c, &sc, &ec, &fc, true); if (zero_wins && (fa == 0 || fb == 0)) return fp32_add(c, 0, FP_RN); ss = sa ^ sb; if (FP32_ISNAN(a) || FP32_ISNAN(b) || FP32_ISNAN(c)) return FP32_CNAN; if (FP32_ISINF(a) || FP32_ISINF(b)) { if (fa == 0 || fb == 0) { /* 0*inf */ return FP32_CNAN; } else if (FP32_ISINF(c) && sc != ss) { /* inf*inf - inf */ return FP32_CNAN; } else { return FP32_INF(ss); } } if (FP32_ISINF(c)) return FP32_INF(sc); if (fa == 0 || fb == 0 || fc == 0) return fp32_add(fp32_mul(a, b, FP_RN, zero_wins), c, FP_RN); es = ea + eb - FP32_MIDE + 1; int64_t res = (uint64_t)fa * fb; /* We multiplied two 24-bit numbers starting with 1s. The first 1 has to be at either 46th or 47th position. Make it 47. */ res = norm64(res, &es, 47); res = shr64(res, 24, fc ? FP_RZ : FP_RN); /* Two honest real numbers involved. */ er = (es > ec ? es : ec) + 1; res = shr32(res, er - es - 4, FP_RT); if (ss) res = -res; fc = shr32(fc, er - ec - 4, FP_RT); res += sc ? -(int64_t)fc : fc; if (!res) { /* Got a proper 0. */ er = 0; /* Make a -0 if both inputs were negative (ie. both -0). */ sr = ss && sc; } else { /* Compute sign, make sure accumulator is positive. */ sr = res < 0; if (sr) res = -res; /* 23 bits of input +1 bit of implicit 0 +3 bits for proper rounding +1 bit because we add two numbers --- 28 bit accumulator. Make sure bit 27 is actually set - normalize the number. */ res = norm32(res, &er, 27); if (er < 1) { res >>= 1-er; er = 1; } /* Round it. */ res = shr32(res, 4, FP_RN); } return fp32_mkfin(sr, er, res, FP_RN, true); } uint32_t fp16_to_fp32(uint16_t x) { bool sx; int ex; uint16_t sfx; uint32_t fx; fp16_parsefin(x, &sx, &ex, &sfx, false); if (FP16_ISNAN(x)) return FP32_CNAN; if (FP16_ISINF(x)) return FP32_INF(sx); ex += FP32_MIDE - FP16_MIDE; fx = sfx << 13; fx = norm32(fx, &ex, 23); return fp32_mkfin(sx, ex, fx, FP_RN, true); } uint16_t fp32_to_fp16(uint32_t x, enum fp_rm rm, bool rint) { bool sx; int ex; uint32_t fx; fp32_parsefin(x, &sx, &ex, &fx, true); if (FP32_ISNAN(x)) return FP16_NAN; if (FP32_ISINF(x)) return FP16_INF(sx); if (ex <= 0x5d) /* 0, rounding be damned */ return FP16_MAKE(sx, 0, 0); ex += FP16_MIDE - FP32_MIDE; int shift = 13; if (ex <= 0) { /* Going to be a denormal... */ shift -= ex - 1; ex = 1; } fx = shr32(fx, shift, rint ? FP_RZ : fp_adjust_rm(rm, sx)); return fp16_mkfin(sx, ex, fx, rm); } uint32_t fp32_from_u64(uint64_t x, enum fp_rm rm) { if (!x) return 0; int ex = FP32_MIDE + 63; uint32_t fx; /* first, shift it until MSB is lit */ x = norm64(x, &ex, 63); fx = shr64(x, 40, rm); return fp32_mkfin(false, ex, fx, rm, true); } uint64_t fp32_to_u64(uint32_t x, enum fp_rm rm, bool ftz) { bool sx; int ex; uint32_t fx; fp32_parsefin(x, &sx, &ex, &fx, ftz); if (FP32_ISNAN(x) || sx) return 0; if (FP32_ISINF(x)) return -1ull; ex -= FP32_MIDE + 23; if (ex > 40) return -1ull; return shr64(fx, -ex, rm); } enum fp_cmp fp64_cmp(uint64_t a, uint64_t b) { if (FP64_ISNAN(a) || FP64_ISNAN(b)) return FP_UN; bool sa, sb; int ea, eb; uint64_t fa, fb; fp64_parsefin(a, &sa, &ea, &fa); fp64_parsefin(b, &sb, &eb, &fb); if (fa == 0 && fb == 0) { /* Zeros - equal regardless of sign. */ return FP_EQ; } else if (sa != sb) { /* Different signs, both finite or infinity. */ return sa ? FP_LT : FP_GT; } else if (ea == eb && fa == fb) { /* Get the equality case out of the way. */ return FP_EQ; } else { /* Same signs, both finite or infinity. */ if ((a < b) ^ sa) return FP_LT; else return FP_GT; } } uint64_t fp64_rint(uint64_t x, enum fp_rm rm) { bool sx; int ex; uint64_t fx; if (FP64_ISNAN(x)) return x; if (FP64_ISINF(x)) return x; fp64_parsefin(x, &sx, &ex, &fx); /* For ex larger than that, result is already an integer, and our shift would overflow. */ if (ex < FP64_MIDE + 52) { int shift = (FP64_MIDE + 52) - ex; fx = shr64(fx, shift, fp_adjust_rm(rm, sx)); if (fx == 0) { ex = 0; } else { ex = FP64_MIDE + 52; fx = norm64(fx, &ex, 52); } } return fp64_mkfin(sx, ex, fx, rm); } uint64_t fp64_from_u64(uint64_t x, enum fp_rm rm) { if (!x) return 0; int ex = FP64_MIDE + 63; uint64_t fx; /* first, shift it until MSB is lit */ x = norm64(x, &ex, 63); fx = shr64(x, 11, rm); return fp64_mkfin(false, ex, fx, rm); } uint64_t fp64_to_u64(uint64_t x, enum fp_rm rm) { bool sx; int ex; uint64_t fx; fp64_parsefin(x, &sx, &ex, &fx); if (FP64_ISNAN(x) || sx) return 0; if (FP64_ISINF(x)) return -1ull; ex -= FP64_MIDE + 52; if (ex > 11) return -1ull; return shr64(fx, -ex, rm); } uint64_t fp32_to_fp64(uint32_t x) { bool sx; int ex; uint32_t sfx; uint64_t fx; fp32_parsefin(x, &sx, &ex, &sfx, false); if (FP32_ISNAN(x)) return FP64_NAN(sx, (uint64_t)(((sfx - FP32_IONE) | 1 << 22)) << (52 - 23)); if (FP32_ISINF(x)) return FP64_INF(sx); ex += FP64_MIDE - FP32_MIDE; fx = (uint64_t)sfx << (52 - 23); fx = norm64(fx, &ex, 52); return fp64_mkfin(sx, ex, fx, FP_RN); } uint32_t fp64_to_fp32(uint64_t x, enum fp_rm rm, bool rint) { bool sx; int ex; uint64_t fx; fp64_parsefin(x, &sx, &ex, &fx); if (FP64_ISNAN(x)) return FP32_NAN(sx, (fx - FP64_IONE) >> (52 - 23) | (1 << 22)); if (FP64_ISINF(x)) return FP32_INF(sx); ex += FP32_MIDE - FP64_MIDE; int shift = 52 - 23; if (ex <= 0) { /* Going to be a denormal... */ shift -= ex - 1; ex = 1; } fx = shr64(fx, shift, rint ? FP_RZ : fp_adjust_rm(rm, sx)); return fp32_mkfin(sx, ex, fx, rm, false); } uint64_t fp64_minmax(uint64_t a, uint64_t b, bool min) { if (FP64_ISNAN(a)) { if (FP64_ISNAN(b)) return b; a = b; } else if (FP64_ISNAN(b)) { b = a; } bool sa, sb; int ea, eb; uint64_t fa, fb; fp64_parsefin(a, &sa, &ea, &fa); fp64_parsefin(b, &sb, &eb, &fb); bool flip = min; if (sa != sb) { /* Different signs, pick the positive one */ if (sa) flip = !flip; return flip ? b : a; } else { /* Same signs, compare exp & fract */ if (sa) flip = !flip; if (a < b) flip = !flip; return flip ? b : a; } } uint64_t fp64_add(uint64_t a, uint64_t b, enum fp_rm rm) { if (FP64_ISNAN(b)) return b; if (FP64_ISNAN(a)) return a; if (FP64_ISINF(a)) { if (FP64_ISINF(b) && a != b) { /* Inf-Inf */ return FP64_NAN(true, 1ull << 51); } return a; } if (FP64_ISINF(b)) return b; /* Two honest real numbers involved. */ bool sa, sb, sr; int ea, eb, er; uint64_t fa, fb; fp64_parsefin(a, &sa, &ea, &fa); fp64_parsefin(b, &sb, &eb, &fb); er = (ea > eb ? ea : eb) + 1; int64_t res = 0; fa = shr64(fa, er - ea - 4, FP_RT); fb = shr64(fb, er - eb - 4, FP_RT); res += sa ? -fa : fa; res += sb ? -fb : fb; if (res == 0) { /* Got a proper 0. */ /* Make a -0 if both inputs were negative (ie. both -0). */ if (sa == sb) sr = sa; else sr = (rm == FP_RM); } else { /* Compute sign, make sure accumulator is positive. */ sr = res < 0; if (sr) res = -res; res = norm64(res, &er, 56); if (er < 1) { res >>= 1 - er; er = 1; } /* Round it. */ res = shr64(res, 4, fp_adjust_rm(rm, sr)); } return fp64_mkfin(sr, er, res, rm); } uint64_t fp64_fma(uint64_t a, uint64_t b, uint64_t c, enum fp_rm rm) { bool sa, sb, sc, ss, sr; int ea, eb, ec, es, er; uint64_t fa, fb, fc; fp64_parsefin(a, &sa, &ea, &fa); fp64_parsefin(b, &sb, &eb, &fb); fp64_parsefin(c, &sc, &ec, &fc); if (FP64_ISNAN(b)) return b; if (FP64_ISNAN(c)) return c; if (FP64_ISNAN(a)) return a; ss = sa ^ sb; if (FP64_ISINF(a) || FP64_ISINF(b)) { if (fa == 0 || fb == 0) { /* Inf*0 is NaN */ return FP64_NAN(true, 1ull << 51); } else { /* Inf*finite and Inf*Inf are +-Inf */ if (FP64_ISINF(c) && sc != ss) { /* Inf-Inf */ return FP64_NAN(true, 1ull << 51); } else { return FP64_INF(ss); } } } else if (FP64_ISINF(c)) { return c; } es = ea + eb - FP64_MIDE + 1; struct uint128 res = mul128(fa, fb * 8); res = norm128(res, &es, 108); if (!res.lo && !res.hi) es = ec; er = es; if (er < ec) er = ec; er++; if (er < 1) er = 1; res = shr128(res, er - 1 - es, FP_RT); if (ss) res = neg128(res); int sh = er - 4 - ec - 53; if (sh > 0) { fc = shr64(fc, sh, FP_RT); sh = 0; } if (sc) res = sub128(res, fc, -sh); else res = add128(res, fc, -sh); if (res.hi == 0 && res.lo == 0) { /* Got a proper 0. */ /* Make a -0 if both inputs were negative (ie. both -0). */ if (ss == sc) sr = ss; else sr = (rm == FP_RM); } else { sr = res.hi >> 63 & 1; if (sr) res = neg128(res); res = norm128(res, &er, 109); if (er < 1) { res = shr128(res, 1-er, FP_RZ); er = 1; } res = shr128(res, 57, fp_adjust_rm(rm, sr)); } return fp64_mkfin(sr, er, res.lo, rm); } uint64_t fp64_mul(uint64_t a, uint64_t b, enum fp_rm rm) { return fp64_fma(a, b, rm == FP_RM ? 0 : 1ull << 63, rm); }