diff options
author | Mike Kaganski <mike.kaganski@collabora.com> | 2022-02-15 09:20:52 +0300 |
---|---|---|
committer | Mike Kaganski <mike.kaganski@collabora.com> | 2022-02-17 21:46:58 +0100 |
commit | 9eb9083ff2fdaeb96399a0830a4394de4e29ef64 (patch) | |
tree | 046cc63551286fb3e59c6b79ebeea505a23b2eb0 /sal | |
parent | ee373f34ae1509e8d9fffaf4b5140ee9c35e8d41 (diff) |
Use Dragonbox to implement doubleTo*String*
This header-only library is accurate in decimal representation of
doubles; provides API that allows to create custom representation
- so it's possible to use custom decimal separators and grouping.
This allows to unify all corner cases: integers, numbers close to
DBL_MAX, up-rounding to the next decade.
Note that Dragonbox creates the shortest decimal representation
of the number, that is unambiguously convertible back to the same
number; thus it may hide trailing digits that are unneeded for
such conversion.
The functional changes are minimal, and beneficial:
1. Rounding numbers close to DBL_MAX now takes into account the
bEraseTrailingDecZeros argument, as it should, allowing to have
"1.8E+308" for rounding DBL_MAX to 2 decimals without trailing
zeroes, instead of previous "1.80E+308".
2. Incorrect rounding is fixed in some cases, e.g. 9.9999999999999929
rounded to 10 previously using rtl_math_DecimalPlaces_Max.
3. Representing the number in the shortest way may change display
of some printed numbers. E.g., 5th greatest double is represented
as "1.797693134862315E+308" instead of a bit longer, but giving
the same double on roundtrip, "1.7976931348623149E+308". This would
generally look better for some numbers similar to the famous 0.1,
where users would likely expect more "round" representation where
it's unambiguous (but we still truncate to 15 significant decimals
anyway - so there's no point in pretending to provide exact digits
for actual binary representation).
These are reflected in the unit tests affected by the change.
Change-Id: I05e20274a30eec499593ee3e9ec070e1269232a2
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/129948
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'sal')
-rw-r--r-- | sal/Library_sal.mk | 1 | ||||
-rw-r--r-- | sal/qa/rtl/math/test-rtl-math.cxx | 13 | ||||
-rw-r--r-- | sal/rtl/math.cxx | 288 |
3 files changed, 77 insertions, 225 deletions
diff --git a/sal/Library_sal.mk b/sal/Library_sal.mk index d8d409195a39..aae7c97f0310 100644 --- a/sal/Library_sal.mk +++ b/sal/Library_sal.mk @@ -43,6 +43,7 @@ $(eval $(call gb_Library_use_libraries,sal,\ )) $(eval $(call gb_Library_use_externals,sal,\ + dragonbox \ dtoa \ valgrind \ zlib \ diff --git a/sal/qa/rtl/math/test-rtl-math.cxx b/sal/qa/rtl/math/test-rtl-math.cxx index aaadccc8e97c..94840dbdb7e8 100644 --- a/sal/qa/rtl/math/test-rtl-math.cxx +++ b/sal/qa/rtl/math/test-rtl-math.cxx @@ -432,7 +432,8 @@ public: fVal = std::nextafter( fVal, 0); aRes = rtl::math::doubleToUString( fVal, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true); - CPPUNIT_ASSERT_EQUAL( OUString("1.7976931348623149E+308"), aRes); + CPPUNIT_ASSERT_EQUAL( OUString("1.797693134862315E+308"), aRes); + CPPUNIT_ASSERT_EQUAL(fVal, rtl::math::stringToDouble(aRes, '.', ',')); // Test roundtrip // DBL_MAX and 4 nextafters rounded to 15 decimals fVal = DBL_MAX; @@ -463,24 +464,20 @@ public: // DBL_MAX rounded to 2 decimals fVal = DBL_MAX; aRes = rtl::math::doubleToUString( fVal, rtl_math_StringFormat_Automatic, 2, '.', true); - CPPUNIT_ASSERT_EQUAL( OUString("1.80E+308"), aRes); + CPPUNIT_ASSERT_EQUAL( OUString("1.8E+308"), aRes); // Crashed after commit eae24a9488814e77254d175c11fc4a138c1dbd30 fVal = 123456.789; aRes = rtl::math::doubleToUString(fVal, rtl_math_StringFormat_E, 2, '.', false); CPPUNIT_ASSERT_EQUAL(OUString("1.23E+005"), aRes); - // Testing "after-treatment of up-rounding to the next decade" branch - // See void doubleToString in sal/rtl/math.cxx - // 1. Yet empty buffer fVal = 9.9999999999999929; aRes = rtl::math::doubleToUString(fVal, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true); - CPPUNIT_ASSERT_EQUAL(OUString("10"), aRes); + CPPUNIT_ASSERT_EQUAL(OUString("9.99999999999999"), aRes); - // 2. Buffer with some content fVal = 0.99999999999999933; aRes = rtl::math::doubleToUString(fVal, rtl_math_StringFormat_F, rtl_math_DecimalPlaces_Max, '.', true); - CPPUNIT_ASSERT_EQUAL(OUString("1"), aRes); + CPPUNIT_ASSERT_EQUAL(OUString("0.999999999999999"), aRes); } void test_approx() { diff --git a/sal/rtl/math.cxx b/sal/rtl/math.cxx index bec83c380b32..edb894cb7820 100644 --- a/sal/rtl/math.cxx +++ b/sal/rtl/math.cxx @@ -19,6 +19,7 @@ #include <rtl/math.h> +#include <o3tl/safeint.hxx> #include <osl/diagnose.h> #include <rtl/character.hxx> #include <rtl/math.hxx> @@ -34,6 +35,7 @@ #include <memory> #include <stdlib.h> +#include <dragonbox/dragonbox.h> #include <dtoa.h> constexpr int minExp = -323, maxExp = 308; @@ -104,11 +106,6 @@ static double getN10Exp(int nExp) namespace { -double const nCorrVal[] = { - 0, 9e-1, 9e-2, 9e-3, 9e-4, 9e-5, 9e-6, 9e-7, 9e-8, - 9e-9, 9e-10, 9e-11, 9e-12, 9e-13, 9e-14, 9e-15 -}; - struct StringTraits { typedef char Char; @@ -245,6 +242,38 @@ int getBitsInFracPart(double fAbsValue) return std::max(nBitsInFracPart, 0); } +constexpr sal_uInt64 eX[] = { 10ull, + 100ull, + 1000ull, + 10000ull, + 100000ull, + 1000000ull, + 10000000ull, + 100000000ull, + 1000000000ull, + 10000000000ull, + 100000000000ull, + 1000000000000ull, + 10000000000000ull, + 100000000000000ull, + 1000000000000000ull, + 10000000000000000ull, + 100000000000000000ull, + 1000000000000000000ull, + 10000000000000000000ull }; + +int decimalDigits(sal_uInt64 n) +{ + return std::distance(std::begin(eX), std::upper_bound(std::begin(eX), std::end(eX), n)) + 1; +} + +sal_uInt64 roundToPow10(sal_uInt64 n, int e) +{ + assert(e > 0 && o3tl::make_unsigned(e) <= std::size(eX)); + const sal_uInt64 d = eX[e - 1]; + return (n + d / 2) / d * d; +} + template< typename T > void doubleToString(typename T::String ** pResult, sal_Int32 * pResultCapacity, sal_Int32 nResultOffset, @@ -254,18 +283,6 @@ void doubleToString(typename T::String ** pResult, typename T::Char cGroupSeparator, bool bEraseTrailingDecZeros) { - static double const nRoundVal[] = { - 5.0e+0, 0.5e+0, 0.5e-1, 0.5e-2, 0.5e-3, 0.5e-4, 0.5e-5, 0.5e-6, - 0.5e-7, 0.5e-8, 0.5e-9, 0.5e-10,0.5e-11,0.5e-12,0.5e-13,0.5e-14 - }; - - // sign adjustment, instead of testing for fValue<0.0 this will also fetch - // -0.0 - bool bSign = std::signbit(fValue); - - if (bSign) - fValue = -fValue; - if (std::isnan(fValue)) { // #i112652# XMLSchema-2 @@ -283,8 +300,7 @@ void doubleToString(typename T::String ** pResult, return; } - bool bHuge = fValue == HUGE_VAL; // g++ 3.0.1 requires it this way... - if (bHuge || std::isinf(fValue)) + if (std::isinf(fValue)) { // #i112652# XMLSchema-2 sal_Int32 nCapacity = RTL_CONSTASCII_LENGTH("-INF"); @@ -295,7 +311,7 @@ void doubleToString(typename T::String ** pResult, nResultOffset = 0; } - if ( bSign ) + if (std::signbit(fValue)) T::appendAscii(pResult, pResultCapacity, &nResultOffset, RTL_CONSTASCII_STRINGPARAM("-")); @@ -305,6 +321,18 @@ void doubleToString(typename T::String ** pResult, return; } + decltype(jkj::dragonbox::to_decimal(fValue)) aParts{}; + if (fValue) // to_decimal is documented to only handle non-zero finite numbers + aParts = jkj::dragonbox::to_decimal(fValue); + else + aParts.is_negative = std::signbit(fValue); // Handle -0.0 + + int nOrigDigits = decimalDigits(aParts.significand); + int nExp = nOrigDigits + aParts.exponent - 1; + int nRoundDigits = 15; + if (aParts.is_negative) + fValue = -fValue; + // Unfortunately the old rounding below writes 1.79769313486232e+308 for // DBL_MAX and 4 subsequent nextafter(...,0). static const double fB1 = std::nextafter( DBL_MAX, 0); @@ -318,77 +346,17 @@ void doubleToString(typename T::String ** pResult, // they exceed range they should not be written to exchange strings or // file formats. - // Writing pDig up to decimals(-1,-2) then appending one digit from - // pRou xor one or two digits from pSlot[]. - constexpr char pDig[] = "7976931348623157"; - constexpr char pRou[] = "8087931359623267"; // the only up-carry is 80 - static_assert(SAL_N_ELEMENTS(pDig) == SAL_N_ELEMENTS(pRou), "digit count mismatch"); - constexpr sal_Int32 nDig2 = RTL_CONSTASCII_LENGTH(pRou) - 2; - sal_Int32 nCapacity = RTL_CONSTASCII_LENGTH(pRou) + 8; // + "-1.E+308" - const char pSlot[5][2][3] = - { // rounded, not - "67", "57", // DBL_MAX - "65", "55", - "53", "53", - "51", "51", - "59", "49", - }; - - if (!pResultCapacity) - { - pResultCapacity = &nCapacity; - T::createBuffer(pResult, pResultCapacity); - nResultOffset = 0; - } - - if (bSign) - T::appendAscii(pResult, pResultCapacity, &nResultOffset, - RTL_CONSTASCII_STRINGPARAM("-")); - - nDecPlaces = std::clamp<sal_Int32>( nDecPlaces, 0, RTL_CONSTASCII_LENGTH(pRou)); - if (nDecPlaces == 0) - { - T::appendAscii(pResult, pResultCapacity, &nResultOffset, - RTL_CONSTASCII_STRINGPARAM("2")); - } - else - { - T::appendAscii(pResult, pResultCapacity, &nResultOffset, - RTL_CONSTASCII_STRINGPARAM("1")); - T::appendChars(pResult, pResultCapacity, &nResultOffset, &cDecSeparator, 1); - if (nDecPlaces <= 2) - { - T::appendAscii(pResult, pResultCapacity, &nResultOffset, pRou, nDecPlaces); - } - else if (nDecPlaces <= nDig2) - { - T::appendAscii(pResult, pResultCapacity, &nResultOffset, pDig, nDecPlaces - 1); - T::appendAscii(pResult, pResultCapacity, &nResultOffset, pRou + nDecPlaces - 1, 1); - } - else - { - const sal_Int32 nDec = nDecPlaces - nDig2; - nDecPlaces -= nDec; - // nDec-1 is also offset into slot, rounded(1-1=0) or not(2-1=1) - const size_t nSlot = ((fValue < fB3) ? 4 : ((fValue < fB2) ? 3 - : ((fValue < fB1) ? 2 : ((fValue < DBL_MAX) ? 1 : 0)))); - - T::appendAscii(pResult, pResultCapacity, &nResultOffset, pDig, nDecPlaces); - T::appendAscii(pResult, pResultCapacity, &nResultOffset, pSlot[nSlot][nDec-1], nDec); - } - } - T::appendAscii(pResult, pResultCapacity, &nResultOffset, - RTL_CONSTASCII_STRINGPARAM("E+308")); - - return; + eFormat = rtl_math_StringFormat_E; + nDecPlaces = std::clamp<sal_Int32>( nDecPlaces, 0, 16); + nRoundDigits = 17; } // Use integer representation for integer values that fit into the // mantissa (1.((2^53)-1)) with a precision of 1 for highest accuracy. if ((eFormat == rtl_math_StringFormat_Automatic || - eFormat == rtl_math_StringFormat_F) && isRepresentableInteger(fValue)) + eFormat == rtl_math_StringFormat_F) && aParts.exponent >= 0 && fValue < 0x1p53) { - sal_Int64 nInt = static_cast< sal_Int64 >(fValue); + eFormat = rtl_math_StringFormat_F; if (nDecPlaces == rtl_math_DecimalPlaces_Max) nDecPlaces = 0; else @@ -397,68 +365,7 @@ void doubleToString(typename T::String ** pResult, if (bEraseTrailingDecZeros && nDecPlaces > 0) nDecPlaces = 0; - // Round before decimal position. - if (nDecPlaces < 0) - { - sal_Int64 nRounding = static_cast< sal_Int64 >(getN10Exp(-nDecPlaces - 1)); - const sal_Int64 nTemp = (nInt / nRounding + 5) / 10; - nInt = nTemp * 10 * nRounding; - } - - // Max 1 sign, 16 integer digits, 15 group separators, 1 decimal - // separator, 15 decimals digits. - typename T::Char aBuf[64]; - typename T::Char* pEnd = aBuf + 40; - typename T::Char* pStart = pEnd; - - // Backward fill. - sal_Int32 nGrouping = cGroupSeparator && pGroups ? *pGroups : 0; - sal_Int32 nGroupDigits = 0; - do - { - typename T::Char nDigit = nInt % 10; - nInt /= 10; - *--pStart = nDigit + '0'; - if (nGrouping && nGrouping == ++nGroupDigits && nInt) - { - *--pStart = cGroupSeparator; - if (*(pGroups + 1)) - nGrouping = *++pGroups; - nGroupDigits = 0; - } - } - while (nInt); - if (bSign) - *--pStart = '-'; - - // Append decimals. - if (nDecPlaces > 0) - { - *pEnd++ = cDecSeparator; - pEnd = std::fill_n(pEnd, nDecPlaces, '0'); - } - - if (!pResultCapacity) - T::createString(pResult, pStart, pEnd - pStart); - else - T::appendChars(pResult, pResultCapacity, &nResultOffset, pStart, pEnd - pStart); - - return; - } - - // find the exponent - int nExp = 0; - if ( fValue > 0.0 ) - { - // Cap nExp at a small value beyond which "fValue /= N10Exp" would lose precision (or N10Exp - // might even be zero); that will produce output with the decimal point in a non-normalized - // position, but the current quality of output for such small values is probably abysmal, - // anyway: - nExp = std::max( - static_cast< int >(floor(log10(fValue))), std::numeric_limits<double>::min_exponent10); - double const N10Exp = getN10Exp(nExp); - assert(N10Exp != 0); - fValue /= N10Exp; + nRoundDigits = nOrigDigits; // no rounding } switch (eFormat) @@ -531,12 +438,14 @@ void doubleToString(typename T::String ** pResult, nDigits += nExp; // Round the number - if(nDigits >= 0) + nRoundDigits = std::min<int>(nDigits, nRoundDigits); + if(nDigits >= 0 && nOrigDigits > nRoundDigits) { - fValue += nRoundVal[std::min<sal_Int32>(nDigits, 15)]; - if (fValue >= 10) + aParts.significand = roundToPow10(aParts.significand, nOrigDigits - nRoundDigits); + assert(aParts.significand <= eX[nOrigDigits - 1]); + if (aParts.significand == eX[nOrigDigits - 1]) // up-rounding to the next decade { - fValue = 1.0; + nOrigDigits++; nExp++; if (eFormat == rtl_math_StringFormat_F) @@ -552,7 +461,7 @@ void doubleToString(typename T::String ** pResult, assert(nBuf <= 1024); typename T::Char* pBuf = static_cast<typename T::Char*>(alloca(nBuf * sizeof(typename T::Char))); typename T::Char * p = pBuf; - if ( bSign ) + if (aParts.is_negative) *p++ = '-'; bool bHasDec = false; @@ -612,77 +521,22 @@ void doubleToString(typename T::String ** pResult, // print the number if (nDigits > 0) { - for (int i = 0; ; i++) + for (int nCurExp = nOrigDigits - 1;;) { - if (i < 15) // was 16 in ancient versions, which leads to inaccuracies + int nDigit; + if (aParts.significand > 0 && nCurExp > 0) { - int nDigit; - if (nDigits-1 == 0 && i > 0 && i < 14) - nDigit = floor( fValue + nCorrVal[15-i]); - else - nDigit = fValue + 1E-15; - - if (nDigit >= 10) - { // after-treatment of up-rounding to the next decade - typename T::Char* p1 = pBuf; - // Assert that no one changed the logic we rely on. - assert(!bSign || *p1 == '-'); - // Do not touch leading minus sign put earlier. - if (bSign) - ++p1; - assert(p1 <= p); - if (p1 == p) - { - *p++ = '1'; - if (eFormat != rtl_math_StringFormat_F) - { - *p++ = cDecSeparator; - nExp++; - bHasDec = true; - } - *p++ = '0'; - } - else - { - for (typename T::Char* p2 = p - 1; p2 >= p1; --p2) - { - typename T::Char cS = *p2; - if (cS == cDecSeparator) - continue; - if (cS != '9') - { - ++*p2; - break; - } - *p2 = '0'; - if (p2 == p1) // The number consisted of all 9s replaced to all 0s - { - if (eFormat == rtl_math_StringFormat_F) - { // move everything to the right before inserting '1' - std::memmove(p2 + 1, p2, (p++ - p2) * sizeof(*p)); - } - else - { - nExp++; - } - *p2 = '1'; - } - } - - *p++ = '0'; - } - fValue = 0.0; - } - else - { - *p++ = nDigit + '0'; - fValue = (fValue - nDigit) * 10.0; - } + --nCurExp; + nDigit = aParts.significand / eX[nCurExp]; + aParts.significand %= eX[nCurExp]; } else { - *p++ = '0'; + nDigit = aParts.significand; + aParts.significand = 0; } + assert(nDigit >= 0 && nDigit < 10); + *p++ = nDigit + '0'; if (!--nDigits) break; // for |