Use Dragonbox to implement doubleTo*String*

This header-only library is accurate in decimal representation of doubles; provides API that allows to create custom representation - so it's possible to use custom decimal separators and grouping. This allows to unify all corner cases: integers, numbers close to DBL_MAX, up-rounding to the next decade. Note that Dragonbox creates the shortest decimal representation of the number, that is unambiguously convertible back to the same number; thus it may hide trailing digits that are unneeded for such conversion. The functional changes are minimal, and beneficial: 1. Rounding numbers close to DBL_MAX now takes into account the bEraseTrailingDecZeros argument, as it should, allowing to have "1.8E+308" for rounding DBL_MAX to 2 decimals without trailing zeroes, instead of previous "1.80E+308". 2. Incorrect rounding is fixed in some cases, e.g. 9.9999999999999929 rounded to 10 previously using rtl_math_DecimalPlaces_Max. 3. Representing the number in the shortest way may change display of some printed numbers. E.g., 5th greatest double is represented as "1.797693134862315E+308" instead of a bit longer, but giving the same double on roundtrip, "1.7976931348623149E+308". This would generally look better for some numbers similar to the famous 0.1, where users would likely expect more "round" representation where it's unambiguous (but we still truncate to 15 significant decimals anyway - so there's no point in pretending to provide exact digits for actual binary representation). These are reflected in the unit tests affected by the change. Change-Id: I05e20274a30eec499593ee3e9ec070e1269232a2 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/129948 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
author: Mike Kaganski <mike.kaganski@collabora.com> 2022-02-15 09:20:52 +0300
committer: Mike Kaganski <mike.kaganski@collabora.com> 2022-02-17 21:46:58 +0100
commit: 9eb9083ff2fdaeb96399a0830a4394de4e29ef64 (patch)
tree: 046cc63551286fb3e59c6b79ebeea505a23b2eb0 /sal
parent: ee373f34ae1509e8d9fffaf4b5140ee9c35e8d41 (diff)
3 files changed, 77 insertions, 225 deletions
diff --git a/sal/Library_sal.mk b/sal/Library_sal.mk
index d8d409195a39..aae7c97f0310 100644
--- a/sal/Library_sal.mk
+++ b/sal/Library_sal.mk
@@ -43,6 +43,7 @@ $(eval $(call gb_Library_use_libraries,sal,\
 ))
 
 $(eval $(call gb_Library_use_externals,sal,\
+    dragonbox \
     dtoa \
     valgrind \
     zlib \
diff --git a/sal/qa/rtl/math/test-rtl-math.cxx b/sal/qa/rtl/math/test-rtl-math.cxx
index aaadccc8e97c..94840dbdb7e8 100644
--- a/sal/qa/rtl/math/test-rtl-math.cxx
+++ b/sal/qa/rtl/math/test-rtl-math.cxx
@@ -432,7 +432,8 @@ public:
         fVal = std::nextafter( fVal, 0);
         aRes = rtl::math::doubleToUString( fVal, rtl_math_StringFormat_Automatic,
                 rtl_math_DecimalPlaces_Max, '.', true);
-        CPPUNIT_ASSERT_EQUAL( OUString("1.7976931348623149E+308"), aRes);
+        CPPUNIT_ASSERT_EQUAL( OUString("1.797693134862315E+308"), aRes);
+        CPPUNIT_ASSERT_EQUAL(fVal, rtl::math::stringToDouble(aRes, '.', ',')); // Test roundtrip
 
         // DBL_MAX and 4 nextafters rounded to 15 decimals
         fVal = DBL_MAX;
@@ -463,24 +464,20 @@ public:
         // DBL_MAX rounded to 2 decimals
         fVal = DBL_MAX;
         aRes = rtl::math::doubleToUString( fVal, rtl_math_StringFormat_Automatic, 2, '.', true);
-        CPPUNIT_ASSERT_EQUAL( OUString("1.80E+308"), aRes);
+        CPPUNIT_ASSERT_EQUAL( OUString("1.8E+308"), aRes);
 
         // Crashed after commit eae24a9488814e77254d175c11fc4a138c1dbd30
         fVal = 123456.789;
         aRes = rtl::math::doubleToUString(fVal, rtl_math_StringFormat_E, 2, '.', false);
         CPPUNIT_ASSERT_EQUAL(OUString("1.23E+005"), aRes);
 
-        // Testing "after-treatment of up-rounding to the next decade" branch
-        // See void doubleToString in sal/rtl/math.cxx
-        // 1. Yet empty buffer
         fVal = 9.9999999999999929;
         aRes = rtl::math::doubleToUString(fVal, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true);
-        CPPUNIT_ASSERT_EQUAL(OUString("10"), aRes);
+        CPPUNIT_ASSERT_EQUAL(OUString("9.99999999999999"), aRes);
 
-        // 2. Buffer with some content
         fVal = 0.99999999999999933;
         aRes = rtl::math::doubleToUString(fVal, rtl_math_StringFormat_F, rtl_math_DecimalPlaces_Max, '.', true);
-        CPPUNIT_ASSERT_EQUAL(OUString("1"), aRes);
+        CPPUNIT_ASSERT_EQUAL(OUString("0.999999999999999"), aRes);
     }
 
     void test_approx() {
diff --git a/sal/rtl/math.cxx b/sal/rtl/math.cxx
index bec83c380b32..edb894cb7820 100644
--- a/sal/rtl/math.cxx
+++ b/sal/rtl/math.cxx
@@ -19,6 +19,7 @@
 
 #include <rtl/math.h>
 
+#include <o3tl/safeint.hxx>
 #include <osl/diagnose.h>
 #include <rtl/character.hxx>
 #include <rtl/math.hxx>
@@ -34,6 +35,7 @@
 #include <memory>
 #include <stdlib.h>
 
+#include <dragonbox/dragonbox.h>
 #include <dtoa.h>
 
 constexpr int minExp = -323, maxExp = 308;
@@ -104,11 +106,6 @@ static double getN10Exp(int nExp)
 
 namespace {
 
-double const nCorrVal[] = {
-    0, 9e-1, 9e-2, 9e-3, 9e-4, 9e-5, 9e-6, 9e-7, 9e-8,
-    9e-9, 9e-10, 9e-11, 9e-12, 9e-13, 9e-14, 9e-15
-};
-
 struct StringTraits
 {
     typedef char Char;
@@ -245,6 +242,38 @@ int getBitsInFracPart(double fAbsValue)
     return std::max(nBitsInFracPart, 0);
 }
 
+constexpr sal_uInt64 eX[] = { 10ull,
+                              100ull,
+                              1000ull,
+                              10000ull,
+                              100000ull,
+                              1000000ull,
+                              10000000ull,
+                              100000000ull,
+                              1000000000ull,
+                              10000000000ull,
+                              100000000000ull,
+                              1000000000000ull,
+                              10000000000000ull,
+                              100000000000000ull,
+                              1000000000000000ull,
+                              10000000000000000ull,
+                              100000000000000000ull,
+                              1000000000000000000ull,
+                              10000000000000000000ull };
+
+int decimalDigits(sal_uInt64 n)
+{
+    return std::distance(std::begin(eX), std::upper_bound(std::begin(eX), std::end(eX), n)) + 1;
+}
+
+sal_uInt64 roundToPow10(sal_uInt64 n, int e)
+{
+    assert(e > 0 && o3tl::make_unsigned(e) <= std::size(eX));
+    const sal_uInt64 d = eX[e - 1];
+    return (n + d / 2) / d * d;
+}
+
 template< typename T >
 void doubleToString(typename T::String ** pResult,
                            sal_Int32 * pResultCapacity, sal_Int32 nResultOffset,
@@ -254,18 +283,6 @@ void doubleToString(typename T::String ** pResult,
                            typename T::Char cGroupSeparator,
                            bool bEraseTrailingDecZeros)
 {
-    static double const nRoundVal[] = {
-        5.0e+0, 0.5e+0, 0.5e-1, 0.5e-2, 0.5e-3, 0.5e-4, 0.5e-5, 0.5e-6,
-        0.5e-7, 0.5e-8, 0.5e-9, 0.5e-10,0.5e-11,0.5e-12,0.5e-13,0.5e-14
-    };
-
-    // sign adjustment, instead of testing for fValue<0.0 this will also fetch
-    // -0.0
-    bool bSign = std::signbit(fValue);
-
-    if (bSign)
-        fValue = -fValue;
-
     if (std::isnan(fValue))
     {
         // #i112652# XMLSchema-2
@@ -283,8 +300,7 @@ void doubleToString(typename T::String ** pResult,
         return;
     }
 
-    bool bHuge = fValue == HUGE_VAL; // g++ 3.0.1 requires it this way...
-    if (bHuge || std::isinf(fValue))
+    if (std::isinf(fValue))
     {
         // #i112652# XMLSchema-2
         sal_Int32 nCapacity = RTL_CONSTASCII_LENGTH("-INF");
@@ -295,7 +311,7 @@ void doubleToString(typename T::String ** pResult,
             nResultOffset = 0;
         }
 
-        if ( bSign )
+        if (std::signbit(fValue))
             T::appendAscii(pResult, pResultCapacity, &nResultOffset,
                            RTL_CONSTASCII_STRINGPARAM("-"));
 
@@ -305,6 +321,18 @@ void doubleToString(typename T::String ** pResult,
         return;
     }
 
+    decltype(jkj::dragonbox::to_decimal(fValue)) aParts{};
+    if (fValue) // to_decimal is documented to only handle non-zero finite numbers
+        aParts = jkj::dragonbox::to_decimal(fValue);
+    else
+        aParts.is_negative = std::signbit(fValue); // Handle -0.0
+
+    int nOrigDigits = decimalDigits(aParts.significand);
+    int nExp = nOrigDigits + aParts.exponent - 1;
+    int nRoundDigits = 15;
+    if (aParts.is_negative)
+        fValue = -fValue;
+
     // Unfortunately the old rounding below writes 1.79769313486232e+308 for
     // DBL_MAX and 4 subsequent nextafter(...,0).
     static const double fB1 = std::nextafter( DBL_MAX, 0);
@@ -318,77 +346,17 @@ void doubleToString(typename T::String ** pResult,
         // they exceed range they should not be written to exchange strings or
         // file formats.
 
-        // Writing pDig up to decimals(-1,-2) then appending one digit from
-        // pRou xor one or two digits from pSlot[].
-        constexpr char pDig[] = "7976931348623157";
-        constexpr char pRou[] = "8087931359623267";     // the only up-carry is 80
-        static_assert(SAL_N_ELEMENTS(pDig) == SAL_N_ELEMENTS(pRou), "digit count mismatch");
-        constexpr sal_Int32 nDig2 = RTL_CONSTASCII_LENGTH(pRou) - 2;
-        sal_Int32 nCapacity = RTL_CONSTASCII_LENGTH(pRou) + 8;  // + "-1.E+308"
-        const char pSlot[5][2][3] =
-        { // rounded, not
-            "67", "57",     // DBL_MAX
-            "65", "55",
-            "53", "53",
-            "51", "51",
-            "59", "49",
-        };
-
-        if (!pResultCapacity)
-        {
-            pResultCapacity = &nCapacity;
-            T::createBuffer(pResult, pResultCapacity);
-            nResultOffset = 0;
-        }
-
-        if (bSign)
-            T::appendAscii(pResult, pResultCapacity, &nResultOffset,
-                           RTL_CONSTASCII_STRINGPARAM("-"));
-
-        nDecPlaces = std::clamp<sal_Int32>( nDecPlaces, 0, RTL_CONSTASCII_LENGTH(pRou));
-        if (nDecPlaces == 0)
-        {
-            T::appendAscii(pResult, pResultCapacity, &nResultOffset,
-                           RTL_CONSTASCII_STRINGPARAM("2"));
-        }
-        else
-        {
-            T::appendAscii(pResult, pResultCapacity, &nResultOffset,
-                           RTL_CONSTASCII_STRINGPARAM("1"));
-            T::appendChars(pResult, pResultCapacity, &nResultOffset, &cDecSeparator, 1);
-            if (nDecPlaces <= 2)
-            {
-                T::appendAscii(pResult, pResultCapacity, &nResultOffset, pRou, nDecPlaces);
-            }
-            else if (nDecPlaces <= nDig2)
-            {
-                T::appendAscii(pResult, pResultCapacity, &nResultOffset, pDig, nDecPlaces - 1);
-                T::appendAscii(pResult, pResultCapacity, &nResultOffset, pRou + nDecPlaces - 1, 1);
-            }
-            else
-            {
-                const sal_Int32 nDec = nDecPlaces - nDig2;
-                nDecPlaces -= nDec;
-                // nDec-1 is also offset into slot, rounded(1-1=0) or not(2-1=1)
-                const size_t nSlot = ((fValue < fB3) ? 4 : ((fValue < fB2) ? 3
-                            : ((fValue < fB1) ? 2 : ((fValue < DBL_MAX) ? 1 : 0))));
-
-                T::appendAscii(pResult, pResultCapacity, &nResultOffset, pDig, nDecPlaces);
-                T::appendAscii(pResult, pResultCapacity, &nResultOffset, pSlot[nSlot][nDec-1], nDec);
-            }
-        }
-        T::appendAscii(pResult, pResultCapacity, &nResultOffset,
-                       RTL_CONSTASCII_STRINGPARAM("E+308"));
-
-        return;
+        eFormat = rtl_math_StringFormat_E;
+        nDecPlaces = std::clamp<sal_Int32>( nDecPlaces, 0, 16);
+        nRoundDigits = 17;
     }
 
     // Use integer representation for integer values that fit into the
     // mantissa (1.((2^53)-1)) with a precision of 1 for highest accuracy.
     if ((eFormat == rtl_math_StringFormat_Automatic ||
-         eFormat == rtl_math_StringFormat_F) && isRepresentableInteger(fValue))
+         eFormat == rtl_math_StringFormat_F) && aParts.exponent >= 0 && fValue < 0x1p53)
     {
-        sal_Int64 nInt = static_cast< sal_Int64 >(fValue);
+        eFormat = rtl_math_StringFormat_F;
         if (nDecPlaces == rtl_math_DecimalPlaces_Max)
             nDecPlaces = 0;
         else
@@ -397,68 +365,7 @@ void doubleToString(typename T::String ** pResult,
         if (bEraseTrailingDecZeros && nDecPlaces > 0)
             nDecPlaces = 0;
 
-        // Round before decimal position.
-        if (nDecPlaces < 0)
-        {
-            sal_Int64 nRounding = static_cast< sal_Int64 >(getN10Exp(-nDecPlaces - 1));
-            const sal_Int64 nTemp = (nInt / nRounding + 5) / 10;
-            nInt = nTemp * 10 * nRounding;
-        }
-
-        // Max 1 sign, 16 integer digits, 15 group separators, 1 decimal
-        // separator, 15 decimals digits.
-        typename T::Char aBuf[64];
-        typename T::Char* pEnd = aBuf + 40;
-        typename T::Char* pStart = pEnd;
-
-        // Backward fill.
-        sal_Int32 nGrouping = cGroupSeparator && pGroups ? *pGroups : 0;
-        sal_Int32 nGroupDigits = 0;
-        do
-        {
-            typename T::Char nDigit = nInt % 10;
-            nInt /= 10;
-            *--pStart = nDigit + '0';
-            if (nGrouping && nGrouping == ++nGroupDigits && nInt)
-            {
-                *--pStart = cGroupSeparator;
-                if (*(pGroups + 1))
-                    nGrouping = *++pGroups;
-                nGroupDigits = 0;
-            }
-        }
-        while (nInt);
-        if (bSign)
-            *--pStart = '-';
-
-        // Append decimals.
-        if (nDecPlaces > 0)
-        {
-            *pEnd++ = cDecSeparator;
-            pEnd = std::fill_n(pEnd, nDecPlaces, '0');
-        }
-
-        if (!pResultCapacity)
-            T::createString(pResult, pStart, pEnd - pStart);
-        else
-            T::appendChars(pResult, pResultCapacity, &nResultOffset, pStart, pEnd - pStart);
-
-        return;
-    }
-
-    // find the exponent
-    int nExp = 0;
-    if ( fValue > 0.0 )
-    {
-        // Cap nExp at a small value beyond which "fValue /= N10Exp" would lose precision (or N10Exp
-        // might even be zero); that will produce output with the decimal point in a non-normalized
-        // position, but the current quality of output for such small values is probably abysmal,
-        // anyway:
-        nExp = std::max(
-            static_cast< int >(floor(log10(fValue))), std::numeric_limits<double>::min_exponent10);
-        double const N10Exp = getN10Exp(nExp);
-        assert(N10Exp != 0);
-        fValue /= N10Exp;
+        nRoundDigits = nOrigDigits; // no rounding
     }
 
     switch (eFormat)
@@ -531,12 +438,14 @@ void doubleToString(typename T::String ** pResult,
         nDigits += nExp;
 
     // Round the number
-    if(nDigits >= 0)
+    nRoundDigits = std::min<int>(nDigits, nRoundDigits);
+    if(nDigits >= 0 && nOrigDigits > nRoundDigits)
     {
-        fValue += nRoundVal[std::min<sal_Int32>(nDigits, 15)];
-        if (fValue >= 10)
+        aParts.significand = roundToPow10(aParts.significand, nOrigDigits - nRoundDigits);
+        assert(aParts.significand <= eX[nOrigDigits - 1]);
+        if (aParts.significand == eX[nOrigDigits - 1]) // up-rounding to the next decade
         {
-            fValue = 1.0;
+            nOrigDigits++;
             nExp++;
 
             if (eFormat == rtl_math_StringFormat_F)
@@ -552,7 +461,7 @@ void doubleToString(typename T::String ** pResult,
     assert(nBuf <= 1024);
     typename T::Char* pBuf = static_cast<typename T::Char*>(alloca(nBuf * sizeof(typename T::Char)));
     typename T::Char * p = pBuf;
-    if ( bSign )
+    if (aParts.is_negative)
         *p++ = '-';
 
     bool bHasDec = false;
@@ -612,77 +521,22 @@ void doubleToString(typename T::String ** pResult,
     // print the number
     if (nDigits > 0)
     {
-        for (int i = 0; ; i++)
+        for (int nCurExp = nOrigDigits - 1;;)
         {
-            if (i < 15)     // was 16 in ancient versions, which leads to inaccuracies
+            int nDigit;
+            if (aParts.significand > 0 && nCurExp > 0)
             {
-                int nDigit;
-                if (nDigits-1 == 0 && i > 0 && i < 14)
-                    nDigit = floor( fValue + nCorrVal[15-i]);
-                else
-                    nDigit = fValue + 1E-15;
-
-                if (nDigit >= 10)
-                {   // after-treatment of up-rounding to the next decade
-                    typename T::Char* p1 = pBuf;
-                    // Assert that no one changed the logic we rely on.
-                    assert(!bSign || *p1 == '-');
-                    // Do not touch leading minus sign put earlier.
-                    if (bSign)
-                        ++p1;
-                    assert(p1 <= p);
-                    if (p1 == p)
-                    {
-                        *p++ = '1';
-                        if (eFormat != rtl_math_StringFormat_F)
-                        {
-                            *p++ = cDecSeparator;
-                            nExp++;
-                            bHasDec = true;
-                        }
-                        *p++ = '0';
-                    }
-                    else
-                    {
-                        for (typename T::Char* p2 = p - 1; p2 >= p1; --p2)
-                        {
-                            typename T::Char cS = *p2;
-                            if (cS == cDecSeparator)
-                                continue;
-                            if (cS != '9')
-                            {
-                                ++*p2;
-                                break;
-                            }
-                            *p2 = '0';
-                            if (p2 == p1) // The number consisted of all 9s replaced to all 0s
-                            {
-                                if (eFormat == rtl_math_StringFormat_F)
-                                { // move everything to the right before inserting '1'
-                                    std::memmove(p2 + 1, p2, (p++ - p2) * sizeof(*p));
-                                }
-                                else
-                                {
-                                    nExp++;
-                                }
-                                *p2 = '1';
-                            }
-                        }
-
-                        *p++ = '0';
-                    }
-                    fValue = 0.0;
-                }
-                else
-                {
-                    *p++ = nDigit + '0';
-                    fValue = (fValue - nDigit) * 10.0;
-                }
+                --nCurExp;
+                nDigit = aParts.significand / eX[nCurExp];
+                aParts.significand %= eX[nCurExp];
             }
             else
             {
-                *p++ = '0';
+                nDigit = aParts.significand;
+                aParts.significand = 0;
             }
+            assert(nDigit >= 0 && nDigit < 10);
+            *p++ = nDigit + '0';
 
             if (!--nDigits)
                 break;  // for
author	Mike Kaganski <mike.kaganski@collabora.com>	2022-02-15 09:20:52 +0300
committer	Mike Kaganski <mike.kaganski@collabora.com>	2022-02-17 21:46:58 +0100
commit	9eb9083ff2fdaeb96399a0830a4394de4e29ef64 (patch)
tree	046cc63551286fb3e59c6b79ebeea505a23b2eb0 /sal
parent	ee373f34ae1509e8d9fffaf4b5140ee9c35e8d41 (diff)