diff options
author | Mike Kaganski <mike.kaganski@collabora.com> | 2021-12-24 15:25:52 +0300 |
---|---|---|
committer | Mike Kaganski <mike.kaganski@collabora.com> | 2021-12-24 21:42:08 +0100 |
commit | 8b333c76945960fc62a01829666ba234f59a6d94 (patch) | |
tree | 4393e992348ac00faf04256d5f51f9e3aea2e4e6 /sax | |
parent | fd4acfaca9fc012313f03f46e927add6feb6a553 (diff) |
Use rtl functions instead of own surrogate checking/combining
Change-Id: I3eb05d8f5b0761bc3b672d4c855eb469f8cc1a29
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/127375
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'sax')
-rw-r--r-- | sax/source/expatwrap/saxwriter.cxx | 40 |
1 files changed, 23 insertions, 17 deletions
diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx index 37eb58f099db..e19a31211d98 100644 --- a/sax/source/expatwrap/saxwriter.cxx +++ b/sax/source/expatwrap/saxwriter.cxx @@ -447,20 +447,22 @@ bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen, } // Deal with other unicode cases - if (c >= 0xd800 && c < 0xdc00) + if (rtl::isHighSurrogate(c)) { // 1. surrogate: save (until 2. surrogate) - OSL_ENSURE(nSurrogate == 0, "left-over Unicode surrogate"); - nSurrogate = ((c & 0x03ff) + 0x0040); + if (nSurrogate != 0) // left-over lone 1st Unicode surrogate + { + OSL_FAIL("left-over Unicode surrogate"); + bRet = false; + } + nSurrogate = c; } - else if (c >= 0xdc00 && c < 0xe000) + else if (rtl::isLowSurrogate(c)) { // 2. surrogate: write as UTF-8 - OSL_ENSURE(nSurrogate != 0, "lone 2nd Unicode surrogate"); - - nSurrogate = (nSurrogate << 10) | (c & 0x03ff); - if (rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000) + if (nSurrogate) // can only be 1st surrogate { + nSurrogate = rtl::combineSurrogates(nSurrogate, c); sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)), sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)), sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)), @@ -479,7 +481,7 @@ bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen, rPos++; } } - else + else // lone 2nd surrogate { OSL_FAIL("illegal Unicode character"); bRet = false; @@ -526,13 +528,18 @@ bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen, rPos = writeSequence(); // reset left-over surrogate - if ((nSurrogate != 0) && (c < 0xd800 || c >= 0xdc00)) + if ((nSurrogate != 0) && !rtl::isHighSurrogate(c)) { - OSL_ENSURE(nSurrogate != 0, "left-over Unicode surrogate"); + OSL_FAIL("left-over Unicode surrogate"); nSurrogate = 0; bRet = false; } } + if (nSurrogate != 0) // trailing lone 1st surrogate + { + OSL_FAIL("left-over Unicode surrogate"); + bRet = false; + } return bRet; } @@ -951,16 +958,15 @@ sal_Int32 SaxWriterHelper::calcXMLByteLength(const OUString& rStr, bool bDoNorma } // Deal with other unicode cases - if (c >= 0xd800 && c < 0xdc00) + if (rtl::isHighSurrogate(c)) { // save surrogate - nSurrogate = ((c & 0x03ff) + 0x0040); + nSurrogate = c; } - else if (c >= 0xdc00 && c < 0xe000) + else if (rtl::isLowSurrogate(c)) { // 2. surrogate: write as UTF-8 (if range is OK - nSurrogate = (nSurrogate << 10) | (c & 0x03ff); - if (rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000) + if (nSurrogate) nOutputLength += 4; nSurrogate = 0; } @@ -975,7 +981,7 @@ sal_Int32 SaxWriterHelper::calcXMLByteLength(const OUString& rStr, bool bDoNorma } // surrogate processing - if ((nSurrogate != 0) && (c < 0xd800 || c >= 0xdc00)) + if ((nSurrogate != 0) && !rtl::isHighSurrogate(c)) nSurrogate = 0; } |