diff options
author | Stephan Bergmann <sbergman@redhat.com> | 2023-05-04 14:09:53 +0200 |
---|---|---|
committer | Stephan Bergmann <sbergman@redhat.com> | 2023-05-04 17:51:23 +0200 |
commit | fa0c012d6c06e9a92093dacf997fe3151272648e (patch) | |
tree | 579a8e3e495b7ae36b46a0822282fc64e39c72c9 /sal | |
parent | 2711b42088ad8e19c2bccd265f032b4acaba7b9e (diff) |
Provide std::u16string_view based o3tl::iterateCodePoints
...as requested in the comments of
<https://gerrit.libreoffice.org/c/core/+/151303> "a11y: Fix returning unpaired
surrogates when retrieving characters" (incl. the additional preAdjustIndex
parameter).
The type of the indexUtf16 parameter obviously needed to be adapted to
std::u16string_view's std::size_t. But there is no obvious best choice for the
type of the incrementCodePoints parameter (int? std::ssize_t?), so let's leave it
as sal_Int32.
For simplicity of avoiding a Library_o3tl, and to allow o3tl::iterateCodePoints
to be used in the implementation of rtl_uString_iterateCodePoints now,
o3tl::iterateCodePoints is provided as an inline function defined in the include
file.
Change-Id: I8280ca11d2a943bd2b7150a266807b358f321a72
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151366
Tested-by: Jenkins
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
Diffstat (limited to 'sal')
-rw-r--r-- | sal/rtl/ustring.cxx | 49 |
1 files changed, 13 insertions, 36 deletions
```diff
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index 45ab6e166871..fc23cf37a338 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -26,6 +26,9 @@
 #include <string>
 
 #include <config_options.h>
+#include <o3tl/intcmp.hxx>
+#include <o3tl/safeint.hxx>
+#include <o3tl/string_view.hxx>
 #include <osl/diagnose.h>
 #include <osl/interlck.h>
 #include <osl/mutex.h>
@@ -769,43 +772,17 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
     rtl_uString const * string, sal_Int32 * indexUtf16,
     sal_Int32 incrementCodePoints)
 {
-    sal_Int32 n;
-    sal_Unicode cu;
-    sal_uInt32 cp;
     assert(string != nullptr && indexUtf16 != nullptr);
-    n = *indexUtf16;
-    assert(n >= 0 && n <= string->length);
-    while (incrementCodePoints < 0) {
-        assert(n > 0);
-        cu = string->buffer[--n];
-        if (rtl::isLowSurrogate(cu) && n != 0 &&
-            rtl::isHighSurrogate(string->buffer[n - 1]))
-        {
-            --n;
-        }
-        ++incrementCodePoints;
-    }
-    assert(n >= 0 && n < string->length);
-    cu = string->buffer[n];
-    if (rtl::isHighSurrogate(cu) && string->length - n >= 2 &&
-        rtl::isLowSurrogate(string->buffer[n + 1]))
-    {
-        cp = rtl::combineSurrogates(cu, string->buffer[n + 1]);
-    } else {
-        cp = cu;
-    }
-    while (incrementCodePoints > 0) {
-        assert(n < string->length);
-        cu = string->buffer[n++];
-        if (rtl::isHighSurrogate(cu) && n != string->length &&
-            rtl::isLowSurrogate(string->buffer[n]))
-        {
-            ++n;
-        }
-        --incrementCodePoints;
-    }
-    assert(n >= 0 && n <= string->length);
-    *indexUtf16 = n;
+    assert(
+        *indexUtf16 >= 0
+        && o3tl::cmp_less_equal(*indexUtf16, std::numeric_limits<std::size_t>::max()));
+    // using o3tl::cmp_less_equal nicely avoids potential
+    // -Wtautological-constant-out-of-range-compare
+    std::size_t i = *indexUtf16;
+    auto const cp = o3tl::iterateCodePoints(
+        std::u16string_view(string->buffer, string->length), &i, incrementCodePoints);
+    assert(i <= o3tl::make_unsigned(std::numeric_limits<sal_Int32>::max()));
+    *indexUtf16 = i;
     return cp;
 }
 
```