diff options
author | Oliver Sander <oliver.sander@tu-dresden.de> | 2024-04-08 07:50:40 +0200 |
---|---|---|
committer | Albert Astals Cid <aacid@kde.org> | 2024-04-20 10:30:56 +0000 |
commit | 98fabb298b0e8eaef9193bbce68c99c85473a314 (patch) | |
tree | 697f588b989b86354a6e82528daa9267ec5908ff | |
parent | c60e2ce44ae1c0af902c5139a4570d36e1b602cc (diff) |
Move method GooString::hasUnicodeMarker to UTF.h
... and rename it to hasUnicodeByteOrderMark.
This allows replacing GooString with std::string in a few places
(in a future commit).
-rw-r--r-- | cpp/poppler-private.cpp | 4 | ||||
-rw-r--r-- | glib/poppler-document.cc | 8 | ||||
-rw-r--r-- | goo/GooString.h | 2 | ||||
-rw-r--r-- | poppler/Annot.cc | 23 | ||||
-rw-r--r-- | poppler/Form.cc | 15 | ||||
-rw-r--r-- | poppler/PDFDoc.cc | 4 | ||||
-rw-r--r-- | poppler/PageLabelInfo.cc | 4 | ||||
-rw-r--r-- | poppler/PageLabelInfo_p.h | 5 | ||||
-rw-r--r-- | poppler/UTF.cc | 6 | ||||
-rw-r--r-- | poppler/UTF.h | 6 | ||||
-rw-r--r-- | qt5/src/poppler-private.cc | 5 | ||||
-rw-r--r-- | qt5/tests/check_pagelabelinfo.cpp | 2 | ||||
-rw-r--r-- | qt5/tests/check_strings.cpp | 4 | ||||
-rw-r--r-- | qt6/src/poppler-private.cc | 5 | ||||
-rw-r--r-- | qt6/tests/check_pagelabelinfo.cpp | 2 | ||||
-rw-r--r-- | qt6/tests/check_strings.cpp | 3 | ||||
-rw-r--r-- | utils/pdfdetach.cc | 9 | ||||
-rw-r--r-- | utils/pdftohtml.cc | 5 |
18 files changed, 64 insertions, 48 deletions
diff --git a/cpp/poppler-private.cpp b/cpp/poppler-private.cpp index 0189fc72..6953724f 100644 --- a/cpp/poppler-private.cpp +++ b/cpp/poppler-private.cpp @@ -6,6 +6,7 @@ * Copyright (C) 2017-2019 Albert Astals Cid <aacid@kde.org> * Copyright (C) 2018 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp> * Copyright (C) 2020 Adam Reichold <adam.reichold@t-online.de> + * Copyright (C) 2024 Oliver Sander <oliver.sander@tu-dresden.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,6 +27,7 @@ #include "GooString.h" #include "Page.h" +#include "UTF.h" #include <ctime> #include <iostream> @@ -64,7 +66,7 @@ ustring detail::unicode_GooString_to_ustring(const GooString *str) const int len = str->getLength(); const bool is_unicodeLE = str->hasUnicodeMarkerLE(); - const bool is_unicode = str->hasUnicodeMarker() || is_unicodeLE; + const bool is_unicode = hasUnicodeByteOrderMark(str->toStr()) || is_unicodeLE; int i = is_unicode ? 
2 : 0; ustring::size_type ret_len = len - i; if (is_unicode) { diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc index 024ac3b3..9c516146 100644 --- a/glib/poppler-document.cc +++ b/glib/poppler-document.cc @@ -4,7 +4,7 @@ * Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com> * Copyright (C) 2018, 2019, 2021, 2022 Marek Kasik <mkasik@redhat.com> * Copyright (C) 2019 Masamichi Hosoda <trueroad@trueroad.jp> - * Copyright (C) 2019, 2021 Oliver Sander <oliver.sander@tu-dresden.de> + * Copyright (C) 2019, 2021, 2024 Oliver Sander <oliver.sander@tu-dresden.de> * Copyright (C) 2020, 2022 Albert Astals Cid <aacid@kde.org> * Copyright (C) 2021 André Guerreiro <aguerreiro1985@gmail.com> * Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> @@ -1099,7 +1099,7 @@ char *_poppler_goo_string_to_utf8(const GooString *s) char *result; - if (s->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(s->toStr())) { result = g_convert(s->c_str() + 2, s->getLength() - 2, "UTF-8", "UTF-16BE", nullptr, nullptr, nullptr); } else if (s->hasUnicodeMarkerLE()) { result = g_convert(s->c_str() + 2, s->getLength() - 2, "UTF-8", "UTF-16LE", nullptr, nullptr, nullptr); @@ -1139,7 +1139,7 @@ static GooString *_poppler_goo_string_from_utf8(const gchar *src) GooString *result = new GooString(utf16, outlen); g_free(utf16); - if (!result->hasUnicodeMarker()) { + if (!hasUnicodeByteOrderMark(result->toStr())) { result->prependUnicodeMarker(); } @@ -3733,7 +3733,7 @@ gboolean _poppler_convert_pdf_date_to_gtime(const GooString *date, time_t *gdate gchar *date_string; gboolean retval; - if (date->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(date->toStr())) { date_string = g_convert(date->c_str() + 2, date->getLength() - 2, "UTF-8", "UTF-16BE", nullptr, nullptr, nullptr); } else { date_string = g_strndup(date->c_str(), date->getLength()); diff --git a/goo/GooString.h b/goo/GooString.h index aa3c91c5..33f037e0 100644 --- a/goo/GooString.h +++ b/goo/GooString.h 
@@ -243,8 +243,6 @@ public: // Return true if string ends with suffix using std::string::ends_with; - bool hasUnicodeMarker() const { return hasUnicodeMarker(*this); } - static bool hasUnicodeMarker(const std::string &s) { return s.size() >= 2 && s[0] == '\xfe' && s[1] == '\xff'; } bool hasUnicodeMarkerLE() const { return hasUnicodeMarkerLE(*this); } static bool hasUnicodeMarkerLE(const std::string &s) { return s.size() >= 2 && s[0] == '\xff' && s[1] == '\xfe'; } diff --git a/poppler/Annot.cc b/poppler/Annot.cc index 8b954530..5736c990 100644 --- a/poppler/Annot.cc +++ b/poppler/Annot.cc @@ -41,7 +41,7 @@ // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> // Copyright (C) 2018 Dileep Sankhla <sankhla.dileep96@gmail.com> // Copyright (C) 2018-2020 Tobias Deiminger <haxtibal@posteo.de> -// Copyright (C) 2018-2020, 2022 Oliver Sander <oliver.sander@tu-dresden.de> +// Copyright (C) 2018-2020, 2022, 2024 Oliver Sander <oliver.sander@tu-dresden.de> // Copyright (C) 2019 Umang Malik <umang99m@gmail.com> // Copyright (C) 2019 João Netto <joaonetto901@gmail.com> // Copyright (C) 2020, 2024 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. 
Work sponsored by Technische Universität Dresden @@ -89,6 +89,7 @@ #include "FileSpec.h" #include "DateInfo.h" #include "Link.h" +#include "UTF.h" #include <cstring> #include <algorithm> @@ -1502,7 +1503,7 @@ void Annot::setContents(std::unique_ptr<GooString> &&new_content) if (new_content) { contents = std::move(new_content); // append the unicode marker <FE FF> if needed - if (!contents->hasUnicodeMarker()) { + if (!hasUnicodeByteOrderMark(contents->toStr())) { contents->prependUnicodeMarker(); } } else { @@ -2209,7 +2210,7 @@ void AnnotMarkup::setLabel(std::unique_ptr<GooString> &&new_label) if (new_label) { label = std::move(new_label); // append the unicode marker <FE FF> if needed - if (!label->hasUnicodeMarker()) { + if (!hasUnicodeByteOrderMark(label->toStr())) { label->prependUnicodeMarker(); } } else { @@ -2934,7 +2935,7 @@ void AnnotFreeText::setStyleString(GooString *new_string) if (new_string) { styleString = std::make_unique<GooString>(new_string); // append the unicode marker <FE FF> if needed - if (!styleString->hasUnicodeMarker()) { + if (!hasUnicodeByteOrderMark(styleString->toStr())) { styleString->prependUnicodeMarker(); } } else { @@ -3027,7 +3028,7 @@ public: double blockWidth; bool newFontNeeded = false; GooString outputText; - const bool isUnicode = text->hasUnicodeMarker(); + const bool isUnicode = hasUnicodeByteOrderMark(text->toStr()); int charCount; Annot::layoutText(text, &outputText, &i, *font, &blockWidth, availableWidth ? *availableWidth : 0.0, &charCount, noReencode, !noReencode ? 
&newFontNeeded : nullptr); @@ -3134,7 +3135,7 @@ public: double Annot::calculateFontSize(const Form *form, const GfxFont *font, const GooString *text, double wMax, double hMax, const bool forceZapfDingbats) { - const bool isUnicode = text->hasUnicodeMarker(); + const bool isUnicode = hasUnicodeByteOrderMark(text->toStr()); double fontSize; for (fontSize = 20; fontSize > 1; --fontSize) { @@ -3143,7 +3144,7 @@ double Annot::calculateFontSize(const Form *form, const GfxFont *font, const Goo int i = 0; while (i < text->getLength()) { GooString lineText(text->toStr().substr(i)); - if (!lineText.hasUnicodeMarker() && isUnicode) { + if (!hasUnicodeByteOrderMark(lineText.toStr()) && isUnicode) { lineText.prependUnicodeMarker(); } const HorizontalTextLayouter textLayouter(&lineText, form, font, availableWidthInFontSize, forceZapfDingbats); @@ -3179,7 +3180,7 @@ static DrawMultiLineTextResult drawMultiLineText(const GooString &text, double a const double availableTextWidthInFontPtSize = availableWidth / fontSize; while (i < text.getLength()) { GooString lineText(text.toStr().substr(i)); - if (!lineText.hasUnicodeMarker() && text.hasUnicodeMarker()) { + if (!hasUnicodeByteOrderMark(lineText.toStr()) && hasUnicodeByteOrderMark(text.toStr())) { lineText.prependUnicodeMarker(); } const HorizontalTextLayouter textLayouter(&lineText, form, &font, availableTextWidthInFontPtSize, false); @@ -3233,7 +3234,7 @@ static DrawMultiLineTextResult drawMultiLineText(const GooString &text, double a if (i == 0) { i += textLayouter.consumedText; } else { - i += textLayouter.consumedText - (text.hasUnicodeMarker() ? 2 : 0); + i += textLayouter.consumedText - (hasUnicodeByteOrderMark(text.toStr()) ? 
2 : 0); } } return result; @@ -4283,7 +4284,7 @@ void Annot::layoutText(const GooString *text, GooString *outBuf, int *i, const G if (!text) { return; } - bool unicode = text->hasUnicodeMarker(); + bool unicode = hasUnicodeByteOrderMark(text->toStr()); bool spacePrev; // previous character was a space // State for backtracking when more text has been processed than fits within @@ -4586,7 +4587,7 @@ bool AnnotAppearanceBuilder::drawText(const GooString *text, const Form *form, c // for a password field, replace all characters with asterisks if (flags & TurnTextToStarsDrawTextFlag) { int len; - if (text->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(text->toStr())) { len = (text->getLength() - 2) / 2; } else { len = text->getLength(); diff --git a/poppler/Form.cc b/poppler/Form.cc index 022c08d5..3acf81f5 100644 --- a/poppler/Form.cc +++ b/poppler/Form.cc @@ -74,6 +74,7 @@ #include "Lexer.h" #include "Parser.h" #include "CIDFontsWidthsBuilder.h" +#include "UTF.h" #include "fofi/FoFiTrueType.h" #include "fofi/FoFiIdentifier.h" @@ -1247,7 +1248,7 @@ GooString *FormField::getFullyQualifiedName() if (unicode_encoded) { fullyQualifiedName->insert(0, "\0.", 2); // 2-byte unicode period - if (parent_name->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(parent_name->toStr())) { fullyQualifiedName->insert(0, parent_name->c_str() + 2, parent_name->getLength() - 2); // Remove the unicode BOM } else { int tmp_length; @@ -1257,7 +1258,7 @@ GooString *FormField::getFullyQualifiedName() } } else { fullyQualifiedName->insert(0, '.'); // 1-byte ascii period - if (parent_name->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(parent_name->toStr())) { unicode_encoded = true; fullyQualifiedName = convertToUtf16(fullyQualifiedName); fullyQualifiedName->insert(0, parent_name->c_str() + 2, parent_name->getLength() - 2); // Remove the unicode BOM @@ -1275,7 +1276,7 @@ GooString *FormField::getFullyQualifiedName() if (partialName) { if (unicode_encoded) { - if 
(partialName->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(partialName->toStr())) { fullyQualifiedName->append(partialName->c_str() + 2, partialName->getLength() - 2); // Remove the unicode BOM } else { int tmp_length; @@ -1284,7 +1285,7 @@ GooString *FormField::getFullyQualifiedName() delete[] tmp_str; } } else { - if (partialName->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(partialName->toStr())) { unicode_encoded = true; fullyQualifiedName = convertToUtf16(fullyQualifiedName); fullyQualifiedName->append(partialName->c_str() + 2, partialName->getLength() - 2); // Remove the unicode BOM @@ -1678,7 +1679,7 @@ void FormFieldText::fillContent(FillValueType fillType) obj1 = Form::fieldLookup(dict, fillType == fillDefaultValue ? "DV" : "V"); if (obj1.isString()) { - if (obj1.getString()->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(obj1.getString()->toStr())) { if (obj1.getString()->getLength() > 2) { if (fillType == fillDefaultValue) { defaultContent = obj1.getString()->copy(); @@ -1716,7 +1717,7 @@ void FormFieldText::setContentCopy(const GooString *new_content) content = new_content->copy(); // append the unicode marker <FE FF> if needed - if (!content->hasUnicodeMarker()) { + if (!hasUnicodeByteOrderMark(content->toStr())) { content->prependUnicodeMarker(); } Form *form = doc->getCatalog()->getForm(); @@ -2165,7 +2166,7 @@ void FormFieldChoice::setEditChoice(const GooString *new_content) editedChoice = new_content->copy(); // append the unicode marker <FE FF> if needed - if (!editedChoice->hasUnicodeMarker()) { + if (!hasUnicodeByteOrderMark(editedChoice->toStr())) { editedChoice->prependUnicodeMarker(); } } diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index 7d08f202..613a1e25 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -46,7 +46,7 @@ // Copyright (C) 2020 Nelson Benítez León <nbenitezl@gmail.com> // Copyright (C) 2020 Thorsten Behrens <Thorsten.Behrens@CIB.de> // Copyright (C) 2020 Adam Sampson <ats@offog.org> -// 
Copyright (C) 2021-2023 Oliver Sander <oliver.sander@tu-dresden.de> +// Copyright (C) 2021-2024 Oliver Sander <oliver.sander@tu-dresden.de> // Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com> // Copyright (C) 2021 RM <rm+git@arcsin.org> // Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net. @@ -1311,7 +1311,7 @@ void PDFDoc::writeString(const GooString *s, OutStream *outStr, const unsigned c } // Write data - if (s->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(s->toStr())) { // unicode string don't necessary end with \0 const char *c = s->c_str(); std::stringstream stream; diff --git a/poppler/PageLabelInfo.cc b/poppler/PageLabelInfo.cc index 011f56d4..59e055d1 100644 --- a/poppler/PageLabelInfo.cc +++ b/poppler/PageLabelInfo.cc @@ -118,7 +118,7 @@ bool PageLabelInfo::labelToIndex(GooString *label, int *index) const { const char *const str = label->c_str(); const std::size_t strLen = label->getLength(); - const bool strUnicode = label->hasUnicodeMarker(); + const bool strUnicode = hasUnicodeByteOrderMark(label->toStr()); int number; bool ok; @@ -211,7 +211,7 @@ bool PageLabelInfo::indexToLabel(int index, GooString *label) const label->clear(); label->append(matching_interval->prefix.c_str(), matching_interval->prefix.size()); - if (label->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(label->toStr())) { int i, len; char ucs2_char[2]; diff --git a/poppler/PageLabelInfo_p.h b/poppler/PageLabelInfo_p.h index 5fa1a5e2..9d43ad25 100644 --- a/poppler/PageLabelInfo_p.h +++ b/poppler/PageLabelInfo_p.h @@ -7,7 +7,7 @@ // Copyright (C) 2011 Simon Kellner <kellner@kit.edu> // Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it> // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> -// Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de> +// Copyright (C) 2019, 2024 Oliver Sander <oliver.sander@tu-dresden.de> // // To see a description of the changes please see the Changelog file that // came 
with your tarball or type make ChangeLog if you are building from git @@ -23,11 +23,12 @@ #include "goo/GooString.h" #include "Error.h" +#include "UTF.h" static std::pair<int, bool> fromDecimal(const std::string &str, const bool unicode) { if (unicode && (str.size() % 2 == 0)) { - if (GooString::hasUnicodeMarker(str)) { + if (hasUnicodeByteOrderMark(str)) { // strip the marker if it is there return fromDecimal(str.substr(2), true /*unicode*/); } diff --git a/poppler/UTF.cc b/poppler/UTF.cc index f8dd16f2..48645780 100644 --- a/poppler/UTF.cc +++ b/poppler/UTF.cc @@ -23,7 +23,7 @@ // Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net. // Copyright (C) 2023, 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> // Copyright (C) 2023 Even Rouault <even.rouault@spatialys.com> -// Copyright (C) 2023 Oliver Sander <oliver.sander@tu-dresden.de> +// Copyright (C) 2023, 2024 Oliver Sander <oliver.sander@tu-dresden.de> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -88,7 +88,7 @@ std::vector<Unicode> TextStringToUCS4(const std::string &textStr) return {}; } - if (GooString::hasUnicodeMarker(textStr)) { + if (hasUnicodeByteOrderMark(textStr)) { isUnicode = true; isUnicodeLE = false; } else if (GooString::hasUnicodeMarkerLE(textStr)) { @@ -558,7 +558,7 @@ std::string TextStringToUtf8(const std::string &textStr) len = textStr.size(); s = textStr.c_str(); - if (GooString::hasUnicodeMarker(textStr)) { + if (hasUnicodeByteOrderMark(textStr)) { uint16_t *utf16; len = len / 2 - 1; utf16 = new uint16_t[len]; diff --git a/poppler/UTF.h b/poppler/UTF.h index 9c3c6817..9f0290db 100644 --- a/poppler/UTF.h +++ b/poppler/UTF.h @@ -48,6 +48,12 @@ inline bool UnicodeIsValid(Unicode ucs4) return (ucs4 < 0x110000) && ((ucs4 & 0xfffff800) != 0xd800) && (ucs4 < 0xfdd0 || ucs4 > 0xfdef) && ((ucs4 & 0xfffe) != 0xfffe); } +// check whether 
string starts with Big-Endian byte order mark +inline bool hasUnicodeByteOrderMark(const std::string &s) +{ + return s.starts_with(unicodeByteOrderMark); +} + // is a unicode whitespace character bool UnicodeIsWhitespace(Unicode ucs4); diff --git a/qt5/src/poppler-private.cc b/qt5/src/poppler-private.cc index 1ad33005..3f4d286f 100644 --- a/qt5/src/poppler-private.cc +++ b/qt5/src/poppler-private.cc @@ -7,7 +7,7 @@ * Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com> * Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich * Copyright (C) 2018-2020 Adam Reichold <adam.reichold@t-online.de> - * Copyright (C) 2019, 2020 Oliver Sander <oliver.sander@tu-dresden.de> + * Copyright (C) 2019, 2020, 2024 Oliver Sander <oliver.sander@tu-dresden.de> * Copyright (C) 2019 João Netto <joaonetto901@gmail.com> * Copyright (C) 2021 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com> * Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com> @@ -43,6 +43,7 @@ #include <Outline.h> #include <PDFDocEncoding.h> #include <UnicodeMap.h> +#include <UTF.h> #ifdef ANDROID # include <QtCore/QString> @@ -121,7 +122,7 @@ QString UnicodeParsedString(const std::string &s1) return QString(); } - if (GooString::hasUnicodeMarker(s1) || GooString::hasUnicodeMarkerLE(s1)) { + if (hasUnicodeByteOrderMark(s1) || GooString::hasUnicodeMarkerLE(s1)) { return QString::fromUtf16(reinterpret_cast<const ushort *>(s1.c_str()), s1.size() / 2); } else { int stringLength; diff --git a/qt5/tests/check_pagelabelinfo.cpp b/qt5/tests/check_pagelabelinfo.cpp index f45afa59..3098bbf4 100644 --- a/qt5/tests/check_pagelabelinfo.cpp +++ b/qt5/tests/check_pagelabelinfo.cpp @@ -31,7 +31,7 @@ void TestPageLabelInfo::testFromDecimal() void TestPageLabelInfo::testFromDecimalUnicode() { std::unique_ptr<GooString> str(Poppler::QStringToUnicodeGooString(QString::fromLocal8Bit("2342"))); - const auto res = 
fromDecimal(str->toStr(), str->hasUnicodeMarker()); + const auto res = fromDecimal(str->toStr(), hasUnicodeByteOrderMark(str->toStr())); QCOMPARE(res.first, 2342); QCOMPARE(res.second, true); } diff --git a/qt5/tests/check_strings.cpp b/qt5/tests/check_strings.cpp index 5f18b42f..a9044db8 100644 --- a/qt5/tests/check_strings.cpp +++ b/qt5/tests/check_strings.cpp @@ -1,6 +1,7 @@ /* * Copyright (C) 2010, 2011, Pino Toscano <pino@kde.org> * Copyright (C) 2021 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com> + * Copyright (C) 2024 Oliver Sander <oliver.sander@tu-dresden.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,6 +24,7 @@ #include <poppler-private.h> #include <GlobalParams.h> +#include <UTF.h> Q_DECLARE_METATYPE(GooString *) Q_DECLARE_METATYPE(Unicode *) @@ -194,7 +196,7 @@ void TestStrings::check_QStringToUnicodeGooString() QVERIFY(goo->toStr().empty()); QCOMPARE(goo->getLength(), 0); } else { - QVERIFY(goo->hasUnicodeMarker()); + QVERIFY(hasUnicodeByteOrderMark(goo->toStr())); QCOMPARE(goo->getLength(), string.length() * 2 + 2); QCOMPARE(result, QByteArray::fromRawData(goo->c_str() + 2, goo->getLength() - 2)); } diff --git a/qt6/src/poppler-private.cc b/qt6/src/poppler-private.cc index 9b8b117d..052360bd 100644 --- a/qt6/src/poppler-private.cc +++ b/qt6/src/poppler-private.cc @@ -7,7 +7,7 @@ * Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com> * Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. 
Work sponsored by the LiMux project of the city of Munich * Copyright (C) 2018-2020 Adam Reichold <adam.reichold@t-online.de> - * Copyright (C) 2019, 2020 Oliver Sander <oliver.sander@tu-dresden.de> + * Copyright (C) 2019, 2020, 2024 Oliver Sander <oliver.sander@tu-dresden.de> * Copyright (C) 2019 João Netto <joaonetto901@gmail.com> * Copyright (C) 2021 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com> * Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com> @@ -43,6 +43,7 @@ #include <Outline.h> #include <PDFDocEncoding.h> #include <UnicodeMap.h> +#include <UTF.h> #ifdef ANDROID # include <QtCore/QString> @@ -121,7 +122,7 @@ QString UnicodeParsedString(const std::string &s1) return QString(); } - if (GooString::hasUnicodeMarker(s1) || GooString::hasUnicodeMarkerLE(s1)) { + if (hasUnicodeByteOrderMark(s1) || GooString::hasUnicodeMarkerLE(s1)) { return QString::fromUtf16(reinterpret_cast<const char16_t *>(s1.c_str()), s1.size() / 2); } else { int stringLength; diff --git a/qt6/tests/check_pagelabelinfo.cpp b/qt6/tests/check_pagelabelinfo.cpp index f45afa59..3098bbf4 100644 --- a/qt6/tests/check_pagelabelinfo.cpp +++ b/qt6/tests/check_pagelabelinfo.cpp @@ -31,7 +31,7 @@ void TestPageLabelInfo::testFromDecimal() void TestPageLabelInfo::testFromDecimalUnicode() { std::unique_ptr<GooString> str(Poppler::QStringToUnicodeGooString(QString::fromLocal8Bit("2342"))); - const auto res = fromDecimal(str->toStr(), str->hasUnicodeMarker()); + const auto res = fromDecimal(str->toStr(), hasUnicodeByteOrderMark(str->toStr())); QCOMPARE(res.first, 2342); QCOMPARE(res.second, true); } diff --git a/qt6/tests/check_strings.cpp b/qt6/tests/check_strings.cpp index 2157f2e6..e1a4c172 100644 --- a/qt6/tests/check_strings.cpp +++ b/qt6/tests/check_strings.cpp @@ -23,6 +23,7 @@ #include <poppler-private.h> #include <GlobalParams.h> +#include "UTF.h" Q_DECLARE_METATYPE(GooString *) Q_DECLARE_METATYPE(Unicode *) @@ -194,7 +195,7 @@ void 
TestStrings::check_QStringToUnicodeGooString() QVERIFY(goo->toStr().empty()); QCOMPARE(goo->getLength(), 0); } else { - QVERIFY(goo->hasUnicodeMarker()); + QVERIFY(hasUnicodeByteOrderMark(goo->toStr())); QCOMPARE(goo->getLength(), string.length() * 2 + 2); QCOMPARE(result, QByteArray::fromRawData(goo->c_str() + 2, goo->getLength() - 2)); } diff --git a/utils/pdfdetach.cc b/utils/pdfdetach.cc index fc3e9c75..aa530850 100644 --- a/utils/pdfdetach.cc +++ b/utils/pdfdetach.cc @@ -18,7 +18,7 @@ // Copyright (C) 2014, 2017 Adrian Johnson <ajohnson@redneon.com> // Copyright (C) 2018, 2020, 2022, 2024 Albert Astals Cid <aacid@kde.org> // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> -// Copyright (C) 2019, 2021 Oliver Sander <oliver.sander@tu-dresden.de> +// Copyright (C) 2019, 2021, 2024 Oliver Sander <oliver.sander@tu-dresden.de> // Copyright (C) 2020 <r.coeffier@bee-buzziness.com> // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> // @@ -43,6 +43,7 @@ #include "UnicodeMap.h" #include "PDFDocEncoding.h" #include "Error.h" +#include "UTF.h" #include "Win32Console.h" #include <filesystem> @@ -171,7 +172,7 @@ int main(int argc, char *argv[]) if (!s1) { return 3; } - if (s1->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(s1->toStr())) { isUnicode = true; j = 2; } else { @@ -208,7 +209,7 @@ int main(int argc, char *argv[]) if (!s1) { return 3; } - if (s1->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(s1->toStr())) { isUnicode = true; j = 2; } else { @@ -273,7 +274,7 @@ int main(int argc, char *argv[]) if (!s1) { return 3; } - if (s1->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(s1->toStr())) { isUnicode = true; j = 2; } else { diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc index a69a64af..999ad004 100644 --- a/utils/pdftohtml.cc +++ b/utils/pdftohtml.cc @@ -28,7 +28,7 @@ // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. 
Work sponsored by the LiMux project of the city of Munich // Copyright (C) 2018 Thibaut Brard <thibaut.brard@gmail.com> // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> -// Copyright (C) 2019, 2021 Oliver Sander <oliver.sander@tu-dresden.de> +// Copyright (C) 2019, 2021, 2024 Oliver Sander <oliver.sander@tu-dresden.de> // Copyright (C) 2021 Hubert Figuiere <hub@figuiere.net> // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> // @@ -72,6 +72,7 @@ #include "goo/gfile.h" #include "Win32Console.h" #include "InMemoryFile.h" +#include "UTF.h" static int firstPage = 1; static int lastPage = 0; @@ -409,7 +410,7 @@ static std::unique_ptr<GooString> getInfoString(Dict *infoDict, const char *key) rawString = obj.getString(); // Convert rawString to unicode - if (rawString->hasUnicodeMarker()) { + if (hasUnicodeByteOrderMark(rawString->toStr())) { isUnicode = true; unicodeLength = (obj.getString()->getLength() - 2) / 2; } else { |