summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Oliver Sander <oliver.sander@tu-dresden.de> 2024-04-08 07:50:40 +0200
committer Albert Astals Cid <aacid@kde.org> 2024-04-20 10:30:56 +0000
commit 98fabb298b0e8eaef9193bbce68c99c85473a314 (patch)
tree 697f588b989b86354a6e82528daa9267ec5908ff
parent c60e2ce44ae1c0af902c5139a4570d36e1b602cc (diff)
Move method GooString::hasUnicodeMarker to UTF.h
... and rename it to hasUnicodeByteOrderMark. This allows replacing GooString with std::string in a few places (in a future commit).
-rw-r--r-- cpp/poppler-private.cpp 4
-rw-r--r-- glib/poppler-document.cc 8
-rw-r--r-- goo/GooString.h 2
-rw-r--r-- poppler/Annot.cc 23
-rw-r--r-- poppler/Form.cc 15
-rw-r--r-- poppler/PDFDoc.cc 4
-rw-r--r-- poppler/PageLabelInfo.cc 4
-rw-r--r-- poppler/PageLabelInfo_p.h 5
-rw-r--r-- poppler/UTF.cc 6
-rw-r--r-- poppler/UTF.h 6
-rw-r--r-- qt5/src/poppler-private.cc 5
-rw-r--r-- qt5/tests/check_pagelabelinfo.cpp 2
-rw-r--r-- qt5/tests/check_strings.cpp 4
-rw-r--r-- qt6/src/poppler-private.cc 5
-rw-r--r-- qt6/tests/check_pagelabelinfo.cpp 2
-rw-r--r-- qt6/tests/check_strings.cpp 3
-rw-r--r-- utils/pdfdetach.cc 9
-rw-r--r-- utils/pdftohtml.cc 5
18 files changed, 64 insertions, 48 deletions
diff --git a/cpp/poppler-private.cpp b/cpp/poppler-private.cpp
index 0189fc72..6953724f 100644
--- a/cpp/poppler-private.cpp
+++ b/cpp/poppler-private.cpp
@@ -6,6 +6,7 @@
* Copyright (C) 2017-2019 Albert Astals Cid <aacid@kde.org>
* Copyright (C) 2018 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
* Copyright (C) 2020 Adam Reichold <adam.reichold@t-online.de>
+ * Copyright (C) 2024 Oliver Sander <oliver.sander@tu-dresden.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -26,6 +27,7 @@
#include "GooString.h"
#include "Page.h"
+#include "UTF.h"
#include <ctime>
#include <iostream>
@@ -64,7 +66,7 @@ ustring detail::unicode_GooString_to_ustring(const GooString *str)
const int len = str->getLength();
const bool is_unicodeLE = str->hasUnicodeMarkerLE();
- const bool is_unicode = str->hasUnicodeMarker() || is_unicodeLE;
+ const bool is_unicode = hasUnicodeByteOrderMark(str->toStr()) || is_unicodeLE;
int i = is_unicode ? 2 : 0;
ustring::size_type ret_len = len - i;
if (is_unicode) {
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 024ac3b3..9c516146 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -4,7 +4,7 @@
* Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
* Copyright (C) 2018, 2019, 2021, 2022 Marek Kasik <mkasik@redhat.com>
* Copyright (C) 2019 Masamichi Hosoda <trueroad@trueroad.jp>
- * Copyright (C) 2019, 2021 Oliver Sander <oliver.sander@tu-dresden.de>
+ * Copyright (C) 2019, 2021, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
* Copyright (C) 2020, 2022 Albert Astals Cid <aacid@kde.org>
* Copyright (C) 2021 André Guerreiro <aguerreiro1985@gmail.com>
* Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
@@ -1099,7 +1099,7 @@ char *_poppler_goo_string_to_utf8(const GooString *s)
char *result;
- if (s->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(s->toStr())) {
result = g_convert(s->c_str() + 2, s->getLength() - 2, "UTF-8", "UTF-16BE", nullptr, nullptr, nullptr);
} else if (s->hasUnicodeMarkerLE()) {
result = g_convert(s->c_str() + 2, s->getLength() - 2, "UTF-8", "UTF-16LE", nullptr, nullptr, nullptr);
@@ -1139,7 +1139,7 @@ static GooString *_poppler_goo_string_from_utf8(const gchar *src)
GooString *result = new GooString(utf16, outlen);
g_free(utf16);
- if (!result->hasUnicodeMarker()) {
+ if (!hasUnicodeByteOrderMark(result->toStr())) {
result->prependUnicodeMarker();
}
@@ -3733,7 +3733,7 @@ gboolean _poppler_convert_pdf_date_to_gtime(const GooString *date, time_t *gdate
gchar *date_string;
gboolean retval;
- if (date->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(date->toStr())) {
date_string = g_convert(date->c_str() + 2, date->getLength() - 2, "UTF-8", "UTF-16BE", nullptr, nullptr, nullptr);
} else {
date_string = g_strndup(date->c_str(), date->getLength());
diff --git a/goo/GooString.h b/goo/GooString.h
index aa3c91c5..33f037e0 100644
--- a/goo/GooString.h
+++ b/goo/GooString.h
@@ -243,8 +243,6 @@ public:
// Return true if string ends with suffix
using std::string::ends_with;
- bool hasUnicodeMarker() const { return hasUnicodeMarker(*this); }
- static bool hasUnicodeMarker(const std::string &s) { return s.size() >= 2 && s[0] == '\xfe' && s[1] == '\xff'; }
bool hasUnicodeMarkerLE() const { return hasUnicodeMarkerLE(*this); }
static bool hasUnicodeMarkerLE(const std::string &s) { return s.size() >= 2 && s[0] == '\xff' && s[1] == '\xfe'; }
diff --git a/poppler/Annot.cc b/poppler/Annot.cc
index 8b954530..5736c990 100644
--- a/poppler/Annot.cc
+++ b/poppler/Annot.cc
@@ -41,7 +41,7 @@
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2018 Dileep Sankhla <sankhla.dileep96@gmail.com>
// Copyright (C) 2018-2020 Tobias Deiminger <haxtibal@posteo.de>
-// Copyright (C) 2018-2020, 2022 Oliver Sander <oliver.sander@tu-dresden.de>
+// Copyright (C) 2018-2020, 2022, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2019 Umang Malik <umang99m@gmail.com>
// Copyright (C) 2019 João Netto <joaonetto901@gmail.com>
// Copyright (C) 2020, 2024 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by Technische Universität Dresden
@@ -89,6 +89,7 @@
#include "FileSpec.h"
#include "DateInfo.h"
#include "Link.h"
+#include "UTF.h"
#include <cstring>
#include <algorithm>
@@ -1502,7 +1503,7 @@ void Annot::setContents(std::unique_ptr<GooString> &&new_content)
if (new_content) {
contents = std::move(new_content);
// append the unicode marker <FE FF> if needed
- if (!contents->hasUnicodeMarker()) {
+ if (!hasUnicodeByteOrderMark(contents->toStr())) {
contents->prependUnicodeMarker();
}
} else {
@@ -2209,7 +2210,7 @@ void AnnotMarkup::setLabel(std::unique_ptr<GooString> &&new_label)
if (new_label) {
label = std::move(new_label);
// append the unicode marker <FE FF> if needed
- if (!label->hasUnicodeMarker()) {
+ if (!hasUnicodeByteOrderMark(label->toStr())) {
label->prependUnicodeMarker();
}
} else {
@@ -2934,7 +2935,7 @@ void AnnotFreeText::setStyleString(GooString *new_string)
if (new_string) {
styleString = std::make_unique<GooString>(new_string);
// append the unicode marker <FE FF> if needed
- if (!styleString->hasUnicodeMarker()) {
+ if (!hasUnicodeByteOrderMark(styleString->toStr())) {
styleString->prependUnicodeMarker();
}
} else {
@@ -3027,7 +3028,7 @@ public:
double blockWidth;
bool newFontNeeded = false;
GooString outputText;
- const bool isUnicode = text->hasUnicodeMarker();
+ const bool isUnicode = hasUnicodeByteOrderMark(text->toStr());
int charCount;
Annot::layoutText(text, &outputText, &i, *font, &blockWidth, availableWidth ? *availableWidth : 0.0, &charCount, noReencode, !noReencode ? &newFontNeeded : nullptr);
@@ -3134,7 +3135,7 @@ public:
double Annot::calculateFontSize(const Form *form, const GfxFont *font, const GooString *text, double wMax, double hMax, const bool forceZapfDingbats)
{
- const bool isUnicode = text->hasUnicodeMarker();
+ const bool isUnicode = hasUnicodeByteOrderMark(text->toStr());
double fontSize;
for (fontSize = 20; fontSize > 1; --fontSize) {
@@ -3143,7 +3144,7 @@ double Annot::calculateFontSize(const Form *form, const GfxFont *font, const Goo
int i = 0;
while (i < text->getLength()) {
GooString lineText(text->toStr().substr(i));
- if (!lineText.hasUnicodeMarker() && isUnicode) {
+ if (!hasUnicodeByteOrderMark(lineText.toStr()) && isUnicode) {
lineText.prependUnicodeMarker();
}
const HorizontalTextLayouter textLayouter(&lineText, form, font, availableWidthInFontSize, forceZapfDingbats);
@@ -3179,7 +3180,7 @@ static DrawMultiLineTextResult drawMultiLineText(const GooString &text, double a
const double availableTextWidthInFontPtSize = availableWidth / fontSize;
while (i < text.getLength()) {
GooString lineText(text.toStr().substr(i));
- if (!lineText.hasUnicodeMarker() && text.hasUnicodeMarker()) {
+ if (!hasUnicodeByteOrderMark(lineText.toStr()) && hasUnicodeByteOrderMark(text.toStr())) {
lineText.prependUnicodeMarker();
}
const HorizontalTextLayouter textLayouter(&lineText, form, &font, availableTextWidthInFontPtSize, false);
@@ -3233,7 +3234,7 @@ static DrawMultiLineTextResult drawMultiLineText(const GooString &text, double a
if (i == 0) {
i += textLayouter.consumedText;
} else {
- i += textLayouter.consumedText - (text.hasUnicodeMarker() ? 2 : 0);
+ i += textLayouter.consumedText - (hasUnicodeByteOrderMark(text.toStr()) ? 2 : 0);
}
}
return result;
@@ -4283,7 +4284,7 @@ void Annot::layoutText(const GooString *text, GooString *outBuf, int *i, const G
if (!text) {
return;
}
- bool unicode = text->hasUnicodeMarker();
+ bool unicode = hasUnicodeByteOrderMark(text->toStr());
bool spacePrev; // previous character was a space
// State for backtracking when more text has been processed than fits within
@@ -4586,7 +4587,7 @@ bool AnnotAppearanceBuilder::drawText(const GooString *text, const Form *form, c
// for a password field, replace all characters with asterisks
if (flags & TurnTextToStarsDrawTextFlag) {
int len;
- if (text->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(text->toStr())) {
len = (text->getLength() - 2) / 2;
} else {
len = text->getLength();
diff --git a/poppler/Form.cc b/poppler/Form.cc
index 022c08d5..3acf81f5 100644
--- a/poppler/Form.cc
+++ b/poppler/Form.cc
@@ -74,6 +74,7 @@
#include "Lexer.h"
#include "Parser.h"
#include "CIDFontsWidthsBuilder.h"
+#include "UTF.h"
#include "fofi/FoFiTrueType.h"
#include "fofi/FoFiIdentifier.h"
@@ -1247,7 +1248,7 @@ GooString *FormField::getFullyQualifiedName()
if (unicode_encoded) {
fullyQualifiedName->insert(0, "\0.", 2); // 2-byte unicode period
- if (parent_name->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(parent_name->toStr())) {
fullyQualifiedName->insert(0, parent_name->c_str() + 2, parent_name->getLength() - 2); // Remove the unicode BOM
} else {
int tmp_length;
@@ -1257,7 +1258,7 @@ GooString *FormField::getFullyQualifiedName()
}
} else {
fullyQualifiedName->insert(0, '.'); // 1-byte ascii period
- if (parent_name->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(parent_name->toStr())) {
unicode_encoded = true;
fullyQualifiedName = convertToUtf16(fullyQualifiedName);
fullyQualifiedName->insert(0, parent_name->c_str() + 2, parent_name->getLength() - 2); // Remove the unicode BOM
@@ -1275,7 +1276,7 @@ GooString *FormField::getFullyQualifiedName()
if (partialName) {
if (unicode_encoded) {
- if (partialName->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(partialName->toStr())) {
fullyQualifiedName->append(partialName->c_str() + 2, partialName->getLength() - 2); // Remove the unicode BOM
} else {
int tmp_length;
@@ -1284,7 +1285,7 @@ GooString *FormField::getFullyQualifiedName()
delete[] tmp_str;
}
} else {
- if (partialName->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(partialName->toStr())) {
unicode_encoded = true;
fullyQualifiedName = convertToUtf16(fullyQualifiedName);
fullyQualifiedName->append(partialName->c_str() + 2, partialName->getLength() - 2); // Remove the unicode BOM
@@ -1678,7 +1679,7 @@ void FormFieldText::fillContent(FillValueType fillType)
obj1 = Form::fieldLookup(dict, fillType == fillDefaultValue ? "DV" : "V");
if (obj1.isString()) {
- if (obj1.getString()->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(obj1.getString()->toStr())) {
if (obj1.getString()->getLength() > 2) {
if (fillType == fillDefaultValue) {
defaultContent = obj1.getString()->copy();
@@ -1716,7 +1717,7 @@ void FormFieldText::setContentCopy(const GooString *new_content)
content = new_content->copy();
// append the unicode marker <FE FF> if needed
- if (!content->hasUnicodeMarker()) {
+ if (!hasUnicodeByteOrderMark(content->toStr())) {
content->prependUnicodeMarker();
}
Form *form = doc->getCatalog()->getForm();
@@ -2165,7 +2166,7 @@ void FormFieldChoice::setEditChoice(const GooString *new_content)
editedChoice = new_content->copy();
// append the unicode marker <FE FF> if needed
- if (!editedChoice->hasUnicodeMarker()) {
+ if (!hasUnicodeByteOrderMark(editedChoice->toStr())) {
editedChoice->prependUnicodeMarker();
}
}
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 7d08f202..613a1e25 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -46,7 +46,7 @@
// Copyright (C) 2020 Nelson Benítez León <nbenitezl@gmail.com>
// Copyright (C) 2020 Thorsten Behrens <Thorsten.Behrens@CIB.de>
// Copyright (C) 2020 Adam Sampson <ats@offog.org>
-// Copyright (C) 2021-2023 Oliver Sander <oliver.sander@tu-dresden.de>
+// Copyright (C) 2021-2024 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com>
// Copyright (C) 2021 RM <rm+git@arcsin.org>
// Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net.
@@ -1311,7 +1311,7 @@ void PDFDoc::writeString(const GooString *s, OutStream *outStr, const unsigned c
}
// Write data
- if (s->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(s->toStr())) {
// unicode string don't necessary end with \0
const char *c = s->c_str();
std::stringstream stream;
diff --git a/poppler/PageLabelInfo.cc b/poppler/PageLabelInfo.cc
index 011f56d4..59e055d1 100644
--- a/poppler/PageLabelInfo.cc
+++ b/poppler/PageLabelInfo.cc
@@ -118,7 +118,7 @@ bool PageLabelInfo::labelToIndex(GooString *label, int *index) const
{
const char *const str = label->c_str();
const std::size_t strLen = label->getLength();
- const bool strUnicode = label->hasUnicodeMarker();
+ const bool strUnicode = hasUnicodeByteOrderMark(label->toStr());
int number;
bool ok;
@@ -211,7 +211,7 @@ bool PageLabelInfo::indexToLabel(int index, GooString *label) const
label->clear();
label->append(matching_interval->prefix.c_str(), matching_interval->prefix.size());
- if (label->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(label->toStr())) {
int i, len;
char ucs2_char[2];
diff --git a/poppler/PageLabelInfo_p.h b/poppler/PageLabelInfo_p.h
index 5fa1a5e2..9d43ad25 100644
--- a/poppler/PageLabelInfo_p.h
+++ b/poppler/PageLabelInfo_p.h
@@ -7,7 +7,7 @@
// Copyright (C) 2011 Simon Kellner <kellner@kit.edu>
// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
-// Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
+// Copyright (C) 2019, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -23,11 +23,12 @@
#include "goo/GooString.h"
#include "Error.h"
+#include "UTF.h"
static std::pair<int, bool> fromDecimal(const std::string &str, const bool unicode)
{
if (unicode && (str.size() % 2 == 0)) {
- if (GooString::hasUnicodeMarker(str)) {
+ if (hasUnicodeByteOrderMark(str)) {
// strip the marker if it is there
return fromDecimal(str.substr(2), true /*unicode*/);
}
diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index f8dd16f2..48645780 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc
@@ -23,7 +23,7 @@
// Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net.
// Copyright (C) 2023, 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
// Copyright (C) 2023 Even Rouault <even.rouault@spatialys.com>
-// Copyright (C) 2023 Oliver Sander <oliver.sander@tu-dresden.de>
+// Copyright (C) 2023, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -88,7 +88,7 @@ std::vector<Unicode> TextStringToUCS4(const std::string &textStr)
return {};
}
- if (GooString::hasUnicodeMarker(textStr)) {
+ if (hasUnicodeByteOrderMark(textStr)) {
isUnicode = true;
isUnicodeLE = false;
} else if (GooString::hasUnicodeMarkerLE(textStr)) {
@@ -558,7 +558,7 @@ std::string TextStringToUtf8(const std::string &textStr)
len = textStr.size();
s = textStr.c_str();
- if (GooString::hasUnicodeMarker(textStr)) {
+ if (hasUnicodeByteOrderMark(textStr)) {
uint16_t *utf16;
len = len / 2 - 1;
utf16 = new uint16_t[len];
diff --git a/poppler/UTF.h b/poppler/UTF.h
index 9c3c6817..9f0290db 100644
--- a/poppler/UTF.h
+++ b/poppler/UTF.h
@@ -48,6 +48,12 @@ inline bool UnicodeIsValid(Unicode ucs4)
return (ucs4 < 0x110000) && ((ucs4 & 0xfffff800) != 0xd800) && (ucs4 < 0xfdd0 || ucs4 > 0xfdef) && ((ucs4 & 0xfffe) != 0xfffe);
}
+// check whether string starts with Big-Endian byte order mark
+inline bool hasUnicodeByteOrderMark(const std::string &s)
+{
+ return s.starts_with(unicodeByteOrderMark);
+}
+
// is a unicode whitespace character
bool UnicodeIsWhitespace(Unicode ucs4);
diff --git a/qt5/src/poppler-private.cc b/qt5/src/poppler-private.cc
index 1ad33005..3f4d286f 100644
--- a/qt5/src/poppler-private.cc
+++ b/qt5/src/poppler-private.cc
@@ -7,7 +7,7 @@
* Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
* Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
* Copyright (C) 2018-2020 Adam Reichold <adam.reichold@t-online.de>
- * Copyright (C) 2019, 2020 Oliver Sander <oliver.sander@tu-dresden.de>
+ * Copyright (C) 2019, 2020, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
* Copyright (C) 2019 João Netto <joaonetto901@gmail.com>
* Copyright (C) 2021 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>
* Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com>
@@ -43,6 +43,7 @@
#include <Outline.h>
#include <PDFDocEncoding.h>
#include <UnicodeMap.h>
+#include <UTF.h>
#ifdef ANDROID
# include <QtCore/QString>
@@ -121,7 +122,7 @@ QString UnicodeParsedString(const std::string &s1)
return QString();
}
- if (GooString::hasUnicodeMarker(s1) || GooString::hasUnicodeMarkerLE(s1)) {
+ if (hasUnicodeByteOrderMark(s1) || GooString::hasUnicodeMarkerLE(s1)) {
return QString::fromUtf16(reinterpret_cast<const ushort *>(s1.c_str()), s1.size() / 2);
} else {
int stringLength;
diff --git a/qt5/tests/check_pagelabelinfo.cpp b/qt5/tests/check_pagelabelinfo.cpp
index f45afa59..3098bbf4 100644
--- a/qt5/tests/check_pagelabelinfo.cpp
+++ b/qt5/tests/check_pagelabelinfo.cpp
@@ -31,7 +31,7 @@ void TestPageLabelInfo::testFromDecimal()
void TestPageLabelInfo::testFromDecimalUnicode()
{
std::unique_ptr<GooString> str(Poppler::QStringToUnicodeGooString(QString::fromLocal8Bit("2342")));
- const auto res = fromDecimal(str->toStr(), str->hasUnicodeMarker());
+ const auto res = fromDecimal(str->toStr(), hasUnicodeByteOrderMark(str->toStr()));
QCOMPARE(res.first, 2342);
QCOMPARE(res.second, true);
}
diff --git a/qt5/tests/check_strings.cpp b/qt5/tests/check_strings.cpp
index 5f18b42f..a9044db8 100644
--- a/qt5/tests/check_strings.cpp
+++ b/qt5/tests/check_strings.cpp
@@ -1,6 +1,7 @@
/*
* Copyright (C) 2010, 2011, Pino Toscano <pino@kde.org>
* Copyright (C) 2021 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>
+ * Copyright (C) 2024 Oliver Sander <oliver.sander@tu-dresden.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -23,6 +24,7 @@
#include <poppler-private.h>
#include <GlobalParams.h>
+#include <UTF.h>
Q_DECLARE_METATYPE(GooString *)
Q_DECLARE_METATYPE(Unicode *)
@@ -194,7 +196,7 @@ void TestStrings::check_QStringToUnicodeGooString()
QVERIFY(goo->toStr().empty());
QCOMPARE(goo->getLength(), 0);
} else {
- QVERIFY(goo->hasUnicodeMarker());
+ QVERIFY(hasUnicodeByteOrderMark(goo->toStr()));
QCOMPARE(goo->getLength(), string.length() * 2 + 2);
QCOMPARE(result, QByteArray::fromRawData(goo->c_str() + 2, goo->getLength() - 2));
}
diff --git a/qt6/src/poppler-private.cc b/qt6/src/poppler-private.cc
index 9b8b117d..052360bd 100644
--- a/qt6/src/poppler-private.cc
+++ b/qt6/src/poppler-private.cc
@@ -7,7 +7,7 @@
* Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
* Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
* Copyright (C) 2018-2020 Adam Reichold <adam.reichold@t-online.de>
- * Copyright (C) 2019, 2020 Oliver Sander <oliver.sander@tu-dresden.de>
+ * Copyright (C) 2019, 2020, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
* Copyright (C) 2019 João Netto <joaonetto901@gmail.com>
* Copyright (C) 2021 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>
* Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com>
@@ -43,6 +43,7 @@
#include <Outline.h>
#include <PDFDocEncoding.h>
#include <UnicodeMap.h>
+#include <UTF.h>
#ifdef ANDROID
# include <QtCore/QString>
@@ -121,7 +122,7 @@ QString UnicodeParsedString(const std::string &s1)
return QString();
}
- if (GooString::hasUnicodeMarker(s1) || GooString::hasUnicodeMarkerLE(s1)) {
+ if (hasUnicodeByteOrderMark(s1) || GooString::hasUnicodeMarkerLE(s1)) {
return QString::fromUtf16(reinterpret_cast<const char16_t *>(s1.c_str()), s1.size() / 2);
} else {
int stringLength;
diff --git a/qt6/tests/check_pagelabelinfo.cpp b/qt6/tests/check_pagelabelinfo.cpp
index f45afa59..3098bbf4 100644
--- a/qt6/tests/check_pagelabelinfo.cpp
+++ b/qt6/tests/check_pagelabelinfo.cpp
@@ -31,7 +31,7 @@ void TestPageLabelInfo::testFromDecimal()
void TestPageLabelInfo::testFromDecimalUnicode()
{
std::unique_ptr<GooString> str(Poppler::QStringToUnicodeGooString(QString::fromLocal8Bit("2342")));
- const auto res = fromDecimal(str->toStr(), str->hasUnicodeMarker());
+ const auto res = fromDecimal(str->toStr(), hasUnicodeByteOrderMark(str->toStr()));
QCOMPARE(res.first, 2342);
QCOMPARE(res.second, true);
}
diff --git a/qt6/tests/check_strings.cpp b/qt6/tests/check_strings.cpp
index 2157f2e6..e1a4c172 100644
--- a/qt6/tests/check_strings.cpp
+++ b/qt6/tests/check_strings.cpp
@@ -23,6 +23,7 @@
#include <poppler-private.h>
#include <GlobalParams.h>
+#include "UTF.h"
Q_DECLARE_METATYPE(GooString *)
Q_DECLARE_METATYPE(Unicode *)
@@ -194,7 +195,7 @@ void TestStrings::check_QStringToUnicodeGooString()
QVERIFY(goo->toStr().empty());
QCOMPARE(goo->getLength(), 0);
} else {
- QVERIFY(goo->hasUnicodeMarker());
+ QVERIFY(hasUnicodeByteOrderMark(goo->toStr()));
QCOMPARE(goo->getLength(), string.length() * 2 + 2);
QCOMPARE(result, QByteArray::fromRawData(goo->c_str() + 2, goo->getLength() - 2));
}
diff --git a/utils/pdfdetach.cc b/utils/pdfdetach.cc
index fc3e9c75..aa530850 100644
--- a/utils/pdfdetach.cc
+++ b/utils/pdfdetach.cc
@@ -18,7 +18,7 @@
// Copyright (C) 2014, 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2018, 2020, 2022, 2024 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
-// Copyright (C) 2019, 2021 Oliver Sander <oliver.sander@tu-dresden.de>
+// Copyright (C) 2019, 2021, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2020 <r.coeffier@bee-buzziness.com>
// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
//
@@ -43,6 +43,7 @@
#include "UnicodeMap.h"
#include "PDFDocEncoding.h"
#include "Error.h"
+#include "UTF.h"
#include "Win32Console.h"
#include <filesystem>
@@ -171,7 +172,7 @@ int main(int argc, char *argv[])
if (!s1) {
return 3;
}
- if (s1->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(s1->toStr())) {
isUnicode = true;
j = 2;
} else {
@@ -208,7 +209,7 @@ int main(int argc, char *argv[])
if (!s1) {
return 3;
}
- if (s1->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(s1->toStr())) {
isUnicode = true;
j = 2;
} else {
@@ -273,7 +274,7 @@ int main(int argc, char *argv[])
if (!s1) {
return 3;
}
- if (s1->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(s1->toStr())) {
isUnicode = true;
j = 2;
} else {
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index a69a64af..999ad004 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -28,7 +28,7 @@
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Thibaut Brard <thibaut.brard@gmail.com>
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
-// Copyright (C) 2019, 2021 Oliver Sander <oliver.sander@tu-dresden.de>
+// Copyright (C) 2019, 2021, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2021 Hubert Figuiere <hub@figuiere.net>
// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
//
@@ -72,6 +72,7 @@
#include "goo/gfile.h"
#include "Win32Console.h"
#include "InMemoryFile.h"
+#include "UTF.h"
static int firstPage = 1;
static int lastPage = 0;
@@ -409,7 +410,7 @@ static std::unique_ptr<GooString> getInfoString(Dict *infoDict, const char *key)
rawString = obj.getString();
// Convert rawString to unicode
- if (rawString->hasUnicodeMarker()) {
+ if (hasUnicodeByteOrderMark(rawString->toStr())) {
isUnicode = true;
unicodeLength = (obj.getString()->getLength() - 2) / 2;
} else {