summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLászló Németh <nemeth@numbertext.org>2022-12-08 11:41:00 +0100
committerLászló Németh <nemeth@numbertext.org>2022-12-10 18:28:25 +0000
commit09ae3c01940bbc25ffde51963683b04e3cb4bb6a (patch)
tree2e99779b605a9e6c1eeb5b3f1cb4337586eff528
parent080d2d21cb59fd6e81375f15cf17c6cbfc605a77 (diff)
tdf#152203 DOCX import: fix mixed footnotes/endnotes
Footnotes (like endnotes) were imported in the order of their w:footnote elements in footnotes.xml, resulting in mixed-up footnote text content when loading documents exported from Google Docs. Import them in the order of their w:id attributes instead. Regression from commit 9b39ce0e66acfe812e1d50e530dc2ccdef3e1357 "tdf#76260 DOCX import: fix slow footnote import". Change-Id: I7d9ed36fe96b2b90c4d62fb1ca7201318581775d Reviewed-on: https://gerrit.libreoffice.org/c/core/+/143824 Tested-by: Jenkins Reviewed-by: László Németh <nemeth@numbertext.org>
-rw-r--r--sw/qa/extras/ooxmlexport/data/tdf152203.docxbin0 -> 7778 bytes
-rw-r--r--sw/qa/extras/ooxmlexport/ooxmlexport14.cxx18
-rw-r--r--writerfilter/inc/dmapper/resourcemodel.hxx3
-rw-r--r--writerfilter/source/dmapper/DomainMapper.cxx14
-rw-r--r--writerfilter/source/dmapper/DomainMapper.hxx1
-rw-r--r--writerfilter/source/dmapper/DomainMapper_Impl.cxx71
-rw-r--r--writerfilter/source/dmapper/DomainMapper_Impl.hxx3
-rw-r--r--writerfilter/source/dmapper/LoggedResources.cxx14
-rw-r--r--writerfilter/source/dmapper/LoggedResources.hxx2
-rw-r--r--writerfilter/source/ooxml/OOXMLFastContextHandler.cxx1
10 files changed, 119 insertions, 8 deletions
diff --git a/sw/qa/extras/ooxmlexport/data/tdf152203.docx b/sw/qa/extras/ooxmlexport/data/tdf152203.docx
new file mode 100644
index 000000000000..f8ba96ea59fb
--- /dev/null
+++ b/sw/qa/extras/ooxmlexport/data/tdf152203.docx
Binary files differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport14.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport14.cxx
index 19cf5dee5e3f..9ce06be528e9 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport14.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport14.cxx
@@ -21,6 +21,7 @@
#include <com/sun/star/text/XDocumentIndex.hpp>
#include <com/sun/star/text/XDocumentIndexesSupplier.hpp>
#include <com/sun/star/text/XFootnotesSupplier.hpp>
+#include <com/sun/star/text/XFootnote.hpp>
#include <com/sun/star/text/XTextContentAppend.hpp>
#include <com/sun/star/text/XTextDocument.hpp>
#include <com/sun/star/text/XTextFieldsSupplier.hpp>
@@ -1272,6 +1273,23 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf143583)
assertXPath(pXml, "/w:footnotes/w:footnote[6]/w:p", 3);
}
+CPPUNIT_TEST_FIXTURE(Test, testTdf152203) // tdf#152203: footnote texts must not get mixed up
+{
+ loadAndSave("tdf152203.docx");
+ xmlDocUniquePtr pXml = parseExport("word/footnotes.xml");
+ CPPUNIT_ASSERT(pXml); // word/footnotes.xml of the export could be parsed
+
+ uno::Reference<text::XFootnotesSupplier> xFootnotesSupplier(mxComponent, uno::UNO_QUERY);
+ uno::Reference<container::XIndexAccess> xFootnotes = xFootnotesSupplier->getFootnotes();
+ uno::Reference<text::XTextRange> xLastFootnote(xFootnotes->getByIndex(5), uno::UNO_QUERY);
+ // Without the fix, this was "Footnote for pg5": it held the text of the footnote at index 4
+ CPPUNIT_ASSERT_EQUAL( OUString("Footnote for pg 6"), xLastFootnote->getString().trim() );
+
+ uno::Reference<text::XTextRange> xLastButOne(xFootnotes->getByIndex(4), uno::UNO_QUERY);
+ // Without the fix, this was "Footnote for pg 6": it held the text of the footnote at index 5
+ CPPUNIT_ASSERT_EQUAL( OUString("Footnote for pg5"), xLastButOne->getString().trim() );
+}
+
// skip test for macOS (missing fonts?)
#if !defined(MACOSX)
DECLARE_OOXMLEXPORT_TEST(testTdf146346, "tdf146346.docx")
diff --git a/writerfilter/inc/dmapper/resourcemodel.hxx b/writerfilter/inc/dmapper/resourcemodel.hxx
index 91fda417c2d9..e277ed675fc1 100644
--- a/writerfilter/inc/dmapper/resourcemodel.hxx
+++ b/writerfilter/inc/dmapper/resourcemodel.hxx
@@ -313,6 +313,9 @@ public:
/// Receives end mark for glossary document entry.
virtual void endGlossaryEntry() = 0;
+ /// Receives the identifier of an entry, e.g. the id of a footnote or endnote.
+ virtual void checkId(const sal_Int32 nId) = 0;
+
virtual void commentProps(const OUString& /*sId*/, const CommentProperties& /*rProps*/) {}
protected:
diff --git a/writerfilter/source/dmapper/DomainMapper.cxx b/writerfilter/source/dmapper/DomainMapper.cxx
index b2ca35e6da51..1e0f9597b3b4 100644
--- a/writerfilter/source/dmapper/DomainMapper.cxx
+++ b/writerfilter/source/dmapper/DomainMapper.cxx
@@ -197,6 +197,9 @@ DomainMapper::~DomainMapper()
{
try
{
+ // Remove temporary footnotes and endnotes
+ m_pImpl->RemoveTemporaryFootOrEndnotes();
+
uno::Reference< text::XDocumentIndexesSupplier> xIndexesSupplier( m_pImpl->GetTextDocument(), uno::UNO_QUERY );
sal_Int32 nIndexes = 0;
if( xIndexesSupplier.is() )
@@ -3739,6 +3742,17 @@ void DomainMapper::lcl_positivePercentage(const OUString& rText)
m_pImpl->m_aPositivePercentages.push(rText);
}
+void DomainMapper::lcl_checkId(const sal_Int32 nId) // collects note ids in the order they are received
+{
+ if (m_pImpl->IsInFootnote())
+ {
+ if (m_pImpl->GetFootnoteCount() > -1) // NOTE(review): presumably skips ids received before the first counted footnote - confirm
+ m_pImpl->m_aFootnoteIds.push_back(nId);
+ }
+ else if (m_pImpl->GetEndnoteCount() > -1) // not in a footnote: the id is stored as an endnote id
+ m_pImpl->m_aEndnoteIds.push_back(nId);
+}
+
void DomainMapper::lcl_utext(const sal_uInt8 * data_, size_t len)
{
// All these fixed values are defined as static const sal_Unicode codepoints in the fast parser,
diff --git a/writerfilter/source/dmapper/DomainMapper.hxx b/writerfilter/source/dmapper/DomainMapper.hxx
index 3e35c6772ab5..cb68954c7929 100644
--- a/writerfilter/source/dmapper/DomainMapper.hxx
+++ b/writerfilter/source/dmapper/DomainMapper.hxx
@@ -162,6 +162,7 @@ private:
::writerfilter::Reference<Stream>::Pointer_t ref) override;
virtual void lcl_startGlossaryEntry() override;
virtual void lcl_endGlossaryEntry() override;
+ virtual void lcl_checkId(const sal_Int32 nId) override;
// Properties
virtual void lcl_attribute(Id Name, Value & val) override;
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
index ffeb26da7a97..5365b302c6d2 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
@@ -3617,12 +3617,49 @@ static void lcl_PasteRedlines(
}
}
+void DomainMapper_Impl::RemoveTemporaryFootOrEndnotes() // removes the temporary notes by emptying their anchors
+{
+ uno::Reference< text::XFootnotesSupplier> xFootnotesSupplier( GetTextDocument(), uno::UNO_QUERY );
+ uno::Reference< text::XEndnotesSupplier> xEndnotesSupplier( GetTextDocument(), uno::UNO_QUERY );
+ uno::Reference< text::XFootnote > xNote; // NOTE(review): dereferenced below without an is() check, like both suppliers
+ if (GetFootnoteCount() > 0)
+ {
+ auto xFootnotes = xFootnotesSupplier->getFootnotes();
+ for (sal_Int32 i = GetFootnoteCount(); i > 0; --i) // back to front; the note at index 0 is never touched
+ {
+ xFootnotes->getByIndex(i) >>= xNote;
+ xNote->getAnchor()->setString("");
+ }
+ }
+ if (GetEndnoteCount() > 0) // same clean-up for the endnotes
+ {
+ auto xEndnotes = xEndnotesSupplier->getEndnotes();
+ for (sal_Int32 i = GetEndnoteCount(); i > 0; --i) // back to front; the note at index 0 is never touched
+ {
+ xEndnotes->getByIndex(i) >>= xNote;
+ xNote->getAnchor()->setString("");
+ }
+ }
+}
+
+static void lcl_convertToNoteIndices(std::deque<sal_Int32>& rNoteIds)
+{
+ // Replace every note identifier in rNoteIds by its rank (0, 1, 2...) among the sorted
+ // identifiers, keeping the order of the entries, e.g. { 7, 3, 9 } becomes { 1, 0, 2 }
+ std::deque<sal_Int32> aSortedIds = rNoteIds;
+ std::sort(aSortedIds.begin(), aSortedIds.end());
+ std::map<sal_Int32, size_t> aMapIds; // identifier -> position in the sorted list
+ for (size_t i = 0; i < aSortedIds.size(); ++i)
+ aMapIds[aSortedIds[i]] = i;
+ for (size_t i = 0; i < rNoteIds.size(); ++i)
+ rNoteIds[i] = aMapIds[rNoteIds[i]]; // rewritten in place
+}
+
void DomainMapper_Impl::PopFootOrEndnote()
{
// content of the footnotes were inserted after the first footnote in temporary footnotes,
// restore the content of the actual footnote by copying its content from the first
// (remaining) temporary footnote and remove the temporary footnote.
- // FIXME: add footnote IDs to handle possible differences in footnote serialization
uno::Reference< text::XFootnotesSupplier> xFootnotesSupplier( GetTextDocument(), uno::UNO_QUERY );
uno::Reference< text::XEndnotesSupplier> xEndnotesSupplier( GetTextDocument(), uno::UNO_QUERY );
bool bCopied = false;
@@ -3638,11 +3675,31 @@ void DomainMapper_Impl::PopFootOrEndnote()
( xEndnotes->getByIndex(xEndnotes->getCount()-1) >>= xFootnoteLast ) )
) && xFootnoteLast->getLabel().isEmpty() )
{
- // copy content of the first remaining temporary footnote
- if ( IsInFootnote() )
- xFootnotes->getByIndex(1) >>= xFootnoteFirst;
- else
- xEndnotes->getByIndex(1) >>= xFootnoteFirst;
+ // copy content of the next temporary footnote
+ try
+ {
+ if ( IsInFootnote() && !m_aFootnoteIds.empty() )
+ {
+ if ( m_aFootnoteIds.size() == sal::static_int_cast<size_t>(GetFootnoteCount()) )
+ lcl_convertToNoteIndices(m_aFootnoteIds);
+ xFootnotes->getByIndex(m_aFootnoteIds.front() + 1) >>= xFootnoteFirst;
+ m_aFootnoteIds.pop_front();
+ }
+ else if ( !IsInFootnote() && !m_aEndnoteIds.empty() )
+ {
+ if ( m_aEndnoteIds.size() == sal::static_int_cast<size_t>(GetEndnoteCount()) )
+ lcl_convertToNoteIndices(m_aEndnoteIds);
+ xEndnotes->getByIndex(m_aEndnoteIds.front() + 1) >>= xFootnoteFirst;
+ m_aEndnoteIds.pop_front();
+ }
+ else
+ m_bSaxError = true;
+ }
+ catch (uno::Exception const&)
+ {
+ TOOLS_WARN_EXCEPTION("writerfilter.dmapper", "Cannot insert footnote/endnote");
+ m_bSaxError = true;
+ }
if (!m_bSaxError && xFootnoteFirst != xFootnoteLast)
{
uno::Reference< text::XText > xSrc( xFootnoteFirst, uno::UNO_QUERY_THROW );
@@ -3663,8 +3720,6 @@ void DomainMapper_Impl::PopFootOrEndnote()
for( size_t i = 0; redIdx > -1 && i <= sal::static_int_cast<size_t>(redIdx) + 2; i++)
m_aStoredRedlines[eType].pop_front();
- // remove temporary footnote
- xFootnoteFirst->getAnchor()->setString("");
bCopied = true;
}
}
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
index 247c40fae621..1c0c9654c1b0 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
@@ -633,6 +633,8 @@ private:
public:
css::uno::Reference<css::text::XTextRange> m_xInsertTextRange;
css::uno::Reference<css::text::XTextRange> m_xAltChunkStartingRange;
+ std::deque<sal_Int32> m_aFootnoteIds; // footnote ids queued by DomainMapper::lcl_checkId()
+ std::deque<sal_Int32> m_aEndnoteIds; // endnote ids queued by DomainMapper::lcl_checkId()
bool m_bIsInTextBox;
private:
@@ -884,6 +886,7 @@ public:
void IncrementFootnoteCount() { ++m_nFootnotes; }
sal_Int32 GetEndnoteCount() const { return m_nEndnotes; }
void IncrementEndnoteCount() { ++m_nEndnotes; }
+ void RemoveTemporaryFootOrEndnotes();
void PushAnnotation();
void PopAnnotation();
diff --git a/writerfilter/source/dmapper/LoggedResources.cxx b/writerfilter/source/dmapper/LoggedResources.cxx
index bc883b20a6ea..819238bf671d 100644
--- a/writerfilter/source/dmapper/LoggedResources.cxx
+++ b/writerfilter/source/dmapper/LoggedResources.cxx
@@ -316,6 +316,20 @@ void LoggedStream::endGlossaryEntry()
#endif
}
+void LoggedStream::checkId(const sal_Int32 nId) // logs the id when DBG_UTIL is defined, then forwards it to lcl_checkId()
+{
+#ifdef DBG_UTIL
+ mHelper.startElement("checkId");
+ LoggedResourcesHelper::chars(OUString::number(nId));
+#endif
+
+ lcl_checkId(nId);
+
+#ifdef DBG_UTIL
+ LoggedResourcesHelper::endElement();
+#endif
+}
+
LoggedProperties::LoggedProperties(
#ifdef DBG_UTIL
const std::string& sPrefix)
diff --git a/writerfilter/source/dmapper/LoggedResources.hxx b/writerfilter/source/dmapper/LoggedResources.hxx
index abb52cd91ebe..aa7b84e0a083 100644
--- a/writerfilter/source/dmapper/LoggedResources.hxx
+++ b/writerfilter/source/dmapper/LoggedResources.hxx
@@ -73,6 +73,7 @@ public:
void info(const std::string& info) override;
void startGlossaryEntry() override;
void endGlossaryEntry() override;
+ void checkId(const sal_Int32 nId) override;
virtual void setDocumentReference(writerfilter::ooxml::OOXMLDocument* /*pDocument*/) override{};
@@ -98,6 +99,7 @@ protected:
virtual void lcl_substream(Id name, writerfilter::Reference<Stream>::Pointer_t ref) = 0;
virtual void lcl_startGlossaryEntry() {}
virtual void lcl_endGlossaryEntry() {}
+ virtual void lcl_checkId(const sal_Int32) {}
#ifdef DBG_UTIL
LoggedResourcesHelper mHelper;
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
index 16169b318971..b479e587ebc0 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
@@ -1410,6 +1410,7 @@ void OOXMLFastContextHandlerXNote::lcl_endFastElement
void OOXMLFastContextHandlerXNote::checkId(const OOXMLValue::Pointer_t& pValue)
{
mnMyXNoteId = sal_Int32(pValue->getInt());
+ mpStream->checkId(mnMyXNoteId); // also pass the id just stored in mnMyXNoteId on to the stream
}
void OOXMLFastContextHandlerXNote::checkType(const OOXMLValue::Pointer_t& pValue)