diff options
author | László Németh <nemeth@numbertext.org> | 2022-12-08 11:41:00 +0100 |
---|---|---|
committer | László Németh <nemeth@numbertext.org> | 2022-12-10 18:28:25 +0000 |
commit | 09ae3c01940bbc25ffde51963683b04e3cb4bb6a (patch) | |
tree | 2e99779b605a9e6c1eeb5b3f1cb4337586eff528 | |
parent | 080d2d21cb59fd6e81375f15cf17c6cbfc605a77 (diff) |
tdf#152203 DOCX import: fix mixed footnotes/endnotes
Footnotes (like endnotes) were imported in the order
of their w:footnote elements in footnotes.xml, resulting in
mixed-up footnote text content when loading documents
exported from Google Docs. Import them in the order
of their w:id attributes.
Regression from commit 9b39ce0e66acfe812e1d50e530dc2ccdef3e1357
"tdf#76260 DOCX import: fix slow footnote import".
Change-Id: I7d9ed36fe96b2b90c4d62fb1ca7201318581775d
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/143824
Tested-by: Jenkins
Reviewed-by: László Németh <nemeth@numbertext.org>
-rw-r--r-- | sw/qa/extras/ooxmlexport/data/tdf152203.docx | bin | 0 -> 7778 bytes | |||
-rw-r--r-- | sw/qa/extras/ooxmlexport/ooxmlexport14.cxx | 18 | ||||
-rw-r--r-- | writerfilter/inc/dmapper/resourcemodel.hxx | 3 | ||||
-rw-r--r-- | writerfilter/source/dmapper/DomainMapper.cxx | 14 | ||||
-rw-r--r-- | writerfilter/source/dmapper/DomainMapper.hxx | 1 | ||||
-rw-r--r-- | writerfilter/source/dmapper/DomainMapper_Impl.cxx | 71 | ||||
-rw-r--r-- | writerfilter/source/dmapper/DomainMapper_Impl.hxx | 3 | ||||
-rw-r--r-- | writerfilter/source/dmapper/LoggedResources.cxx | 14 | ||||
-rw-r--r-- | writerfilter/source/dmapper/LoggedResources.hxx | 2 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLFastContextHandler.cxx | 1 |
10 files changed, 119 insertions, 8 deletions
diff --git a/sw/qa/extras/ooxmlexport/data/tdf152203.docx b/sw/qa/extras/ooxmlexport/data/tdf152203.docx Binary files differnew file mode 100644 index 000000000000..f8ba96ea59fb --- /dev/null +++ b/sw/qa/extras/ooxmlexport/data/tdf152203.docx diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport14.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport14.cxx index 19cf5dee5e3f..9ce06be528e9 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport14.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport14.cxx @@ -21,6 +21,7 @@ #include <com/sun/star/text/XDocumentIndex.hpp> #include <com/sun/star/text/XDocumentIndexesSupplier.hpp> #include <com/sun/star/text/XFootnotesSupplier.hpp> +#include <com/sun/star/text/XFootnote.hpp> #include <com/sun/star/text/XTextContentAppend.hpp> #include <com/sun/star/text/XTextDocument.hpp> #include <com/sun/star/text/XTextFieldsSupplier.hpp> @@ -1272,6 +1273,23 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf143583) assertXPath(pXml, "/w:footnotes/w:footnote[6]/w:p", 3); } +CPPUNIT_TEST_FIXTURE(Test, testTdf152203) +{ + loadAndSave("tdf152203.docx"); + xmlDocUniquePtr pXml = parseExport("word/footnotes.xml"); + CPPUNIT_ASSERT(pXml); + + uno::Reference<text::XFootnotesSupplier> xFootnotesSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference<container::XIndexAccess> xFootnotes = xFootnotesSupplier->getFootnotes(); + uno::Reference<text::XTextRange> xLastFootnote(xFootnotes->getByIndex(5), uno::UNO_QUERY); + // This was "Footnote for pg5" (replaced footnotes) + CPPUNIT_ASSERT_EQUAL( OUString("Footnote for pg 6"), xLastFootnote->getString().trim() ); + + uno::Reference<text::XTextRange> xLastButOne(xFootnotes->getByIndex(4), uno::UNO_QUERY); + // This was "Footnote for pg 6" (replaced footnotes) + CPPUNIT_ASSERT_EQUAL( OUString("Footnote for pg5"), xLastButOne->getString().trim() ); +} + // skip test for macOS (missing fonts?) 
#if !defined(MACOSX) DECLARE_OOXMLEXPORT_TEST(testTdf146346, "tdf146346.docx") diff --git a/writerfilter/inc/dmapper/resourcemodel.hxx b/writerfilter/inc/dmapper/resourcemodel.hxx index 91fda417c2d9..e277ed675fc1 100644 --- a/writerfilter/inc/dmapper/resourcemodel.hxx +++ b/writerfilter/inc/dmapper/resourcemodel.hxx @@ -313,6 +313,9 @@ public: /// Receives end mark for glossary document entry. virtual void endGlossaryEntry() = 0; + /// Receives identifier for node entry. + virtual void checkId(const sal_Int32 nId) = 0; + virtual void commentProps(const OUString& /*sId*/, const CommentProperties& /*rProps*/) {} protected: diff --git a/writerfilter/source/dmapper/DomainMapper.cxx b/writerfilter/source/dmapper/DomainMapper.cxx index b2ca35e6da51..1e0f9597b3b4 100644 --- a/writerfilter/source/dmapper/DomainMapper.cxx +++ b/writerfilter/source/dmapper/DomainMapper.cxx @@ -197,6 +197,9 @@ DomainMapper::~DomainMapper() { try { + // Remove temporary footnotes and endnotes + m_pImpl->RemoveTemporaryFootOrEndnotes(); + uno::Reference< text::XDocumentIndexesSupplier> xIndexesSupplier( m_pImpl->GetTextDocument(), uno::UNO_QUERY ); sal_Int32 nIndexes = 0; if( xIndexesSupplier.is() ) @@ -3739,6 +3742,17 @@ void DomainMapper::lcl_positivePercentage(const OUString& rText) m_pImpl->m_aPositivePercentages.push(rText); } +void DomainMapper::lcl_checkId(const sal_Int32 nId) +{ + if (m_pImpl->IsInFootnote()) + { + if (m_pImpl->GetFootnoteCount() > -1) + m_pImpl->m_aFootnoteIds.push_back(nId); + } + else if (m_pImpl->GetEndnoteCount() > -1) + m_pImpl->m_aEndnoteIds.push_back(nId); +} + void DomainMapper::lcl_utext(const sal_uInt8 * data_, size_t len) { // All these fixed values are defined as static const sal_Unicode codepoints in the fast parser, diff --git a/writerfilter/source/dmapper/DomainMapper.hxx b/writerfilter/source/dmapper/DomainMapper.hxx index 3e35c6772ab5..cb68954c7929 100644 --- a/writerfilter/source/dmapper/DomainMapper.hxx +++ 
b/writerfilter/source/dmapper/DomainMapper.hxx @@ -162,6 +162,7 @@ private: ::writerfilter::Reference<Stream>::Pointer_t ref) override; virtual void lcl_startGlossaryEntry() override; virtual void lcl_endGlossaryEntry() override; + virtual void lcl_checkId(const sal_Int32 nId) override; // Properties virtual void lcl_attribute(Id Name, Value & val) override; diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx index ffeb26da7a97..5365b302c6d2 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx @@ -3617,12 +3617,49 @@ static void lcl_PasteRedlines( } } +void DomainMapper_Impl::RemoveTemporaryFootOrEndnotes() +{ + uno::Reference< text::XFootnotesSupplier> xFootnotesSupplier( GetTextDocument(), uno::UNO_QUERY ); + uno::Reference< text::XEndnotesSupplier> xEndnotesSupplier( GetTextDocument(), uno::UNO_QUERY ); + uno::Reference< text::XFootnote > xNote; + if (GetFootnoteCount() > 0) + { + auto xFootnotes = xFootnotesSupplier->getFootnotes(); + for (sal_Int32 i = GetFootnoteCount(); i > 0; --i) + { + xFootnotes->getByIndex(i) >>= xNote; + xNote->getAnchor()->setString(""); + } + } + if (GetEndnoteCount() > 0) + { + auto xEndnotes = xEndnotesSupplier->getEndnotes(); + for (sal_Int32 i = GetEndnoteCount(); i > 0; --i) + { + xEndnotes->getByIndex(i) >>= xNote; + xNote->getAnchor()->setString(""); + } + } +} + +static void lcl_convertToNoteIndices(std::deque<sal_Int32>& rNoteIds) +{ + // convert arbitrary footnote identifiers to 0, 1, 2... 
+ // indices, keeping their possible random order + std::deque<sal_Int32> aSortedIds = rNoteIds; + std::sort(aSortedIds.begin(), aSortedIds.end()); + std::map<sal_Int32, size_t> aMapIds; + for (size_t i = 0; i < aSortedIds.size(); ++i) + aMapIds[aSortedIds[i]] = i; + for (size_t i = 0; i < rNoteIds.size(); ++i) + rNoteIds[i] = aMapIds[rNoteIds[i]]; +} + void DomainMapper_Impl::PopFootOrEndnote() { // content of the footnotes were inserted after the first footnote in temporary footnotes, // restore the content of the actual footnote by copying its content from the first // (remaining) temporary footnote and remove the temporary footnote. - // FIXME: add footnote IDs to handle possible differences in footnote serialization uno::Reference< text::XFootnotesSupplier> xFootnotesSupplier( GetTextDocument(), uno::UNO_QUERY ); uno::Reference< text::XEndnotesSupplier> xEndnotesSupplier( GetTextDocument(), uno::UNO_QUERY ); bool bCopied = false; @@ -3638,11 +3675,31 @@ void DomainMapper_Impl::PopFootOrEndnote() ( xEndnotes->getByIndex(xEndnotes->getCount()-1) >>= xFootnoteLast ) ) ) && xFootnoteLast->getLabel().isEmpty() ) { - // copy content of the first remaining temporary footnote - if ( IsInFootnote() ) - xFootnotes->getByIndex(1) >>= xFootnoteFirst; - else - xEndnotes->getByIndex(1) >>= xFootnoteFirst; + // copy content of the next temporary footnote + try + { + if ( IsInFootnote() && !m_aFootnoteIds.empty() ) + { + if ( m_aFootnoteIds.size() == sal::static_int_cast<size_t>(GetFootnoteCount()) ) + lcl_convertToNoteIndices(m_aFootnoteIds); + xFootnotes->getByIndex(m_aFootnoteIds.front() + 1) >>= xFootnoteFirst; + m_aFootnoteIds.pop_front(); + } + else if ( !IsInFootnote() && !m_aEndnoteIds.empty() ) + { + if ( m_aEndnoteIds.size() == sal::static_int_cast<size_t>(GetEndnoteCount()) ) + lcl_convertToNoteIndices(m_aEndnoteIds); + xEndnotes->getByIndex(m_aEndnoteIds.front() + 1) >>= xFootnoteFirst; + m_aEndnoteIds.pop_front(); + } + else + m_bSaxError = true; + } + catch 
(uno::Exception const&) + { + TOOLS_WARN_EXCEPTION("writerfilter.dmapper", "Cannot insert footnote/endnote"); + m_bSaxError = true; + } if (!m_bSaxError && xFootnoteFirst != xFootnoteLast) { uno::Reference< text::XText > xSrc( xFootnoteFirst, uno::UNO_QUERY_THROW ); @@ -3663,8 +3720,6 @@ void DomainMapper_Impl::PopFootOrEndnote() for( size_t i = 0; redIdx > -1 && i <= sal::static_int_cast<size_t>(redIdx) + 2; i++) m_aStoredRedlines[eType].pop_front(); - // remove temporary footnote - xFootnoteFirst->getAnchor()->setString(""); bCopied = true; } } diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx index 247c40fae621..1c0c9654c1b0 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx @@ -633,6 +633,8 @@ private: public: css::uno::Reference<css::text::XTextRange> m_xInsertTextRange; css::uno::Reference<css::text::XTextRange> m_xAltChunkStartingRange; + std::deque<sal_Int32> m_aFootnoteIds; + std::deque<sal_Int32> m_aEndnoteIds; bool m_bIsInTextBox; private: @@ -884,6 +886,7 @@ public: void IncrementFootnoteCount() { ++m_nFootnotes; } sal_Int32 GetEndnoteCount() const { return m_nEndnotes; } void IncrementEndnoteCount() { ++m_nEndnotes; } + void RemoveTemporaryFootOrEndnotes(); void PushAnnotation(); void PopAnnotation(); diff --git a/writerfilter/source/dmapper/LoggedResources.cxx b/writerfilter/source/dmapper/LoggedResources.cxx index bc883b20a6ea..819238bf671d 100644 --- a/writerfilter/source/dmapper/LoggedResources.cxx +++ b/writerfilter/source/dmapper/LoggedResources.cxx @@ -316,6 +316,20 @@ void LoggedStream::endGlossaryEntry() #endif } +void LoggedStream::checkId(const sal_Int32 nId) +{ +#ifdef DBG_UTIL + mHelper.startElement("checkId"); + LoggedResourcesHelper::chars(OUString::number(nId)); +#endif + + lcl_checkId(nId); + +#ifdef DBG_UTIL + LoggedResourcesHelper::endElement(); +#endif +} + LoggedProperties::LoggedProperties( #ifdef 
DBG_UTIL const std::string& sPrefix) diff --git a/writerfilter/source/dmapper/LoggedResources.hxx b/writerfilter/source/dmapper/LoggedResources.hxx index abb52cd91ebe..aa7b84e0a083 100644 --- a/writerfilter/source/dmapper/LoggedResources.hxx +++ b/writerfilter/source/dmapper/LoggedResources.hxx @@ -73,6 +73,7 @@ public: void info(const std::string& info) override; void startGlossaryEntry() override; void endGlossaryEntry() override; + void checkId(const sal_Int32 nId) override; virtual void setDocumentReference(writerfilter::ooxml::OOXMLDocument* /*pDocument*/) override{}; @@ -98,6 +99,7 @@ protected: virtual void lcl_substream(Id name, writerfilter::Reference<Stream>::Pointer_t ref) = 0; virtual void lcl_startGlossaryEntry() {} virtual void lcl_endGlossaryEntry() {} + virtual void lcl_checkId(const sal_Int32) {} #ifdef DBG_UTIL LoggedResourcesHelper mHelper; diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx index 16169b318971..b479e587ebc0 100644 --- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx +++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx @@ -1410,6 +1410,7 @@ void OOXMLFastContextHandlerXNote::lcl_endFastElement void OOXMLFastContextHandlerXNote::checkId(const OOXMLValue::Pointer_t& pValue) { mnMyXNoteId = sal_Int32(pValue->getInt()); + mpStream->checkId(mnMyXNoteId); } void OOXMLFastContextHandlerXNote::checkType(const OOXMLValue::Pointer_t& pValue) |