diff options
author | Kevin Suo <suokunlong@126.com> | 2022-10-19 19:08:27 +0800 |
---|---|---|
committer | Kevin Suo <suokunlong@126.com> | 2022-11-24 17:05:16 +0100 |
commit | 4b7fa212b438f6e89196fbdd5d58c38862a35d7d (patch) | |
tree | 0f9d2bc3cb3805ac92032c6b5c70c81b4a6064d6 /sdext | |
parent | 588e59cc36475ded243ce4fd9062473cddd2c016 (diff) |
sdext.pdfimport - Writer: add handling for consecutive space characters
This was already done for Draw in sdext/source/pdfimport/tree/drawtreevisiting.cxx,
but not for Writer. Without it, consecutive spaces in a PDF are collapsed into
a single space when the file is imported with the Writer pdfimport filter.
Change-Id: I2279d9b1750e07f5743aeba80a3fd553bc037d13
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141527
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
(cherry picked from commit c2e2997f452b93b400d541c2d0b2ee396a889007)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/142312
Reviewed-by: Kevin Suo <suokunlong@126.com>
Diffstat (limited to 'sdext')
-rw-r--r-- | sdext/source/pdfimport/test/testdocs/testSpace.pdf | bin | 0 -> 8140 bytes | |||
-rw-r--r-- | sdext/source/pdfimport/test/tests.cxx | 38 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/writertreevisiting.cxx | 28 |
3 files changed, 63 insertions, 3 deletions
diff --git a/sdext/source/pdfimport/test/testdocs/testSpace.pdf b/sdext/source/pdfimport/test/testdocs/testSpace.pdf Binary files differnew file mode 100644 index 000000000000..3c94f31ea15b --- /dev/null +++ b/sdext/source/pdfimport/test/testdocs/testSpace.pdf diff --git a/sdext/source/pdfimport/test/tests.cxx b/sdext/source/pdfimport/test/tests.cxx index fe2f659aeb7f..eb1940361436 100644 --- a/sdext/source/pdfimport/test/tests.cxx +++ b/sdext/source/pdfimport/test/tests.cxx @@ -841,6 +841,43 @@ namespace #endif } + void testSpaces() + { +#if HAVE_FEATURE_POPPLER + rtl::Reference<pdfi::PDFIRawAdaptor> xAdaptor(new pdfi::PDFIRawAdaptor(OUString(), getComponentContext())); + xAdaptor->setTreeVisitorFactory(createWriterTreeVisitorFactory()); + + OString aOutput; + xAdaptor->odfConvert(m_directories.getURLFromSrc(u"/sdext/source/pdfimport/test/testdocs/testSpace.pdf"), + new OutputWrapString(aOutput), + nullptr); + xmlDocUniquePtr pXmlDoc(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput.getStr()))); + + // Space test: there are 10 spaces, each space is expressed as a <text:s text:c="1" ...>, + // thus the 10th text:s should exist and the attribute "text:c" should be "1". + OString xpath = "//draw:frame[@draw:z-index='1'][1]/draw:text-box/text:p/text:span/text:s[10]"; + OUString sContent = getXPath(pXmlDoc, xpath, "c"); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString("1"), sContent); + + // Tab test: there are 10 tabs. Text before and after the tabs are shown in different draw frames. + // With the Liberation Serif font, the horizontal position of the first frame is 20.03mm and the + // second frame is 94.12mm. 
+ xpath = "//draw:frame[@draw:z-index='2'][1]"; + sContent = getXPath(pXmlDoc, xpath, "transform"); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString("translate( 20.03mm 25.05mm )"), sContent); + xpath = "//draw:frame[@draw:z-index='3'][1]"; + sContent = getXPath(pXmlDoc, xpath, "transform"); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString("translate( 94.12mm 25.05mm )"), sContent); + + // Non-breaking space test: there are 10 NBSpaces, which are treated as the same as normal space in PDF, + // thus each is expressed as a <text:s text:c="1" ...>. + // The 10th text:s should exist and the attribute "text:c" should be "1". + xpath = "//draw:frame[@draw:z-index='4'][1]/draw:text-box/text:p/text:span/text:s[10]"; + sContent = getXPath(pXmlDoc, xpath, "c"); + CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString("1"), sContent); +#endif + } + CPPUNIT_TEST_SUITE(PDFITest); CPPUNIT_TEST(testXPDFParser); CPPUNIT_TEST(testOdfWriterExport); @@ -853,6 +890,7 @@ namespace CPPUNIT_TEST(testTdf78427_FontWeight_MyraidProSemibold); CPPUNIT_TEST(testTdf143959_nameFromFontFile); CPPUNIT_TEST(testTdf104597_textrun); + CPPUNIT_TEST(testSpaces); CPPUNIT_TEST_SUITE_END(); }; diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx b/sdext/source/pdfimport/tree/writertreevisiting.cxx index d3ea6cc05320..060990f003b9 100644 --- a/sdext/source/pdfimport/tree/writertreevisiting.cxx +++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx @@ -81,7 +81,11 @@ void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_pt if( elem.Text.isEmpty() ) return; - PropertyMap aProps; + PropertyMap aProps = {}; + const sal_Unicode strSpace = 0x0020; + const sal_Unicode strNbSpace = 0x00A0; + const sal_Unicode tabSpace = 0x0009; + if( elem.StyleId != -1 ) { aProps[ OUString( "text:style-name" ) ] = @@ -111,8 +115,26 @@ void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_pt str = ::comphelper::string::reverseString(str); 
m_rEmitContext.rEmitter.beginTag( "text:span", aProps ); - // TODO: reserve continuous spaces, see DrawXmlEmitter::visit( TextElement& elem...) - m_rEmitContext.rEmitter.write(str); + + sal_Unicode strToken; + for (int i = 0; i < elem.Text.getLength(); i++) + { + strToken = str[i]; + if (strToken == strSpace || strToken == strNbSpace) + { + aProps["text:c"] = "1"; + m_rEmitContext.rEmitter.beginTag("text:s", aProps); + m_rEmitContext.rEmitter.endTag("text:s"); + } + else if (strToken == tabSpace) + { + m_rEmitContext.rEmitter.beginTag("text:tab", aProps); + m_rEmitContext.rEmitter.endTag("text:tab"); + } + else + m_rEmitContext.rEmitter.write(OUString(strToken)); + } + auto this_it = elem.Children.begin(); while( this_it != elem.Children.end() && this_it->get() != &elem ) { |