diff options
author | Jonathan Clark <jonathan@libreoffice.org> | 2024-09-26 02:59:26 -0600 |
---|---|---|
committer | Jonathan Clark <jonathan@libreoffice.org> | 2024-09-27 03:50:01 +0200 |
commit | c3c29d31d77ff93aa50634cfd51c62d12dc0f6ec (patch) | |
tree | 0c938a2c80e3de61c8f6aa6133a187d96ba8fb2f | |
parent | d8f430e4bef414616fd80bbf4ea16d767991b5b9 (diff) |
tdf#140767 Implemented Syriac justification
This change extends kashida justification to Syriac, using custom
insertion rules.
Change-Id: I7508d2c32e95abb12a098e989c7153828ba81c87
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173990
Tested-by: Jenkins
Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
-rw-r--r-- | i18nutil/qa/cppunit/test_kashida.cxx | 45 | ||||
-rw-r--r-- | i18nutil/source/utility/kashida.cxx | 88 | ||||
-rw-r--r-- | sw/source/core/inc/scriptinfo.hxx | 9 | ||||
-rw-r--r-- | sw/source/core/text/itradj.cxx | 8 | ||||
-rw-r--r-- | sw/source/core/text/porlay.cxx | 12 | ||||
-rw-r--r-- | sw/source/core/text/portxt.cxx | 2 | ||||
-rw-r--r-- | sw/source/core/txtnode/fntcache.cxx | 7 | ||||
-rw-r--r-- | vcl/qa/cppunit/pdfexport/data/tdf140767.odt | bin | 0 -> 101535 bytes | |||
-rw-r--r-- | vcl/qa/cppunit/pdfexport/pdfexport2.cxx | 61 |
9 files changed, 206 insertions, 26 deletions
diff --git a/i18nutil/qa/cppunit/test_kashida.cxx b/i18nutil/qa/cppunit/test_kashida.cxx index 46b40c2a5b7a..e0526c4c8f93 100644 --- a/i18nutil/qa/cppunit/test_kashida.cxx +++ b/i18nutil/qa/cppunit/test_kashida.cxx @@ -26,6 +26,7 @@ public: void testFinalYeh(); void testNoZwnjExpansion(); void testExcludeInvalid(); + void testSyriac(); CPPUNIT_TEST_SUITE(KashidaTest); CPPUNIT_TEST(testCharacteristic); @@ -33,6 +34,7 @@ public: CPPUNIT_TEST(testFinalYeh); CPPUNIT_TEST(testNoZwnjExpansion); CPPUNIT_TEST(testExcludeInvalid); + CPPUNIT_TEST(testSyriac); CPPUNIT_TEST_SUITE_END(); }; @@ -69,7 +71,9 @@ void KashidaTest::testManualKashida() // tdf#65344: Do not insert kashida before a final Yeh void KashidaTest::testFinalYeh() { - CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نیمِي"_ustr).has_value()); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"يييي"_ustr).value().nIndex); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"ييي"_ustr).value().nIndex); + CPPUNIT_ASSERT(!GetWordKashidaPosition(u"يي"_ustr).has_value()); // Should always insert kashida after Seen, even before a final Yeh CPPUNIT_ASSERT_EQUAL(sal_Int32(2), GetWordKashidaPosition(u"كرسي"_ustr).value().nIndex); @@ -82,7 +86,7 @@ void KashidaTest::testNoZwnjExpansion() CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"نویس\u200Cه"_ustr).value().nIndex); CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"متن"_ustr).value().nIndex); - CPPUNIT_ASSERT(!GetWordKashidaPosition(u"مت\u200Cن"_ustr).has_value()); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"مت\u200Cن"_ustr).value().nIndex); } // tdf#163105: Do not insert kashida if the position is invalid @@ -111,6 +115,43 @@ void KashidaTest::testExcludeInvalid() CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نویسه"_ustr, aValid).has_value()); } +// tdf#140767: Kashida justification for Syriac +void KashidaTest::testSyriac() +{ + // - Prefer user-inserted kashida + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), GetWordKashidaPosition(u"ܥܥـܥܥܥܥ"_ustr).value().nIndex); + + std::vector<bool> aValid; + aValid.resize(7, true); + + // - Start from end and work toward midpoint, then reverse direction + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), + GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex); + aValid[5] = false; + + CPPUNIT_ASSERT_EQUAL(sal_Int32(4), + GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex); + aValid[4] = false; + + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), + GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex); + aValid[0] = false; + + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), + GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex); + aValid[1] = false; + + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), + GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex); + aValid[2] = false; + + CPPUNIT_ASSERT_EQUAL(sal_Int32(3), + GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex); + aValid[3] = false; + + CPPUNIT_ASSERT(!GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).has_value()); +} + CPPUNIT_TEST_SUITE_REGISTRATION(KashidaTest); } diff --git a/i18nutil/source/utility/kashida.cxx b/i18nutil/source/utility/kashida.cxx index 6a6c7adde690..a992e5c8e643 100644 --- a/i18nutil/source/utility/kashida.cxx +++ b/i18nutil/source/utility/kashida.cxx @@ -133,10 +133,9 @@ bool CanConnectToPrev(sal_Unicode cCh, sal_Unicode cPrevCh) return bRet; } -} std::optional<i18nutil::KashidaPosition> -i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& pValidPositions) +GetWordKashidaPositionArabic(const OUString& rWord, const std::vector<bool>& pValidPositions) { sal_Int32 nIdx = 0; sal_Int32 nPrevIdx = 0; @@ -148,9 +147,6 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& sal_Int32 nWordLen = rWord.getLength(); - SAL_WARN_IF(!pValidPositions.empty() && pValidPositions.size() != static_cast<size_t>(nWordLen), - "i18n", "Kashida valid position array wrong size"); - // ignore trailing vowel chars while (nWordLen && isTransparentChar(rWord[nWordLen - 1])) { @@ -298,8 +294,8 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& } } - // 8. If valid position data exists, use the last legal position - if (nPriorityLevel >= 7 && nIdx > 0 && !pValidPositions.empty()) + // 8. Try any valid position + if (nPriorityLevel >= 7 && nIdx > 0) { fnTryInsertBefore(7); } @@ -317,10 +313,86 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& if (-1 != nKashidaPos) { - return KashidaPosition{ nKashidaPos }; + return i18nutil::KashidaPosition{ nKashidaPos }; } return std::nullopt; } +std::optional<i18nutil::KashidaPosition> +GetWordKashidaPositionSyriac(const OUString& rWord, const std::vector<bool>& pValidPositions) +{ + sal_Int32 nWordLen = rWord.getLength(); + + // Search for a user-inserted kashida + for (sal_Int32 i = nWordLen - 1; i >= 0; --i) + { + if (0x640 == rWord[i]) + { + return i18nutil::KashidaPosition{ i }; + } + } + + // Always insert kashida from the outside-in: + // - First, work from the end of the word toward the midpoint + // - Then, work from the beginning of the word toward the midpoint + + sal_Int32 nWordMidpoint = nWordLen / 2; + + auto fnPositionValid = [&pValidPositions](sal_Int32 nIdx) { + // Exclusions: + + // tdf#163105: Do not insert kashida if the position is invalid + if (!pValidPositions.empty() && !pValidPositions[nIdx]) + { + return false; + } + + return true; + }; + + // End to midpoint + for (sal_Int32 i = nWordLen - 2; i > nWordMidpoint; --i) + { + if (fnPositionValid(i)) + { + return i18nutil::KashidaPosition{ i }; + } + } + + // Beginning to midpoint + for (sal_Int32 i = 0; i <= nWordMidpoint; ++i) + { + if (fnPositionValid(i)) + { + return i18nutil::KashidaPosition{ i }; + } + } + + return std::nullopt; +} +} + +std::optional<i18nutil::KashidaPosition> +i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& pValidPositions) +{ + sal_Int32 nWordLen = rWord.getLength(); + + SAL_WARN_IF(!pValidPositions.empty() && pValidPositions.size() != static_cast<size_t>(nWordLen), + "i18n", "Kashida valid position array wrong size"); + + for (sal_Int32 nIdx = 0; nIdx < nWordLen; ++nIdx) + { + auto cCh = rWord[nIdx]; + + if ((cCh >= 0x700 && cCh <= 0x74F) || (cCh >= 0x860 && cCh <= 0x86A)) + { + // This word contains Syriac characters. + return GetWordKashidaPositionSyriac(rWord, pValidPositions); + } + } + + return GetWordKashidaPositionArabic(rWord, pValidPositions); +} + /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/sw/source/core/inc/scriptinfo.hxx b/sw/source/core/inc/scriptinfo.hxx index ae37779b6f71..60cf9c542eab 100644 --- a/sw/source/core/inc/scriptinfo.hxx +++ b/sw/source/core/inc/scriptinfo.hxx @@ -347,16 +347,17 @@ public: */ bool IsKashidaLine(TextFrameIndex nCharIdx) const; -/** Checks if text is Arabic text. +/** Checks if text is in a script that allows kashida justification. - @descr Checks if text is Arabic text. + @descr Checks if text is in a language that allows kashida justification. @param rText The text to check @param nStt Start index of the text - @return Returns if the language is an Arabic language + @return Returns true if the script is Arabic or Syriac */ - static bool IsArabicText(const OUString& rText, TextFrameIndex nStt, TextFrameIndex nLen); + static bool IsKashidaScriptText(const OUString& rText, TextFrameIndex nStt, + TextFrameIndex nLen); /** Performs a thai justification on the kerning array diff --git a/sw/source/core/text/itradj.cxx b/sw/source/core/text/itradj.cxx index 1a22579c24f1..c072025e7a96 100644 --- a/sw/source/core/text/itradj.cxx +++ b/sw/source/core/text/itradj.cxx @@ -168,7 +168,8 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, SwTextSizeInfo& rInf, S // Fetch the set of valid positions from VCL, where possible aValidPositions.clear(); - if ( SwScriptInfo::IsArabicText( rInf.GetText(), TextFrameIndex{aScanner.GetBegin()}, TextFrameIndex{aScanner.GetLen()} ) ) + if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), TextFrameIndex{ aScanner.GetBegin() }, + TextFrameIndex{ aScanner.GetLen() })) { rItr.SeekAndChgAttrIter(TextFrameIndex{ aScanner.GetBegin() }, rInf.GetRefDev()); @@ -250,7 +251,7 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, SwTextSizeInfo& rInf, S } sal_Int32 nKashidasDropped = 0; - if ( !SwScriptInfo::IsArabicText( rInf.GetText(), nIdx, nNext - nIdx ) ) + if (!SwScriptInfo::IsKashidaScriptText(rInf.GetText(), nIdx, nNext - nIdx)) { nKashidasDropped = nKashidasInAttr; rKashidas -= nKashidasDropped; @@ -314,7 +315,8 @@ static bool lcl_CheckKashidaWidth ( SwScriptInfo& rSI, SwTextSizeInfo& rInf, SwT sal_Int32 nKashidasInAttr = rSI.KashidaJustify(nullptr, nullptr, nIdx, nNext - nIdx); tools::Long nFontMinKashida = rInf.GetRefDev()->GetMinKashida(); - if ( nFontMinKashida && nKashidasInAttr > 0 && SwScriptInfo::IsArabicText( rInf.GetText(), nIdx, nNext - nIdx ) ) + if (nFontMinKashida && nKashidasInAttr > 0 + && SwScriptInfo::IsKashidaScriptText(rInf.GetText(), nIdx, nNext - nIdx)) { sal_Int32 nKashidasDropped = 0; while ( rKashidas && nGluePortion && nKashidasInAttr > 0 && diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx index c18969fc709c..37cb23961cff 100644 --- a/sw/source/core/text/porlay.cxx +++ b/sw/source/core/text/porlay.cxx @@ -2194,16 +2194,18 @@ sal_Int32 SwScriptInfo::KashidaJustify( KernArray* pKernArray, return 0; } -// Checks if the current text is 'Arabic' text. Note that only the first +// Checks if the text is in Arabic or Syriac. Note that only the first // character has to be checked because a ctl portion only contains one // script, see NewTextPortion -bool SwScriptInfo::IsArabicText(const OUString& rText, +bool SwScriptInfo::IsKashidaScriptText(const OUString& rText, TextFrameIndex const nStt, TextFrameIndex const nLen) { using namespace ::com::sun::star::i18n; static const ScriptTypeList typeList[] = { - { UnicodeScript_kArabic, UnicodeScript_kArabic, sal_Int16(UnicodeScript_kArabic) }, // 11, - { UnicodeScript_kScriptCount, UnicodeScript_kScriptCount, sal_Int16(UnicodeScript_kScriptCount) } // 88 + { UnicodeScript_kArabic, UnicodeScript_kArabic, sal_Int16(UnicodeScript_kArabic) }, // 11, + { UnicodeScript_kSyriac, UnicodeScript_kSyriac, sal_Int16(UnicodeScript_kSyriac) }, // 12, + { UnicodeScript_kScriptCount, UnicodeScript_kScriptCount, + sal_Int16(UnicodeScript_kScriptCount) } // 88 }; // go forward if current position does not hold a regular character: @@ -2229,7 +2231,7 @@ bool SwScriptInfo::IsArabicText(const OUString& rText, { const sal_Unicode cCh = rText[nIdx]; const sal_Int16 type = unicode::getUnicodeScriptType( cCh, typeList, sal_Int16(UnicodeScript_kScriptCount) ); - return type == sal_Int16(UnicodeScript_kArabic); + return type == sal_Int16(UnicodeScript_kArabic) || type == sal_Int16(UnicodeScript_kSyriac); } return false; } diff --git a/sw/source/core/text/portxt.cxx b/sw/source/core/text/portxt.cxx index b310ee706cb5..147eeb10064b 100644 --- a/sw/source/core/text/portxt.cxx +++ b/sw/source/core/text/portxt.cxx @@ -116,7 +116,7 @@ static TextFrameIndex lcl_AddSpace(const SwTextSizeInfo &rInf, // Kashida Justification: Insert Kashidas if ( nEnd > nPos && pSI && COMPLEX == nScript ) { - if ( SwScriptInfo::IsArabicText( *pStr, nPos, nEnd - nPos ) && pSI->CountKashida() ) + if (SwScriptInfo::IsKashidaScriptText(*pStr, nPos, nEnd - nPos) && pSI->CountKashida()) { const sal_Int32 nKashRes = pSI->KashidaJustify(nullptr, nullptr, nPos, nEnd - nPos); // i60591: need to check result of KashidaJustify diff --git a/sw/source/core/txtnode/fntcache.cxx b/sw/source/core/txtnode/fntcache.cxx index 8b68e29b9ec3..5ca414fc1c68 100644 --- a/sw/source/core/txtnode/fntcache.cxx +++ b/sw/source/core/txtnode/fntcache.cxx @@ -1127,7 +1127,8 @@ void SwFntObj::DrawText( SwDrawTextInfo &rInf ) // Kashida Justification if ( SwFontScript::CTL == nActual && nSpaceAdd ) { - if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) ) + if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(), + rInf.GetLen())) { aKashidaArray.resize(aKernArray.size(), false); if ( pSI && pSI->CountKashida() && @@ -1344,7 +1345,7 @@ void SwFntObj::DrawText( SwDrawTextInfo &rInf ) // Kashida Justification if ( SwFontScript::CTL == nActual && nSpaceAdd ) { - if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) ) + if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(), rInf.GetLen())) { aKashidaArray.resize(aKernArray.size(), false); if ( pSI && pSI->CountKashida() && @@ -1838,7 +1839,7 @@ TextFrameIndex SwFntObj::GetModelPositionForViewPoint(SwDrawTextInfo &rInf) // Kashida Justification if ( SwFontScript::CTL == nActual && rInf.GetSpace() ) { - if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) ) + if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(), rInf.GetLen())) { if ( pSI && pSI->CountKashida() && pSI->KashidaJustify( &aKernArray, nullptr, rInf.GetIdx(), rInf.GetLen(), diff --git a/vcl/qa/cppunit/pdfexport/data/tdf140767.odt b/vcl/qa/cppunit/pdfexport/data/tdf140767.odt Binary files differnew file mode 100644 index 000000000000..ab6efe6b83ef --- /dev/null +++ b/vcl/qa/cppunit/pdfexport/data/tdf140767.odt diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx index de387cd31850..5e59ec0373f1 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx @@ -5761,6 +5761,67 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf163105Editeng) CPPUNIT_ASSERT_LESS(170.0, aRect.at(2).getWidth()); } +CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf140767SyriacJustification) +{ + saveAsPDF(u"tdf140767.odt"); + + auto pPdfDocument = parsePDFExport(); + CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount()); + + auto pPdfPage = pPdfDocument->openPage(/*nIndex*/ 0); + CPPUNIT_ASSERT(pPdfPage); + auto pTextPage = pPdfPage->getTextPage(); + CPPUNIT_ASSERT(pTextPage); + + int nPageObjectCount = pPdfPage->getObjectCount(); + + CPPUNIT_ASSERT_EQUAL(11, nPageObjectCount); + + std::vector<OUString> aText; + std::vector<basegfx::B2DRectangle> aRect; + + int nTextObjectCount = 0; + for (int i = 0; i < nPageObjectCount; ++i) + { + auto pPageObject = pPdfPage->getObject(i); + CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr); + if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text) + { + aText.push_back(pPageObject->getText(pTextPage)); + aRect.push_back(pPageObject->getBounds()); + ++nTextObjectCount; + } + } + + CPPUNIT_ASSERT_EQUAL(11, nTextObjectCount); + + std::cout << "Strings" << std::endl; + for (auto const& em : aText) + { + std::cout << em << std::endl; + for (sal_Int32 i = 0; i < em.getLength(); ++i) + { + std::cout << std::hex << static_cast<uint32_t>(em[i]) << " "; + } + std::cout << std::endl; + } + + CPPUNIT_ASSERT_EQUAL(u"ܝ"_ustr, aText.at(0).trim()); + CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(1).trim()); + CPPUNIT_ASSERT_EQUAL(u"ܺܛ"_ustr, aText.at(2).trim()); + CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(3).trim()); + CPPUNIT_ASSERT_EQUAL(u"ܰܚ"_ustr, aText.at(4).trim()); + CPPUNIT_ASSERT_EQUAL(u"ܕ"_ustr, aText.at(5).trim()); // This span is whitespace justified + CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(6).trim()); + CPPUNIT_ASSERT_EQUAL(u"ܰܓ"_ustr, aText.at(7).trim()); + CPPUNIT_ASSERT_EQUAL(u"ܒ"_ustr, aText.at(8).trim()); + CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(9).trim()); + CPPUNIT_ASSERT_EQUAL(u"ܰܐ"_ustr, aText.at(10).trim()); + + // Without kashida justification, this space will be 224.328 + CPPUNIT_ASSERT_LESS(90.0, aRect.at(5).getWidth()); +} + } // end anonymous namespace CPPUNIT_PLUGIN_IMPLEMENT(); |