summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jonathan Clark <jonathan@libreoffice.org> 2024-09-26 02:59:26 -0600
committer: Jonathan Clark <jonathan@libreoffice.org> 2024-09-27 03:50:01 +0200
commit: c3c29d31d77ff93aa50634cfd51c62d12dc0f6ec (patch)
tree: 0c938a2c80e3de61c8f6aa6133a187d96ba8fb2f
parent: d8f430e4bef414616fd80bbf4ea16d767991b5b9 (diff)
tdf#140767 Implemented Syriac justification
This change extends kashida justification to Syriac, using custom insertion rules.

Change-Id: I7508d2c32e95abb12a098e989c7153828ba81c87
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173990
Tested-by: Jenkins
Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
-rw-r--r-- i18nutil/qa/cppunit/test_kashida.cxx 45
-rw-r--r-- i18nutil/source/utility/kashida.cxx 88
-rw-r--r-- sw/source/core/inc/scriptinfo.hxx 9
-rw-r--r-- sw/source/core/text/itradj.cxx 8
-rw-r--r-- sw/source/core/text/porlay.cxx 12
-rw-r--r-- sw/source/core/text/portxt.cxx 2
-rw-r--r-- sw/source/core/txtnode/fntcache.cxx 7
-rw-r--r-- vcl/qa/cppunit/pdfexport/data/tdf140767.odt bin 0 -> 101535 bytes
-rw-r--r-- vcl/qa/cppunit/pdfexport/pdfexport2.cxx 61
9 files changed, 206 insertions, 26 deletions
diff --git a/i18nutil/qa/cppunit/test_kashida.cxx b/i18nutil/qa/cppunit/test_kashida.cxx
index 46b40c2a5b7a..e0526c4c8f93 100644
--- a/i18nutil/qa/cppunit/test_kashida.cxx
+++ b/i18nutil/qa/cppunit/test_kashida.cxx
@@ -26,6 +26,7 @@ public:
void testFinalYeh();
void testNoZwnjExpansion();
void testExcludeInvalid();
+ void testSyriac();
CPPUNIT_TEST_SUITE(KashidaTest);
CPPUNIT_TEST(testCharacteristic);
@@ -33,6 +34,7 @@ public:
CPPUNIT_TEST(testFinalYeh);
CPPUNIT_TEST(testNoZwnjExpansion);
CPPUNIT_TEST(testExcludeInvalid);
+ CPPUNIT_TEST(testSyriac);
CPPUNIT_TEST_SUITE_END();
};
@@ -69,7 +71,9 @@ void KashidaTest::testManualKashida()
// tdf#65344: Do not insert kashida before a final Yeh
void KashidaTest::testFinalYeh()
{
- CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نیمِي"_ustr).has_value());
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"يييي"_ustr).value().nIndex);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"ييي"_ustr).value().nIndex);
+ CPPUNIT_ASSERT(!GetWordKashidaPosition(u"يي"_ustr).has_value());
// Should always insert kashida after Seen, even before a final Yeh
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), GetWordKashidaPosition(u"كرسي"_ustr).value().nIndex);
@@ -82,7 +86,7 @@ void KashidaTest::testNoZwnjExpansion()
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"نویس\u200Cه"_ustr).value().nIndex);
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"متن"_ustr).value().nIndex);
- CPPUNIT_ASSERT(!GetWordKashidaPosition(u"مت\u200Cن"_ustr).has_value());
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"مت\u200Cن"_ustr).value().nIndex);
}
// tdf#163105: Do not insert kashida if the position is invalid
@@ -111,6 +115,43 @@ void KashidaTest::testExcludeInvalid()
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نویسه"_ustr, aValid).has_value());
}
+// tdf#140767: Kashida justification for Syriac
+void KashidaTest::testSyriac()
+{
+    // A kashida typed by the user takes precedence over any computed position
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), GetWordKashidaPosition(u"ܥܥـܥܥܥܥ"_ustr).value().nIndex);
+
+    std::vector<bool> aValid(7, true);
+
+    // Positions are handed out from the end of the word toward the midpoint,
+    // then from the beginning of the word toward the midpoint. After each
+    // check the returned position is excluded so the next candidate surfaces.
+    for (const sal_Int32 nExpected : { 5, 4, 0, 1, 2, 3 })
+    {
+        CPPUNIT_ASSERT_EQUAL(nExpected,
+                             GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
+        aValid[nExpected] = false;
+    }
+
+    // Positions 0-5 are all excluded now, so no position is reported
+    CPPUNIT_ASSERT(!GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).has_value());
+}
+
CPPUNIT_TEST_SUITE_REGISTRATION(KashidaTest);
}
diff --git a/i18nutil/source/utility/kashida.cxx b/i18nutil/source/utility/kashida.cxx
index 6a6c7adde690..a992e5c8e643 100644
--- a/i18nutil/source/utility/kashida.cxx
+++ b/i18nutil/source/utility/kashida.cxx
@@ -133,10 +133,9 @@ bool CanConnectToPrev(sal_Unicode cCh, sal_Unicode cPrevCh)
return bRet;
}
-}
std::optional<i18nutil::KashidaPosition>
-i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& pValidPositions)
+GetWordKashidaPositionArabic(const OUString& rWord, const std::vector<bool>& pValidPositions)
{
sal_Int32 nIdx = 0;
sal_Int32 nPrevIdx = 0;
@@ -148,9 +147,6 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>&
sal_Int32 nWordLen = rWord.getLength();
- SAL_WARN_IF(!pValidPositions.empty() && pValidPositions.size() != static_cast<size_t>(nWordLen),
- "i18n", "Kashida valid position array wrong size");
-
// ignore trailing vowel chars
while (nWordLen && isTransparentChar(rWord[nWordLen - 1]))
{
@@ -298,8 +294,8 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>&
}
}
- // 8. If valid position data exists, use the last legal position
- if (nPriorityLevel >= 7 && nIdx > 0 && !pValidPositions.empty())
+ // 8. Try any valid position
+ if (nPriorityLevel >= 7 && nIdx > 0)
{
fnTryInsertBefore(7);
}
@@ -317,10 +313,86 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>&
if (-1 != nKashidaPos)
{
- return KashidaPosition{ nKashidaPos };
+ return i18nutil::KashidaPosition{ nKashidaPos };
}
return std::nullopt;
}
+// Chooses the kashida insertion position for a word containing Syriac text.
+//
+// A kashida (U+0640) already present in the word wins: the index of the one
+// nearest the end of the word is returned, without consulting
+// pValidPositions. Otherwise candidates are tried from the outside in: first
+// from the second-to-last character down to (excluding) the midpoint, then
+// from the first character up to the midpoint. The last character of the
+// word is never offered as a computed candidate.
+//
+// pValidPositions may be empty, in which case every candidate is accepted.
+// Otherwise it is indexed by character position and a false entry excludes
+// that position (tdf#163105).
+//
+// Returns std::nullopt when no candidate is left.
+std::optional<i18nutil::KashidaPosition>
+GetWordKashidaPositionSyriac(const OUString& rWord, const std::vector<bool>& pValidPositions)
+{
+    const sal_Int32 nWordLen = rWord.getLength();
+
+    // Search for a user-inserted kashida
+    for (sal_Int32 i = nWordLen - 1; i >= 0; --i)
+    {
+        if (0x640 == rWord[i])
+        {
+            return i18nutil::KashidaPosition{ i };
+        }
+    }
+
+    // Always insert kashida from the outside-in:
+    // - First, work from the end of the word toward the midpoint
+    // - Then, work from the beginning of the word toward the midpoint
+
+    const sal_Int32 nWordMidpoint = nWordLen / 2;
+
+    // Highest index that may be offered: the last character is excluded
+    const sal_Int32 nLastCandidate = nWordLen - 2;
+
+    auto fnPositionValid = [&pValidPositions](sal_Int32 nIdx) {
+        // Exclusions:
+
+        // tdf#163105: Do not insert kashida if the position is invalid
+        if (!pValidPositions.empty() && !pValidPositions[nIdx])
+        {
+            return false;
+        }
+
+        return true;
+    };
+
+    // End to midpoint
+    for (sal_Int32 i = nLastCandidate; i > nWordMidpoint; --i)
+    {
+        if (fnPositionValid(i))
+        {
+            return i18nutil::KashidaPosition{ i };
+        }
+    }
+
+    // Beginning to midpoint. For words of one or two characters the midpoint
+    // is the last character itself, so the scan is also bounded by
+    // nLastCandidate to keep the last character excluded.
+    for (sal_Int32 i = 0; i <= nWordMidpoint && i <= nLastCandidate; ++i)
+    {
+        if (fnPositionValid(i))
+        {
+            return i18nutil::KashidaPosition{ i };
+        }
+    }
+
+    return std::nullopt;
+}
+}
+
+// Picks the kashida position for a word. If any code unit of the word lies in
+// 0x700-0x74F or 0x860-0x86A the Syriac rules are used, otherwise the Arabic
+// rules. A size mismatch between pValidPositions and the word is only logged.
+std::optional<i18nutil::KashidaPosition>
+i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& pValidPositions)
+{
+    const sal_Int32 nWordLen = rWord.getLength();
+
+    SAL_WARN_IF(!pValidPositions.empty() && pValidPositions.size() != static_cast<size_t>(nWordLen),
+                "i18n", "Kashida valid position array wrong size");
+
+    const auto fnIsSyriac = [](sal_Unicode cCh) {
+        const bool bInFirstRange = cCh >= 0x700 && cCh <= 0x74F;
+        const bool bInSecondRange = cCh >= 0x860 && cCh <= 0x86A;
+        return bInFirstRange || bInSecondRange;
+    };
+
+    // Stop scanning at the first Syriac code unit
+    bool bHasSyriac = false;
+    for (sal_Int32 nPos = 0; nPos < nWordLen && !bHasSyriac; ++nPos)
+    {
+        bHasSyriac = fnIsSyriac(rWord[nPos]);
+    }
+
+    if (bHasSyriac)
+    {
+        // This word contains Syriac characters.
+        return GetWordKashidaPositionSyriac(rWord, pValidPositions);
+    }
+
+    return GetWordKashidaPositionArabic(rWord, pValidPositions);
+}
+
/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/sw/source/core/inc/scriptinfo.hxx b/sw/source/core/inc/scriptinfo.hxx
index ae37779b6f71..60cf9c542eab 100644
--- a/sw/source/core/inc/scriptinfo.hxx
+++ b/sw/source/core/inc/scriptinfo.hxx
@@ -347,16 +347,17 @@ public:
*/
bool IsKashidaLine(TextFrameIndex nCharIdx) const;
-/** Checks if text is Arabic text.
+/** Checks if text is in a script that allows kashida justification.
- @descr Checks if text is Arabic text.
+ @descr Checks if text is in a language that allows kashida justification.
@param rText
The text to check
@param nStt
Start index of the text
- @return Returns if the language is an Arabic language
+ @return Returns true if the script is Arabic or Syriac
*/
- static bool IsArabicText(const OUString& rText, TextFrameIndex nStt, TextFrameIndex nLen);
+ static bool IsKashidaScriptText(const OUString& rText, TextFrameIndex nStt,
+ TextFrameIndex nLen);
/** Performs a thai justification on the kerning array
diff --git a/sw/source/core/text/itradj.cxx b/sw/source/core/text/itradj.cxx
index 1a22579c24f1..c072025e7a96 100644
--- a/sw/source/core/text/itradj.cxx
+++ b/sw/source/core/text/itradj.cxx
@@ -168,7 +168,8 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, SwTextSizeInfo& rInf, S
// Fetch the set of valid positions from VCL, where possible
aValidPositions.clear();
- if ( SwScriptInfo::IsArabicText( rInf.GetText(), TextFrameIndex{aScanner.GetBegin()}, TextFrameIndex{aScanner.GetLen()} ) )
+ if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), TextFrameIndex{ aScanner.GetBegin() },
+ TextFrameIndex{ aScanner.GetLen() }))
{
rItr.SeekAndChgAttrIter(TextFrameIndex{ aScanner.GetBegin() }, rInf.GetRefDev());
@@ -250,7 +251,7 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, SwTextSizeInfo& rInf, S
}
sal_Int32 nKashidasDropped = 0;
- if ( !SwScriptInfo::IsArabicText( rInf.GetText(), nIdx, nNext - nIdx ) )
+ if (!SwScriptInfo::IsKashidaScriptText(rInf.GetText(), nIdx, nNext - nIdx))
{
nKashidasDropped = nKashidasInAttr;
rKashidas -= nKashidasDropped;
@@ -314,7 +315,8 @@ static bool lcl_CheckKashidaWidth ( SwScriptInfo& rSI, SwTextSizeInfo& rInf, SwT
sal_Int32 nKashidasInAttr = rSI.KashidaJustify(nullptr, nullptr, nIdx, nNext - nIdx);
tools::Long nFontMinKashida = rInf.GetRefDev()->GetMinKashida();
- if ( nFontMinKashida && nKashidasInAttr > 0 && SwScriptInfo::IsArabicText( rInf.GetText(), nIdx, nNext - nIdx ) )
+ if (nFontMinKashida && nKashidasInAttr > 0
+ && SwScriptInfo::IsKashidaScriptText(rInf.GetText(), nIdx, nNext - nIdx))
{
sal_Int32 nKashidasDropped = 0;
while ( rKashidas && nGluePortion && nKashidasInAttr > 0 &&
diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index c18969fc709c..37cb23961cff 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -2194,16 +2194,18 @@ sal_Int32 SwScriptInfo::KashidaJustify( KernArray* pKernArray,
return 0;
}
-// Checks if the current text is 'Arabic' text. Note that only the first
+// Checks if the text is in Arabic or Syriac. Note that only the first
// character has to be checked because a ctl portion only contains one
// script, see NewTextPortion
-bool SwScriptInfo::IsArabicText(const OUString& rText,
+bool SwScriptInfo::IsKashidaScriptText(const OUString& rText,
TextFrameIndex const nStt, TextFrameIndex const nLen)
{
using namespace ::com::sun::star::i18n;
static const ScriptTypeList typeList[] = {
- { UnicodeScript_kArabic, UnicodeScript_kArabic, sal_Int16(UnicodeScript_kArabic) }, // 11,
- { UnicodeScript_kScriptCount, UnicodeScript_kScriptCount, sal_Int16(UnicodeScript_kScriptCount) } // 88
+ { UnicodeScript_kArabic, UnicodeScript_kArabic, sal_Int16(UnicodeScript_kArabic) }, // 11,
+ { UnicodeScript_kSyriac, UnicodeScript_kSyriac, sal_Int16(UnicodeScript_kSyriac) }, // 12,
+ { UnicodeScript_kScriptCount, UnicodeScript_kScriptCount,
+ sal_Int16(UnicodeScript_kScriptCount) } // 88
};
// go forward if current position does not hold a regular character:
@@ -2229,7 +2231,7 @@ bool SwScriptInfo::IsArabicText(const OUString& rText,
{
const sal_Unicode cCh = rText[nIdx];
const sal_Int16 type = unicode::getUnicodeScriptType( cCh, typeList, sal_Int16(UnicodeScript_kScriptCount) );
- return type == sal_Int16(UnicodeScript_kArabic);
+ return type == sal_Int16(UnicodeScript_kArabic) || type == sal_Int16(UnicodeScript_kSyriac);
}
return false;
}
diff --git a/sw/source/core/text/portxt.cxx b/sw/source/core/text/portxt.cxx
index b310ee706cb5..147eeb10064b 100644
--- a/sw/source/core/text/portxt.cxx
+++ b/sw/source/core/text/portxt.cxx
@@ -116,7 +116,7 @@ static TextFrameIndex lcl_AddSpace(const SwTextSizeInfo &rInf,
// Kashida Justification: Insert Kashidas
if ( nEnd > nPos && pSI && COMPLEX == nScript )
{
- if ( SwScriptInfo::IsArabicText( *pStr, nPos, nEnd - nPos ) && pSI->CountKashida() )
+ if (SwScriptInfo::IsKashidaScriptText(*pStr, nPos, nEnd - nPos) && pSI->CountKashida())
{
const sal_Int32 nKashRes = pSI->KashidaJustify(nullptr, nullptr, nPos, nEnd - nPos);
// i60591: need to check result of KashidaJustify
diff --git a/sw/source/core/txtnode/fntcache.cxx b/sw/source/core/txtnode/fntcache.cxx
index 8b68e29b9ec3..5ca414fc1c68 100644
--- a/sw/source/core/txtnode/fntcache.cxx
+++ b/sw/source/core/txtnode/fntcache.cxx
@@ -1127,7 +1127,8 @@ void SwFntObj::DrawText( SwDrawTextInfo &rInf )
// Kashida Justification
if ( SwFontScript::CTL == nActual && nSpaceAdd )
{
- if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) )
+ if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(),
+ rInf.GetLen()))
{
aKashidaArray.resize(aKernArray.size(), false);
if ( pSI && pSI->CountKashida() &&
@@ -1344,7 +1345,7 @@ void SwFntObj::DrawText( SwDrawTextInfo &rInf )
// Kashida Justification
if ( SwFontScript::CTL == nActual && nSpaceAdd )
{
- if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) )
+ if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(), rInf.GetLen()))
{
aKashidaArray.resize(aKernArray.size(), false);
if ( pSI && pSI->CountKashida() &&
@@ -1838,7 +1839,7 @@ TextFrameIndex SwFntObj::GetModelPositionForViewPoint(SwDrawTextInfo &rInf)
// Kashida Justification
if ( SwFontScript::CTL == nActual && rInf.GetSpace() )
{
- if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) )
+ if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(), rInf.GetLen()))
{
if ( pSI && pSI->CountKashida() &&
pSI->KashidaJustify( &aKernArray, nullptr, rInf.GetIdx(), rInf.GetLen(),
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf140767.odt b/vcl/qa/cppunit/pdfexport/data/tdf140767.odt
new file mode 100644
index 000000000000..ab6efe6b83ef
--- /dev/null
+++ b/vcl/qa/cppunit/pdfexport/data/tdf140767.odt
Binary files differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
index de387cd31850..5e59ec0373f1 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
@@ -5761,6 +5761,67 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf163105Editeng)
CPPUNIT_ASSERT_LESS(170.0, aRect.at(2).getWidth());
}
+// tdf#140767: exports a document to PDF and checks the text objects on its
+// single page: their count, their text, and the width of one span.
+CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf140767SyriacJustification)
+{
+    saveAsPDF(u"tdf140767.odt");
+
+    auto pPdfDocument = parsePDFExport();
+    CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount());
+
+    auto pPdfPage = pPdfDocument->openPage(/*nIndex*/ 0);
+    CPPUNIT_ASSERT(pPdfPage);
+    auto pTextPage = pPdfPage->getTextPage();
+    CPPUNIT_ASSERT(pTextPage);
+
+    int nPageObjectCount = pPdfPage->getObjectCount();
+
+    CPPUNIT_ASSERT_EQUAL(11, nPageObjectCount);
+
+    std::vector<OUString> aText;
+    std::vector<basegfx::B2DRectangle> aRect;
+
+    // Collect the text and bounds of every text object on the page
+    int nTextObjectCount = 0;
+    for (int i = 0; i < nPageObjectCount; ++i)
+    {
+        auto pPageObject = pPdfPage->getObject(i);
+        CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr);
+        if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text)
+        {
+            aText.push_back(pPageObject->getText(pTextPage));
+            aRect.push_back(pPageObject->getBounds());
+            ++nTextObjectCount;
+        }
+    }
+
+    CPPUNIT_ASSERT_EQUAL(11, nTextObjectCount);
+
+    // Diagnostic dump of each string and its code units. std::hex is sticky,
+    // so the stream flags are saved here and restored afterwards; otherwise
+    // later integer output on std::cout would stay in hex.
+    const std::ios_base::fmtflags aOldFlags = std::cout.flags();
+    std::cout << "Strings" << std::endl;
+    for (auto const& em : aText)
+    {
+        std::cout << em << std::endl;
+        for (sal_Int32 i = 0; i < em.getLength(); ++i)
+        {
+            std::cout << std::hex << static_cast<uint32_t>(em[i]) << " ";
+        }
+        std::cout << std::endl;
+    }
+    std::cout.flags(aOldFlags);
+
+    CPPUNIT_ASSERT_EQUAL(u"ܝ"_ustr, aText.at(0).trim());
+    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(1).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܺܛ"_ustr, aText.at(2).trim());
+    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(3).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܰܚ"_ustr, aText.at(4).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܕ"_ustr, aText.at(5).trim()); // This span is whitespace justified
+    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(6).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܰܓ"_ustr, aText.at(7).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܒ"_ustr, aText.at(8).trim());
+    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(9).trim());
+    CPPUNIT_ASSERT_EQUAL(u"ܰܐ"_ustr, aText.at(10).trim());
+
+    // Without kashida justification, this space will be 224.328
+    CPPUNIT_ASSERT_LESS(90.0, aRect.at(5).getWidth());
+}
+
} // end anonymous namespace
CPPUNIT_PLUGIN_IMPLEMENT();