summaryrefslogtreecommitdiff
path: root/linguistic
diff options
context:
space:
mode:
authorLászló Németh <nemeth@numbertext.org>2023-12-31 14:30:05 +0100
committerLászló Németh <nemeth@numbertext.org>2024-01-01 00:42:35 +0100
commitc899d3608d30f3ab4c2bc193c1fcd765221614a4 (patch)
treefb5acefddce7cc16ec926c37a092c631f6da8176 /linguistic
parent20c015f2726904b0d96380a1ec16e7d7406acca1 (diff)
tdf#158885 sw: don't hyphenate right after a stem boundary
in compound words, to get better typography or orthography with more readable text, if the hyphenation zone is enabled. If there are multiple possible break points in the word according to the libhyphen-based hyphenation, keep only the best ones, using Hunspell morphological data based on the compound word decomposition of non-dictionary words (pa: fields), the extra morphological data of dictionary words (hy: fields), or their combination. For readability and tradition, orthography and typography prefer or only allow hyphenation between stems in compound words in several languages, like Danish, Dutch, German, Hungarian, Norwegian and Swedish. The hyphenation zone exists to avoid too much or bad hyphenation. Preferring stem boundaries for hyphenation within the hyphenation zone is a natural extension of it, i.e. skip hyphenation within stems if there is a stem boundary within the hyphenation zone. Now skip break points after stem boundaries if their distance is 3 or fewer characters (COMPOUNDLEFTHYPHENMIN = 4). Also skip break points on stem boundaries if there is a weighted stem boundary before them within 3 characters. Weighted stem boundaries are the boundaries between the pa: fields (the stems resulting from the compound word decomposition), or, in the hy: field, the boundaries marked by a double || instead of a single |. More information: man 5 hunspell, and the -m option of hunspell. Note: for languages with fogemorphemes, break points are skipped only in the last stems for now, because of their incomplete Hunspell output for morphological analysis. Change-Id: I739908716d11a9c2db0c9d36fba8657ba6f53bee Reviewed-on: https://gerrit.libreoffice.org/c/core/+/161498 Tested-by: Jenkins Reviewed-by: László Németh <nemeth@numbertext.org>
Diffstat (limited to 'linguistic')
-rw-r--r--linguistic/source/lngprophelp.cxx15
1 files changed, 15 insertions, 0 deletions
diff --git a/linguistic/source/lngprophelp.cxx b/linguistic/source/lngprophelp.cxx
index 57483d062e55..ee593f9f489f 100644
--- a/linguistic/source/lngprophelp.cxx
+++ b/linguistic/source/lngprophelp.cxx
@@ -508,6 +508,7 @@ void PropertyHelper_Hyphen::SetDefaultValues()
nResHyphMinLeading = nHyphMinLeading = 2;
nResHyphMinTrailing = nHyphMinTrailing = 2;
nResHyphMinWordLength = nHyphMinWordLength = 0;
+ nResHyphTextHyphenZone = nHyphTextHyphenZone = 0;
bResNoHyphenateCaps = bNoHyphenateCaps = false;
}
@@ -542,6 +543,11 @@ void PropertyHelper_Hyphen::GetCurrentValues()
pnVal = &nHyphMinWordLength;
pnResVal = &nResHyphMinWordLength;
}
+ else if ( rPropName == UPN_HYPH_ZONE )
+ {
+ pnVal = &nHyphTextHyphenZone;
+ pnResVal = &nResHyphTextHyphenZone;
+ }
else if ( rPropName == UPN_HYPH_NO_CAPS )
{
pbVal = &bNoHyphenateCaps;
@@ -575,6 +581,7 @@ bool PropertyHelper_Hyphen::propertyChange_Impl( const PropertyChangeEvent& rEvt
case UPH_HYPH_MIN_LEADING : pnVal = &nHyphMinLeading; break;
case UPH_HYPH_MIN_TRAILING : pnVal = &nHyphMinTrailing; break;
case UPH_HYPH_MIN_WORD_LENGTH : pnVal = &nHyphMinWordLength; break;
+ case UPH_HYPH_ZONE : pnVal = &nHyphTextHyphenZone; break;
case UPH_HYPH_NO_CAPS : pbVal = &bNoHyphenateCaps; break;
default:
SAL_WARN( "linguistic", "unknown property handle " << rEvt.PropertyHandle << " (check in include/unotools/linguprops.hxx)");
@@ -613,6 +620,7 @@ void PropertyHelper_Hyphen::SetTmpPropVals( const PropertyValues &rPropVals )
nResHyphMinLeading = nHyphMinLeading;
nResHyphMinTrailing = nHyphMinTrailing;
nResHyphMinWordLength = nHyphMinWordLength;
+ nResHyphTextHyphenZone = nHyphTextHyphenZone;
bResNoHyphenateCaps = bNoHyphenateCaps;
for (const PropertyValue& rVal : rPropVals)
@@ -626,6 +634,8 @@ void PropertyHelper_Hyphen::SetTmpPropVals( const PropertyValues &rPropVals )
pnResVal = &nResHyphMinTrailing;
else if ( rVal.Name == UPN_HYPH_MIN_WORD_LENGTH )
pnResVal = &nResHyphMinWordLength;
+ else if ( rVal.Name == UPN_HYPH_ZONE )
+ pnResVal = &nResHyphTextHyphenZone;
else if ( rVal.Name == UPN_HYPH_NO_CAPS )
pbResVal = &bResNoHyphenateCaps;
@@ -705,6 +715,11 @@ sal_Int16 PropertyHelper_Hyphenation::GetMinWordLength() const
return mxPropHelper->GetMinWordLength();
}
+sal_Int16 PropertyHelper_Hyphenation::GetTextHyphenZone() const
+{
+ return mxPropHelper->GetTextHyphenZone();
+}
+
bool PropertyHelper_Hyphenation::IsNoHyphenateCaps() const
{
return mxPropHelper->IsNoHyphenateCaps();