From 4d24507451fbc2b7d72cf5f7048a424d52b63850 Mon Sep 17 00:00:00 2001 From: Eike Rathke Date: Tue, 11 Dec 2012 11:46:19 +0100 Subject: avoid liblangtag processing for known locales, and know 'qtz' For "simple" known (to us) locales avoid the overhead of liblangtag, also in preparation of not having to initialize its database during startup. This also enables recognition of the 'qtz' pseudolocale for key ID resources again, see fdo#57413 Change-Id: Id52a1ddc37b42063e3bf68a6dbeaeacfbfa704ef --- i18npool/inc/i18npool/languagetag.hxx | 28 ++- i18npool/qa/cppunit/test_languagetag.cxx | 42 +++- i18npool/source/languagetag/languagetag.cxx | 317 ++++++++++++++++++++++------ 3 files changed, 315 insertions(+), 72 deletions(-) (limited to 'i18npool') diff --git a/i18npool/inc/i18npool/languagetag.hxx b/i18npool/inc/i18npool/languagetag.hxx index 0b3eeddd150f..bb86b72262f0 100644 --- a/i18npool/inc/i18npool/languagetag.hxx +++ b/i18npool/inc/i18npool/languagetag.hxx @@ -229,6 +229,7 @@ private: mutable Decision meIsValid; mutable Decision meIsIsoLocale; mutable Decision meIsIsoODF; + mutable Decision meIsLiblangtagNeeded; ///< whether processing with liblangtag needed bool mbSystemLocale : 1; mutable bool mbInitializedBcp47 : 1; mutable bool mbInitializedLocale : 1; @@ -245,17 +246,36 @@ private: void convertLangToLocale(); void convertLangToBcp47(); - bool canonicalize() const; + bool canonicalize(); - rtl::OUString getLanguageFromLangtag() const; - rtl::OUString getScriptFromLangtag() const; - rtl::OUString getRegionFromLangtag() const; + rtl::OUString getLanguageFromLangtag(); + rtl::OUString getScriptFromLangtag(); + rtl::OUString getRegionFromLangtag(); void resetVars(); + /** Obtain Language, Script and Country via simpleExtract() and assign them + to the cached variables if successful. 
+ + @return return of simpleExtract() + */ + bool cacheSimpleLSC(); + static bool isIsoLanguage( const rtl::OUString& rLanguage ); static bool isIsoScript( const rtl::OUString& rScript ); static bool isIsoCountry( const rtl::OUString& rRegion ); + + /** Of a simple language tag of the form lll[-Ssss][-CC] (i.e. one that + would fulfill the isIsoODF() condition) extract the portions. + + Does not check case or content! + + @return TRUE if it detected a simple tag, else FALSE. + */ + static bool simpleExtract( const rtl::OUString& rBcp47, + rtl::OUString& rLanguage, + rtl::OUString& rScript, + rtl::OUString& rCountry ); }; #endif // INCLUDED_I18NPOOL_LANGUAGETAG_HXX diff --git a/i18npool/qa/cppunit/test_languagetag.cxx b/i18npool/qa/cppunit/test_languagetag.cxx index deaeecd92768..f748802e5e53 100644 --- a/i18npool/qa/cppunit/test_languagetag.cxx +++ b/i18npool/qa/cppunit/test_languagetag.cxx @@ -61,13 +61,21 @@ void TestLanguageTag::testAllTags() CPPUNIT_ASSERT( aLocale.Country == "DE" ); CPPUNIT_ASSERT( aLocale.Variant == "" ); CPPUNIT_ASSERT( nLanguageType == LANGUAGE_GERMAN ); + CPPUNIT_ASSERT( de_DE.getLanguage() == "de" ); + CPPUNIT_ASSERT( de_DE.getCountry() == "DE" ); + CPPUNIT_ASSERT( de_DE.getScript() == "" ); + CPPUNIT_ASSERT( de_DE.getLanguageAndScript() == "de" ); #else // The simple replacement code doesn't do any fancy stuff. CPPUNIT_ASSERT_MESSAGE("Default script was stripped after canonicalize!?!", aBcp47 == s_de_Latn_DE ); CPPUNIT_ASSERT( aLocale.Language == "qlt" ); CPPUNIT_ASSERT( aLocale.Country == "DE" ); CPPUNIT_ASSERT( aLocale.Variant == "de-Latn-DE" ); - (void)nLanguageType; //XXX CPPUNIT_ASSERT( nLanguageType == LANGUAGE_GERMAN ); + CPPUNIT_ASSERT( nLanguageType == LANGUAGE_SYSTEM ); // XXX not resolved! 
+ CPPUNIT_ASSERT( de_DE.getLanguage() == "de" ); + CPPUNIT_ASSERT( de_DE.getCountry() == "DE" ); + CPPUNIT_ASSERT( de_DE.getScript() == "Latn" ); + CPPUNIT_ASSERT( de_DE.getLanguageAndScript() == "de-Latn" ); #endif } @@ -127,6 +135,10 @@ void TestLanguageTag::testAllTags() CPPUNIT_ASSERT( sr_RS.isValidBcp47() == true ); CPPUNIT_ASSERT( sr_RS.isIsoLocale() == false ); CPPUNIT_ASSERT( sr_RS.isIsoODF() == true ); + CPPUNIT_ASSERT( sr_RS.getLanguage() == "sr" ); + CPPUNIT_ASSERT( sr_RS.getCountry() == "RS" ); + CPPUNIT_ASSERT( sr_RS.getScript() == "Latn" ); + CPPUNIT_ASSERT( sr_RS.getLanguageAndScript() == "sr-Latn" ); } { @@ -141,6 +153,10 @@ void TestLanguageTag::testAllTags() CPPUNIT_ASSERT( de_DE.isValidBcp47() == true ); CPPUNIT_ASSERT( de_DE.isIsoLocale() == true ); CPPUNIT_ASSERT( de_DE.isIsoODF() == true ); + CPPUNIT_ASSERT( de_DE.getLanguage() == "de" ); + CPPUNIT_ASSERT( de_DE.getCountry() == "DE" ); + CPPUNIT_ASSERT( de_DE.getScript() == "" ); + CPPUNIT_ASSERT( de_DE.getLanguageAndScript() == "de" ); } { @@ -165,6 +181,30 @@ void TestLanguageTag::testAllTags() CPPUNIT_ASSERT( de_DE.getLanguageType() == LANGUAGE_GERMAN ); } + // 'qtz' is a local use known pseudolocale for key ID resource + { + OUString s_qtz( "qtz" ); + LanguageTag qtz( s_qtz ); + lang::Locale aLocale = qtz.getLocale(); + CPPUNIT_ASSERT( qtz.getBcp47() == s_qtz ); + CPPUNIT_ASSERT( aLocale.Language == "qtz" ); + CPPUNIT_ASSERT( aLocale.Country == "" ); + CPPUNIT_ASSERT( aLocale.Variant == "" ); + CPPUNIT_ASSERT( qtz.getLanguageType() == LANGUAGE_USER_KEYID ); + } + + // 'qty' is a local use unknown locale + { + OUString s_qty( "qty" ); + LanguageTag qty( s_qty ); + lang::Locale aLocale = qty.getLocale(); + CPPUNIT_ASSERT( qty.getBcp47() == s_qty ); + CPPUNIT_ASSERT( aLocale.Language == "qty" ); + CPPUNIT_ASSERT( aLocale.Country == "" ); + CPPUNIT_ASSERT( aLocale.Variant == "" ); + CPPUNIT_ASSERT( qty.getLanguageType() == LANGUAGE_SYSTEM ); + } + // test reset() methods { LanguageTag aTag( 
LANGUAGE_DONTKNOW ); diff --git a/i18npool/source/languagetag/languagetag.cxx b/i18npool/source/languagetag/languagetag.cxx index 94cf2e16c7a8..bab244371063 100644 --- a/i18npool/source/languagetag/languagetag.cxx +++ b/i18npool/source/languagetag/languagetag.cxx @@ -99,6 +99,7 @@ LiblantagDataRef::~LiblantagDataRef() void LiblantagDataRef::setup() { + SAL_INFO( "i18npool.langtag", "LiblantagDataRef::setup: initializing database"); if (maDataPath.isEmpty()) setupDataPath(); lt_db_initialize(); @@ -108,6 +109,7 @@ void LiblantagDataRef::setup() void LiblantagDataRef::teardown() { + SAL_INFO( "i18npool.langtag", "LiblantagDataRef::teardown: finalizing database"); lt_db_finalize(); } @@ -142,6 +144,7 @@ LanguageTag::LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonic meIsValid( DECISION_DONTKNOW), meIsIsoLocale( DECISION_DONTKNOW), meIsIsoODF( DECISION_DONTKNOW), + meIsLiblangtagNeeded( DECISION_DONTKNOW), mbSystemLocale( rBcp47LanguageTag.isEmpty()), mbInitializedBcp47( !mbSystemLocale), mbInitializedLocale( false), @@ -151,8 +154,6 @@ LanguageTag::LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonic mbCachedCountry( false), mbIsFallback( false) { - theDataRef::get().incRef(); - if (bCanonicalize) canonicalize(); } @@ -166,6 +167,7 @@ LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale ) meIsValid( DECISION_DONTKNOW), meIsIsoLocale( DECISION_DONTKNOW), meIsIsoODF( DECISION_DONTKNOW), + meIsLiblangtagNeeded( DECISION_DONTKNOW), mbSystemLocale( rLocale.Language.isEmpty()), mbInitializedBcp47( false), mbInitializedLocale( !mbSystemLocale), @@ -175,7 +177,6 @@ LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale ) mbCachedCountry( false), mbIsFallback( false) { - theDataRef::get().incRef(); } @@ -186,6 +187,7 @@ LanguageTag::LanguageTag( LanguageType nLanguage ) meIsValid( DECISION_DONTKNOW), meIsIsoLocale( DECISION_DONTKNOW), meIsIsoODF( DECISION_DONTKNOW), + meIsLiblangtagNeeded( DECISION_DONTKNOW), 
mbSystemLocale( nLanguage == LANGUAGE_SYSTEM), mbInitializedBcp47( false), mbInitializedLocale( false), @@ -195,7 +197,6 @@ LanguageTag::LanguageTag( LanguageType nLanguage ) mbCachedCountry( false), mbIsFallback( false) { - theDataRef::get().incRef(); } @@ -207,6 +208,7 @@ LanguageTag::LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& r meIsValid( DECISION_DONTKNOW), meIsIsoLocale( DECISION_DONTKNOW), meIsIsoODF( DECISION_DONTKNOW), + meIsLiblangtagNeeded( DECISION_DONTKNOW), mbSystemLocale( rLanguage.isEmpty()), mbInitializedBcp47( false), mbInitializedLocale( !mbSystemLocale), @@ -216,7 +218,6 @@ LanguageTag::LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& r mbCachedCountry( false), mbIsFallback( false) { - theDataRef::get().incRef(); } @@ -228,6 +229,7 @@ LanguageTag::LanguageTag( const rtl_Locale & rLocale ) meIsValid( DECISION_DONTKNOW), meIsIsoLocale( DECISION_DONTKNOW), meIsIsoODF( DECISION_DONTKNOW), + meIsLiblangtagNeeded( DECISION_DONTKNOW), mbSystemLocale( maLocale.Language.isEmpty()), mbInitializedBcp47( false), mbInitializedLocale( !mbSystemLocale), @@ -237,7 +239,6 @@ LanguageTag::LanguageTag( const rtl_Locale & rLocale ) mbCachedCountry( false), mbIsFallback( false) { - theDataRef::get().incRef(); } @@ -254,6 +255,7 @@ LanguageTag::LanguageTag( const LanguageTag & rLanguageTag ) meIsValid( rLanguageTag.meIsValid), meIsIsoLocale( rLanguageTag.meIsIsoLocale), meIsIsoODF( rLanguageTag.meIsIsoODF), + meIsLiblangtagNeeded( rLanguageTag.meIsLiblangtagNeeded), mbSystemLocale( rLanguageTag.mbSystemLocale), mbInitializedBcp47( rLanguageTag.mbInitializedBcp47), mbInitializedLocale( rLanguageTag.mbInitializedLocale), @@ -263,7 +265,8 @@ LanguageTag::LanguageTag( const LanguageTag & rLanguageTag ) mbCachedCountry( rLanguageTag.mbCachedCountry), mbIsFallback( rLanguageTag.mbIsFallback) { - theDataRef::get().incRef(); + if (mpImplLangtag) + theDataRef::get().incRef(); } @@ -281,6 +284,7 @@ LanguageTag& LanguageTag::operator=( 
const LanguageTag & rLanguageTag ) meIsValid = rLanguageTag.meIsValid; meIsIsoLocale = rLanguageTag.meIsIsoLocale; meIsIsoODF = rLanguageTag.meIsIsoODF; + meIsLiblangtagNeeded= rLanguageTag.meIsLiblangtagNeeded; mbSystemLocale = rLanguageTag.mbSystemLocale; mbInitializedBcp47 = rLanguageTag.mbInitializedBcp47; mbInitializedLocale = rLanguageTag.mbInitializedLocale; @@ -289,22 +293,30 @@ LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag ) mbCachedScript = rLanguageTag.mbCachedScript; mbCachedCountry = rLanguageTag.mbCachedCountry; mbIsFallback = rLanguageTag.mbIsFallback; + if (mpImplLangtag) + theDataRef::get().incRef(); return *this; } LanguageTag::~LanguageTag() { - lt_tag_unref( MPLANGTAG); - - theDataRef::get().decRef(); + if (mpImplLangtag) + { + lt_tag_unref( MPLANGTAG); + theDataRef::get().decRef(); + } } void LanguageTag::resetVars() { - lt_tag_unref( MPLANGTAG); - mpImplLangtag = NULL; + if (mpImplLangtag) + { + lt_tag_unref( MPLANGTAG); + mpImplLangtag = NULL; + theDataRef::get().decRef(); + } maLocale = lang::Locale(); if (!maBcp47.isEmpty()) @@ -319,6 +331,7 @@ void LanguageTag::resetVars() meIsValid = DECISION_DONTKNOW; meIsIsoLocale = DECISION_DONTKNOW; meIsIsoODF = DECISION_DONTKNOW; + meIsLiblangtagNeeded= DECISION_DONTKNOW; mbSystemLocale = true; mbInitializedBcp47 = false; mbInitializedLocale = false; @@ -360,7 +373,7 @@ void LanguageTag::reset( LanguageType nLanguage ) } -bool LanguageTag::canonicalize() const +bool LanguageTag::canonicalize() { #ifdef erDEBUG // dump once @@ -373,9 +386,99 @@ bool LanguageTag::canonicalize() const dumper aDumper( &mpImplLangtag); #endif - getBcp47( true ); // side effect: have maBcp47 in any case, resolved system + // Side effect: have maBcp47 in any case, resolved system. + // Some methods calling canonicalize() (or not calling it due to + // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set + // meIsLiblangtagNeeded anywhere else than hereafter. 
+ getBcp47( true ); + + // The simple cases and known locales don't need liblangtag processing, + // which also avoids loading liblangtag data on startup. + if (meIsLiblangtagNeeded == DECISION_DONTKNOW) + { + bool bTemporaryLocale = false; + bool bTemporaryLangID = false; + if (!mbInitializedLocale && !mbInitializedLangID) + { + if (mbSystemLocale) + { + mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM); + mbInitializedLangID = true; + } + else + { + // Now this is getting funny.. we only have some BCP47 string + // and want to determine if parsing it would be possible + // without using liblangtag just to see if it is a simple known + // locale. + OUString aLanguage, aScript, aCountry; + if (simpleExtract( maBcp47, aLanguage, aScript, aCountry)) + { + if (aScript.isEmpty()) + { + maLocale.Language = aLanguage; + maLocale.Country = aCountry; + } + else + { + maLocale.Language = ISO639_LANGUAGE_TAG; + maLocale.Country = aCountry; + maLocale.Variant = maBcp47; + } + bTemporaryLocale = mbInitializedLocale = true; + } + } + } + if (mbInitializedLangID && !mbInitializedLocale) + { + // Do not call getLocale() here because that prefers + // convertBcp47ToLocale() which would end up in recursion via + // isIsoLocale()! + + // Prepare to verify that we have a known locale, not just an + // arbitrary MS-LangID. 
+ convertLangToLocale(); + } + if (mbInitializedLocale) + { + if (maLocale.Variant.isEmpty()) + meIsLiblangtagNeeded = DECISION_NO; // per definition ll[l][-CC] + else + { + if (!mbInitializedLangID) + { + convertLocaleToLang(); + if (bTemporaryLocale) + bTemporaryLangID = true; + } + if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM) + meIsLiblangtagNeeded = DECISION_NO; // known locale + } + } + if (bTemporaryLocale) + { + mbInitializedLocale = false; + maLocale = lang::Locale(); + } + if (bTemporaryLangID) + { + mbInitializedLangID = false; + mnLangID = LANGUAGE_DONTKNOW; + } + } + if (meIsLiblangtagNeeded == DECISION_NO) + { + meIsValid = DECISION_YES; // really, known must be valid ... + return true; // that's it + } + meIsLiblangtagNeeded = DECISION_YES; + SAL_INFO( "i18npool.langtag", "LanguageTag::canonicalize: using liblangtag for " << maBcp47); + if (!mpImplLangtag) + { + theDataRef::get().incRef(); mpImplLangtag = lt_tag_new(); + } // ensure error is free'd struct myerror @@ -545,66 +648,90 @@ const rtl::OUString & LanguageTag::getBcp47( bool bResolveSystem ) const } -rtl::OUString LanguageTag::getLanguageFromLangtag() const +rtl::OUString LanguageTag::getLanguageFromLangtag() { - rtl::OUString aLanguage; - if (!mpImplLangtag) + OUString aLanguage; + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) canonicalize(); if (maBcp47.isEmpty()) return aLanguage; - const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG); - SAL_WARN_IF( !pLangT, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL"); - if (!pLangT) - return aLanguage; - const char* pLang = lt_lang_get_tag( pLangT); - SAL_WARN_IF( !pLang, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL"); - if (pLang) - aLanguage = OUString::createFromAscii( pLang); + if (mpImplLangtag) + { + const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG); + SAL_WARN_IF( !pLangT, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL"); + 
if (!pLangT) + return aLanguage; + const char* pLang = lt_lang_get_tag( pLangT); + SAL_WARN_IF( !pLang, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL"); + if (pLang) + aLanguage = OUString::createFromAscii( pLang); + } + else + { + if (mbCachedLanguage || cacheSimpleLSC()) + aLanguage = maCachedLanguage; + } return aLanguage; } -rtl::OUString LanguageTag::getScriptFromLangtag() const +rtl::OUString LanguageTag::getScriptFromLangtag() { - rtl::OUString aScript; - if (!mpImplLangtag) + OUString aScript; + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) canonicalize(); if (maBcp47.isEmpty()) return aScript; - const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG); - // pScriptT==NULL is valid for default scripts - if (!pScriptT) - return aScript; - const char* pScript = lt_script_get_tag( pScriptT); - SAL_WARN_IF( !pScript, "i18npool.langtag", "LanguageTag::getScriptFromLangtag: pScript==NULL"); - if (pScript) - aScript = OUString::createFromAscii( pScript); + if (mpImplLangtag) + { + const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG); + // pScriptT==NULL is valid for default scripts + if (!pScriptT) + return aScript; + const char* pScript = lt_script_get_tag( pScriptT); + SAL_WARN_IF( !pScript, "i18npool.langtag", "LanguageTag::getScriptFromLangtag: pScript==NULL"); + if (pScript) + aScript = OUString::createFromAscii( pScript); + } + else + { + if (mbCachedScript || cacheSimpleLSC()) + aScript = maCachedScript; + } return aScript; } -rtl::OUString LanguageTag::getRegionFromLangtag() const +rtl::OUString LanguageTag::getRegionFromLangtag() { - rtl::OUString aRegion; - if (!mpImplLangtag) + OUString aRegion; + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) canonicalize(); if (maBcp47.isEmpty()) return aRegion; - const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG); - // pRegionT==NULL is valid for language only tags, rough check here that - // does not take sophisticated tags into account that 
actually should have - // a region, check for ll, lll, ll-Ssss and lll-Ssss so that ll-CC and - // lll-CC actually fail. - SAL_WARN_IF( !pRegionT && - maBcp47.getLength() != 2 && maBcp47.getLength() != 3 && - maBcp47.getLength() != 7 && maBcp47.getLength() != 8, - "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL"); - if (!pRegionT) - return aRegion; - const char* pRegion = lt_region_get_tag( pRegionT); - SAL_WARN_IF( !pRegion, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL"); - if (pRegion) - aRegion = OUString::createFromAscii( pRegion); + if (mpImplLangtag) + { + const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG); + // pRegionT==NULL is valid for language only tags, rough check here + // that does not take sophisticated tags into account that actually + // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so + // that ll-CC and lll-CC actually fail. + SAL_WARN_IF( !pRegionT && + maBcp47.getLength() != 2 && maBcp47.getLength() != 3 && + maBcp47.getLength() != 7 && maBcp47.getLength() != 8, + "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL"); + if (!pRegionT) + return aRegion; + const char* pRegion = lt_region_get_tag( pRegionT); + SAL_WARN_IF( !pRegion, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL"); + if (pRegion) + aRegion = OUString::createFromAscii( pRegion); + } + else + { + if (mbCachedCountry || cacheSimpleLSC()) + aRegion = maCachedCountry; + } return aRegion; } @@ -681,7 +808,7 @@ bool LanguageTag::isIsoLanguage( const rtl::OUString& rLanguage ) SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) && (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) || (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18npool.langtag", - "LanguageTag::isIsoLanguage: rejecting upper case"); + "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage); return false; } @@ -694,7 +821,7 @@ bool 
LanguageTag::isIsoCountry( const rtl::OUString& rRegion ) (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1]))) return true; SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])), - "i18npool.langtag", "LanguageTag::isIsoCountry: rejecting lower case"); + "i18npool.langtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion); return false; } @@ -711,7 +838,7 @@ bool LanguageTag::isIsoScript( const rtl::OUString& rScript ) SAL_WARN_IF( rScript.getLength() == 4 && (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) || isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])), - "i18npool.langtag", "LanguageTag::isIsoScript: rejecting case mismatch"); + "i18npool.langtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript); return false; } @@ -720,7 +847,7 @@ rtl::OUString LanguageTag::getLanguage() const { if (!mbCachedLanguage) { - maCachedLanguage = getLanguageFromLangtag(); + maCachedLanguage = const_cast<LanguageTag*>(this)->getLanguageFromLangtag(); mbCachedLanguage = true; } return maCachedLanguage; @@ -731,7 +858,7 @@ rtl::OUString LanguageTag::getScript() const { if (!mbCachedScript) { - maCachedScript = getScriptFromLangtag(); + maCachedScript = const_cast<LanguageTag*>(this)->getScriptFromLangtag(); mbCachedScript = true; } return maCachedScript; @@ -756,7 +883,7 @@ rtl::OUString LanguageTag::getCountry() const { if (!mbCachedCountry) { - maCachedCountry = getRegionFromLangtag(); + maCachedCountry = const_cast<LanguageTag*>(this)->getRegionFromLangtag(); if (!isIsoCountry( maCachedCountry)) maCachedCountry = OUString(); mbCachedCountry = true; @@ -767,7 +894,22 @@ rtl::OUString LanguageTag::getCountry() const rtl::OUString LanguageTag::getRegion() const { - return getRegionFromLangtag(); + return const_cast<LanguageTag*>(this)->getRegionFromLangtag(); +} + + +bool LanguageTag::cacheSimpleLSC() +{ + OUString aLanguage, aScript, aCountry; + bool bRet = simpleExtract( maBcp47, aLanguage, aScript, aCountry); + if
(bRet) + { + maCachedLanguage = aLanguage; + maCachedScript = aScript; + maCachedCountry = aCountry; + mbCachedLanguage = mbCachedScript = mbCachedCountry = true; + } + return bRet; } @@ -775,8 +917,8 @@ bool LanguageTag::isIsoLocale() const { if (meIsIsoLocale == DECISION_DONTKNOW) { - if (!mpImplLangtag) - canonicalize(); + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) + const_cast<LanguageTag*>(this)->canonicalize(); // It must be at most ll-CC or lll-CC // Do not use getCountry() here, use getRegion() instead. meIsIsoLocale = ((maBcp47.isEmpty() || @@ -791,8 +933,8 @@ bool LanguageTag::isIsoODF() const { if (meIsIsoODF == DECISION_DONTKNOW) { - if (!mpImplLangtag) - canonicalize(); + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) + const_cast<LanguageTag*>(this)->canonicalize(); if (!isIsoScript( getScript())) return ((meIsIsoODF = DECISION_NO) == DECISION_YES); // The usual case is lll-CC so simply check that first. @@ -812,8 +954,8 @@ bool LanguageTag::isValidBcp47() const { if (meIsValid == DECISION_DONTKNOW) { - if (!mpImplLangtag) - canonicalize(); + if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) + const_cast<LanguageTag*>(this)->canonicalize(); SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18npool.langtag", "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid"); } @@ -866,4 +1008,45 @@ bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const } +// static +bool LanguageTag::simpleExtract( const rtl::OUString& rBcp47, + rtl::OUString& rLanguage, + rtl::OUString& rScript, + rtl::OUString& rCountry ) +{ + bool bRet = false; + const sal_Int32 nLen = rBcp47.getLength(); + const sal_Int32 nHyph1 = rBcp47.indexOf( '-'); + if ((nLen == 2 || nLen == 3) && nHyph1 < 0) // ll or lll + { + rLanguage = rBcp47; + rScript = rCountry = OUString(); + bRet = true; + } + else if ( (nLen == 5 && nHyph1 == 2) // ll-CC + || (nLen == 6 && nHyph1 == 3)) // lll-CC + { + rLanguage = rBcp47.copy( 0, nHyph1); + rCountry = rBcp47.copy( nHyph1 + 1, 2); + rScript =
OUString(); + bRet = true; + } + else if ( (nHyph1 == 2 && nLen == 10) // ll-Ssss-CC check + || (nHyph1 == 3 && nLen == 11)) // lll-Ssss-CC check + { + const sal_Int32 nHyph2 = rBcp47.indexOf( '-', nHyph1 + 1); + if (nHyph2 == nHyph1 + 5) + { + rLanguage = rBcp47.copy( 0, nHyph1); + rScript = rBcp47.copy( nHyph1 + 1, 4); + rCountry = rBcp47.copy( nHyph2 + 1, 2); + bRet = true; + } + } + if (!bRet) + rLanguage = rScript = rCountry = OUString(); + return bRet; +} + + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- cgit v1.2.3