diff options
Diffstat (limited to 'lingucomponent/source/languageguessing')
-rw-r--r-- | lingucomponent/source/languageguessing/altstrfunc.cxx | 50 | ||||
-rw-r--r-- | lingucomponent/source/languageguessing/altstrfunc.hxx | 43 | ||||
-rw-r--r-- | lingucomponent/source/languageguessing/guess.cxx | 139 | ||||
-rw-r--r-- | lingucomponent/source/languageguessing/guess.hxx | 74 | ||||
-rw-r--r-- | lingucomponent/source/languageguessing/guesslang.component | 34 | ||||
-rw-r--r-- | lingucomponent/source/languageguessing/guesslang.cxx | 435 | ||||
-rw-r--r-- | lingucomponent/source/languageguessing/makefile.mk | 88 | ||||
-rw-r--r-- | lingucomponent/source/languageguessing/simpleguesser.cxx | 237 | ||||
-rw-r--r-- | lingucomponent/source/languageguessing/simpleguesser.hxx | 124 |
9 files changed, 0 insertions, 1224 deletions
diff --git a/lingucomponent/source/languageguessing/altstrfunc.cxx b/lingucomponent/source/languageguessing/altstrfunc.cxx deleted file mode 100644 index 5461a2267..000000000 --- a/lingucomponent/source/languageguessing/altstrfunc.cxx +++ /dev/null @@ -1,50 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/*************************************************************************** - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_lingucomponent.hxx" -#include "altstrfunc.hxx" - -#include <sal/types.h> - -int start(const std::string &s1, const std::string &s2){ - size_t i; - int ret = 0; - - size_t min = s1.length(); - if (min > s2.length()) - min = s2.length(); - - for(i = 0; i < min && s2[i] && s1[i] && !ret; i++){ - ret = toupper(s1[i]) - toupper(s2[i]); - if(s1[i] == '.' || s2[i] == '.'){ret = 0;}//. is a neutral character - } - return ret; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/altstrfunc.hxx b/lingucomponent/source/languageguessing/altstrfunc.hxx deleted file mode 100644 index 5ddeda4ee..000000000 --- a/lingucomponent/source/languageguessing/altstrfunc.hxx +++ /dev/null @@ -1,43 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/*************************************************************************** - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -#ifndef _ALT_STRFUNC_HXX_ -#define _ALT_STRFUNC_HXX_ - -#include <string> -#include <guess.hxx> - -inline bool isSeparator(const char c){ - return c == GUESS_SEPARATOR_OPEN || c == GUESS_SEPARATOR_SEP || c == GUESS_SEPARATOR_CLOSE || c == '\0'; -} - -int start(const std::string &s1, const std::string &s2); - -#endif - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guess.cxx b/lingucomponent/source/languageguessing/guess.cxx deleted file mode 100644 index 2a377dc0f..000000000 --- a/lingucomponent/source/languageguessing/guess.cxx +++ /dev/null @@ -1,139 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/*************************************************************************** - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_lingucomponent.hxx" - -#include <iostream> -#include <string.h> - -#include <libtextcat/textcat.h> -#include <altstrfunc.hxx> -#include <guess.hxx> - -using namespace std; - -Guess::Guess() -{ - language_str = DEFAULT_LANGUAGE; - country_str = DEFAULT_COUNTRY; - encoding_str = DEFAULT_ENCODING; -} - -/* -* this use a char * string to build the guess object -* a string like those is made as : [language-country-encoding]... -* -*/ - -Guess::Guess(char * guess_str) -{ - Guess(); - - string lang; - string country; - string enc; - - //if the guess is not like "UNKNOWN" or "SHORT", go into the brackets -// if(strncmp((const char*)(guess_str + 1), _TEXTCAT_RESULT_UNKOWN, strlen(_TEXTCAT_RESULT_UNKOWN)) != 0 -// && -// strncmp((const char*)(guess_str + 1), _TEXTCAT_RESULT_SHORT, strlen(_TEXTCAT_RESULT_SHORT)) != 0) -// { - if(strcmp((const char*)(guess_str + 1), _TEXTCAT_RESULT_UNKOWN) != 0 - && - strcmp((const char*)(guess_str + 1), _TEXTCAT_RESULT_SHORT) != 0) - { - - int current_pointer = 0; - - //this is to go to the first char of the guess string ( the '[' of "[en-US-utf8]" ) - while(!isSeparator(guess_str[current_pointer])){ - current_pointer++; - } - current_pointer++; - - //this is to pick up the language ( the "en" from "[en-US-utf8]" ) - while(!isSeparator(guess_str[current_pointer])){ - lang+=guess_str[current_pointer]; - current_pointer++; - } - current_pointer++; - - //this is to pick up the country ( the "US" from "[en-US-utf8]" ) - while(!isSeparator(guess_str[current_pointer])){ - country+=guess_str[current_pointer]; - current_pointer++; - } - current_pointer++; - - //this is to pick up the encoding ( the "utf8" from "[en-US-utf8]" ) - while(!isSeparator(guess_str[current_pointer])){ - enc+=guess_str[current_pointer]; - current_pointer++; - } - - if(lang!=""){//if not we use the default value - language_str=lang; - } - country_str=country; - - if(enc!=""){//if not we use the default value - encoding_str=enc; - } - } -} - -Guess::~Guess(){} - -string Guess::GetLanguage() -{ - return language_str; -} - -string Guess::GetCountry() -{ - return country_str; -} - -string Guess::GetEncoding() -{ - return encoding_str; -} - -bool Guess::operator==(string lang) -{ - string toString; - toString += GetLanguage(); - toString += "-"; - toString += GetCountry(); - toString += "-"; - toString += GetEncoding(); - return start(toString, lang); -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guess.hxx b/lingucomponent/source/languageguessing/guess.hxx deleted file mode 100644 index b05913bb2..000000000 --- a/lingucomponent/source/languageguessing/guess.hxx +++ /dev/null @@ -1,74 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/*************************************************************************** - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ -#ifndef GUESS_H -#define GUESS_H - -#define GUESS_SEPARATOR_OPEN '[' -#define GUESS_SEPARATOR_CLOSE ']' -#define GUESS_SEPARATOR_SEP '-' -#define DEFAULT_LANGUAGE "" -#define DEFAULT_COUNTRY "" -#define DEFAULT_ENCODING "" - -#include <string> - -using namespace std; - -/** -@author Jocelyn Merand - */ -class Guess{ - public: - - /** - * Default init - */ - Guess(); - - /** - * Init from a string like [en-UK-utf8] and the rank - */ - Guess(char * guess_str); - - ~Guess(); - - string GetLanguage(); - string GetCountry(); - string GetEncoding(); - - bool operator==(string lang); - - protected: - string language_str; - string country_str; - string encoding_str; -}; - -#endif - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/guesslang.component b/lingucomponent/source/languageguessing/guesslang.component deleted file mode 100644 index 633a489c3..000000000 --- a/lingucomponent/source/languageguessing/guesslang.component +++ /dev/null @@ -1,34 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!--********************************************************************** -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* Copyright 2000, 2010 Oracle and/or its affiliates. -* -* OpenOffice.org - a multi-platform office productivity suite -* -* This file is part of OpenOffice.org. -* -* OpenOffice.org is free software: you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License version 3 -* only, as published by the Free Software Foundation. -* -* OpenOffice.org is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License version 3 for more details -* (a copy is included in the LICENSE file that accompanied this code). -* -* You should have received a copy of the GNU Lesser General Public License -* version 3 along with OpenOffice.org. If not, see -* <http://www.openoffice.org/license.html> -* for a copy of the LGPLv3 License. -* -**********************************************************************--> - -<component loader="com.sun.star.loader.SharedLibrary" - xmlns="http://openoffice.org/2010/uno-components"> - <implementation name="com.sun.star.lingu2.LanguageGuessing"> - <service name="com.sun.star.linguistic2.LanguageGuessing"/> - </implementation> -</component> diff --git a/lingucomponent/source/languageguessing/guesslang.cxx b/lingucomponent/source/languageguessing/guesslang.cxx deleted file mode 100644 index 49e033040..000000000 --- a/lingucomponent/source/languageguessing/guesslang.cxx +++ /dev/null @@ -1,435 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_lingucomponent.hxx" - -#include <iostream> - -#include <tools/debug.hxx> - -#include <sal/config.h> -#include <cppuhelper/factory.hxx> -#include <cppuhelper/implementationentry.hxx> -#include <cppuhelper/implbase2.hxx> -#include <tools/string.hxx> - -#include <simpleguesser.hxx> -#include <guess.hxx> - -#include <com/sun/star/registry/XRegistryKey.hpp> -#include <com/sun/star/lang/XServiceInfo.hpp> -#include <com/sun/star/linguistic2/XLanguageGuessing.hpp> -#include <unotools/pathoptions.hxx> -#include <unotools/localfilehelper.hxx> -#include <osl/thread.h> - -#include <sal/macros.h> - -using namespace ::rtl; -using namespace ::osl; -using namespace ::cppu; -using namespace ::com::sun::star; -using namespace ::com::sun::star::uno; -using namespace ::com::sun::star::lang; -using namespace ::com::sun::star::linguistic2; - -namespace css = ::com::sun::star; - -//================================================================================================== - -#define A2OU(x) ::rtl::OUString::createFromAscii( x ) - -#define SERVICENAME "com.sun.star.linguistic2.LanguageGuessing" -#define IMPLNAME "com.sun.star.lingu2.LanguageGuessing" - -static Sequence< OUString > getSupportedServiceNames_LangGuess_Impl() -{ - Sequence<OUString> names(1); - names[0] = A2OU( SERVICENAME ); - return names; -} - -static OUString getImplementationName_LangGuess_Impl() -{ - return A2OU( IMPLNAME ); -} - -static osl::Mutex & GetLangGuessMutex() -{ - static osl::Mutex aMutex; - return aMutex; -} - - -class LangGuess_Impl : - public ::cppu::WeakImplHelper2< - XLanguageGuessing, - XServiceInfo > -{ - SimpleGuesser m_aGuesser; - bool m_bInitialized; - css::uno::Reference< css::uno::XComponentContext > m_xContext; - - LangGuess_Impl( const LangGuess_Impl & ); // not defined - LangGuess_Impl & operator =( const LangGuess_Impl & ); // not defined - - virtual ~LangGuess_Impl() {} - void EnsureInitialized(); - -public: - explicit LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext); - - // XServiceInfo implementation - virtual OUString SAL_CALL getImplementationName( ) throw(RuntimeException); - virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) throw(RuntimeException); - virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) throw(RuntimeException); - static Sequence< OUString > SAL_CALL getSupportedServiceNames_Static( ); - - // XLanguageGuessing implementation - virtual ::com::sun::star::lang::Locale SAL_CALL guessPrimaryLanguage( const ::rtl::OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException); - virtual void SAL_CALL disableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException); - virtual void SAL_CALL enableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException); - virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getAvailableLanguages( ) throw (::com::sun::star::uno::RuntimeException); - virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getEnabledLanguages( ) throw (::com::sun::star::uno::RuntimeException); - virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getDisabledLanguages( ) throw (::com::sun::star::uno::RuntimeException); - - // implementation specific - void SetFingerPrintsDB( const rtl::OUString &fileName ) throw (RuntimeException); - - static const OUString & SAL_CALL getImplementationName_Static() throw(); - -}; - -//************************************************************************* - -LangGuess_Impl::LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext) : - m_bInitialized( false ), - m_xContext( rxContext ) -{ -} - -//************************************************************************* - -void LangGuess_Impl::EnsureInitialized() -{ - if (!m_bInitialized) - { - // set this to true at the very start to prevent loops because of - // implicitly called functions below - m_bInitialized = true; - - // set default fingerprint path to where those get installed - String aPhysPath; - String aURL( SvtPathOptions().GetFingerprintPath() ); - utl::LocalFileHelper::ConvertURLToPhysicalName( aURL, aPhysPath ); -#ifdef WNT - aPhysPath += '\\'; -#else - aPhysPath += '/'; -#endif - - SetFingerPrintsDB( aPhysPath ); - - // - // disable currently not functional languages... - // - struct LangCountry - { - const char *pLang; - const char *pCountry; - }; - LangCountry aDisable[] = - { - {"gv", ""}, {"sco", ""}, // no lang-id available yet... -// {"hy", ""}, {"drt", ""}, // 0 bytes fingerprints... - {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""}, // not yet correct functional... - {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, - {"sa", ""}, {"ta", ""}, {"th", ""}, - {"qu", ""}, {"yi", ""} - }; - sal_Int32 nNum = SAL_N_ELEMENTS(aDisable); - Sequence< Locale > aDisableSeq( nNum ); - Locale *pDisableSeq = aDisableSeq.getArray(); - for (sal_Int32 i = 0; i < nNum; ++i) - { - Locale aLocale; - aLocale.Language = OUString::createFromAscii( aDisable[i].pLang ); - aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry ); - pDisableSeq[i] = aLocale; - } - disableLanguages( aDisableSeq ); - DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" ); - } -} - -//************************************************************************* - -Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage( - const ::rtl::OUString& rText, - ::sal_Int32 nStartPos, - ::sal_Int32 nLen ) - throw (lang::IllegalArgumentException, uno::RuntimeException) -{ - osl::MutexGuard aGuard( GetLangGuessMutex() ); - - EnsureInitialized(); - - lang::Locale aRes; - if (nStartPos >=0 && nLen >= 0 && nStartPos + nLen <= rText.getLength()) - { - OString o( OUStringToOString( rText.copy(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) ); - Guess g = m_aGuesser.GuessPrimaryLanguage((char*)o.getStr()); - aRes.Language = OUString::createFromAscii(g.GetLanguage().c_str()); - aRes.Country = OUString::createFromAscii(g.GetCountry().c_str()); - } - else - throw lang::IllegalArgumentException(); - - return aRes; -} - -//************************************************************************* -#define DEFAULT_CONF_FILE_NAME "fpdb.conf" - -void LangGuess_Impl::SetFingerPrintsDB( - const rtl::OUString &filePath ) - throw (RuntimeException) -{ - //! text encoding for file name / path needs to be in the same encoding the OS uses - OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() ); - OString conf_file_name( DEFAULT_CONF_FILE_NAME ); - OString conf_file_path(path); - conf_file_path += conf_file_name; - - m_aGuesser.SetDBPath((const char*)conf_file_path.getStr(), (const char*)path.getStr()); -} - -//************************************************************************* -uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( ) - throw (uno::RuntimeException) -{ - osl::MutexGuard aGuard( GetLangGuessMutex() ); - - EnsureInitialized(); - - Sequence< com::sun::star::lang::Locale > aRes; - vector<Guess> gs = m_aGuesser.GetAllManagedLanguages(); - aRes.realloc(gs.size()); - - com::sun::star::lang::Locale *pRes = aRes.getArray(); - - for(size_t i = 0; i < gs.size() ; i++ ){ - com::sun::star::lang::Locale current_aRes; - current_aRes.Language = A2OU( gs[i].GetLanguage().c_str() ); - current_aRes.Country = A2OU( gs[i].GetCountry().c_str() ); - pRes[i] = current_aRes; - } - - return aRes; -} - -//************************************************************************* -uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( ) - throw (uno::RuntimeException) -{ - osl::MutexGuard aGuard( GetLangGuessMutex() ); - - EnsureInitialized(); - - Sequence< com::sun::star::lang::Locale > aRes; - vector<Guess> gs = m_aGuesser.GetAvailableLanguages(); - aRes.realloc(gs.size()); - - com::sun::star::lang::Locale *pRes = aRes.getArray(); - - for(size_t i = 0; i < gs.size() ; i++ ){ - com::sun::star::lang::Locale current_aRes; - current_aRes.Language = A2OU( gs[i].GetLanguage().c_str() ); - current_aRes.Country = A2OU( gs[i].GetCountry().c_str() ); - pRes[i] = current_aRes; - } - - return aRes; -} - -//************************************************************************* -uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( ) - throw (uno::RuntimeException) -{ - osl::MutexGuard aGuard( GetLangGuessMutex() ); - - EnsureInitialized(); - - Sequence< com::sun::star::lang::Locale > aRes; - vector<Guess> gs = m_aGuesser.GetUnavailableLanguages(); - aRes.realloc(gs.size()); - - com::sun::star::lang::Locale *pRes = aRes.getArray(); - - for(size_t i = 0; i < gs.size() ; i++ ){ - com::sun::star::lang::Locale current_aRes; - current_aRes.Language = A2OU( gs[i].GetLanguage().c_str() ); - current_aRes.Country = A2OU( gs[i].GetCountry().c_str() ); - pRes[i] = current_aRes; - } - - return aRes; -} - -//************************************************************************* -void SAL_CALL LangGuess_Impl::disableLanguages( - const uno::Sequence< Locale >& rLanguages ) - throw (lang::IllegalArgumentException, uno::RuntimeException) -{ - osl::MutexGuard aGuard( GetLangGuessMutex() ); - - EnsureInitialized(); - - sal_Int32 nLanguages = rLanguages.getLength(); - const Locale *pLanguages = rLanguages.getConstArray(); - - for (sal_Int32 i = 0; i < nLanguages; ++i) - { - string language; - - OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US ); - OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US ); - - language += l.getStr(); - language += "-"; - language += c.getStr(); - m_aGuesser.DisableLanguage(language); - } -} - -//************************************************************************* -void SAL_CALL LangGuess_Impl::enableLanguages( - const uno::Sequence< Locale >& rLanguages ) - throw (lang::IllegalArgumentException, uno::RuntimeException) -{ - osl::MutexGuard aGuard( GetLangGuessMutex() ); - - EnsureInitialized(); - - sal_Int32 nLanguages = rLanguages.getLength(); - const Locale *pLanguages = rLanguages.getConstArray(); - - for (sal_Int32 i = 0; i < nLanguages; ++i) - { - string language; - - OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US ); - OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US ); - - language += l.getStr(); - language += "-"; - language += c.getStr(); - m_aGuesser.EnableLanguage(language); - } -} - -//************************************************************************* -OUString SAL_CALL LangGuess_Impl::getImplementationName( ) - throw(RuntimeException) -{ - osl::MutexGuard aGuard( GetLangGuessMutex() ); - return A2OU( IMPLNAME ); -} - -//************************************************************************* -sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName ) - throw(RuntimeException) -{ - osl::MutexGuard aGuard( GetLangGuessMutex() ); - Sequence< OUString > aSNL = getSupportedServiceNames(); - const OUString * pArray = aSNL.getArray(); - for( sal_Int32 i = 0; i < aSNL.getLength(); i++ ) - if( pArray[i] == ServiceName ) - return sal_True; - return sal_False; -} - -//************************************************************************* -Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( ) - throw(RuntimeException) -{ - osl::MutexGuard aGuard( GetLangGuessMutex() ); - return getSupportedServiceNames_Static(); -} - -//************************************************************************* -Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames_Static( ) -{ - OUString aName( A2OU( SERVICENAME ) ); - return Sequence< OUString >( &aName, 1 ); -} - -//************************************************************************* - - -/** - * Function to create a new component instance; is needed by factory helper implementation. - * @param xMgr service manager to if the components needs other component instances - */ -Reference< XInterface > SAL_CALL LangGuess_Impl_create( - Reference< XComponentContext > const & xContext ) - SAL_THROW( () ) -{ - return static_cast< ::cppu::OWeakObject * >( new LangGuess_Impl(xContext) ); -} - -//################################################################################################## -//#### EXPORTED ### functions to allow for registration and creation of the UNO component -//################################################################################################## - -static struct ::cppu::ImplementationEntry s_component_entries [] = -{ - { - LangGuess_Impl_create, getImplementationName_LangGuess_Impl, - getSupportedServiceNames_LangGuess_Impl, - ::cppu::createSingleComponentFactory, - 0, 0 - }, - { 0, 0, 0, 0, 0, 0 } -}; - -extern "C" -{ - -SAL_DLLPUBLIC_EXPORT void * SAL_CALL component_getFactory( - sal_Char const * implName, lang::XMultiServiceFactory * xMgr, - registry::XRegistryKey * xRegistry ) -{ - return ::cppu::component_getFactoryHelper( - implName, xMgr, xRegistry, s_component_entries ); -} - -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/makefile.mk b/lingucomponent/source/languageguessing/makefile.mk deleted file mode 100644 index 624ccba3c..000000000 --- a/lingucomponent/source/languageguessing/makefile.mk +++ /dev/null @@ -1,88 +0,0 @@ -#************************************************************************* -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# Copyright 2000, 2010 Oracle and/or its affiliates. -# -# OpenOffice.org - a multi-platform office productivity suite -# -# This file is part of OpenOffice.org. -# -# OpenOffice.org is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License version 3 -# only, as published by the Free Software Foundation. -# -# OpenOffice.org is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License version 3 for more details -# (a copy is included in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU Lesser General Public License -# version 3 along with OpenOffice.org. If not, see -# <http://www.openoffice.org/license.html> -# for a copy of the LGPLv3 License. -# -#************************************************************************* - -PRJ=..$/.. -PRJNAME=lingucomponent - -TARGET=guesslang - -ENABLE_EXCEPTIONS=TRUE - -#----- Settings --------------------------------------------------------- - -.INCLUDE : settings.mk - -# --- Files -------------------------------------------------------- - -.IF "$(GUI)"=="UNX" || "$(GUI)"=="MAC" || "$(GUI)$(COM)"=="WNTGCC" -LIBTEXTCATLIB=-ltextcat -.ELSE # "$(GUI)"=="UNX" || "$(GUI)"=="MAC" -LIBTEXTCATLIB=ilibtextcat.lib -.ENDIF # "$(GUI)"=="UNX" || "$(GUI)"=="MAC" - -SLOFILES = \ - $(SLO)$/altstrfunc.obj \ - $(SLO)$/guess.obj \ - $(SLO)$/guesslang.obj \ - $(SLO)$/simpleguesser.obj - - -SHL1TARGET= $(TARGET)$(DLLPOSTFIX) - -SHL1STDLIBS= \ - $(CPPUHELPERLIB) \ - $(CPPULIB) \ - $(LIBTEXTCATLIB) \ - $(SALLIB) \ - $(SVLLIB) \ - $(TOOLSLIB) \ - $(UNOTOOLSLIB) - -# build DLL -SHL1LIBS= $(SLB)$/$(TARGET).lib -SHL1IMPLIB= i$(TARGET) -SHL1DEPN= $(SHL1LIBS) -SHL1DEF= $(MISC)$/$(SHL1TARGET).def -.IF "$(OS)"!="MACOSX" -SHL1VERSIONMAP=$(SOLARENV)/src/component.map -.ENDIF - -# build DEF file -DEF1NAME =$(SHL1TARGET) - -# --- Targets ------------------------------------------------------ - -.INCLUDE : target.mk - - -ALLTAR : $(MISC)/guesslang.component - -$(MISC)/guesslang.component .ERRREMOVE : $(SOLARENV)/bin/createcomponent.xslt \ - guesslang.component - $(XSLTPROC) --nonet --stringparam uri \ - '$(COMPONENTPREFIX_BASIS_NATIVE)$(SHL1TARGETN:f)' -o $@ \ - $(SOLARENV)/bin/createcomponent.xslt guesslang.component diff --git a/lingucomponent/source/languageguessing/simpleguesser.cxx b/lingucomponent/source/languageguessing/simpleguesser.cxx deleted file mode 100644 index 68c53318f..000000000 --- a/lingucomponent/source/languageguessing/simpleguesser.cxx +++ /dev/null @@ -1,237 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/*************************************************************************** - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - - /** - * - * - * - * - * TODO - * - Add exception throwing when h == NULL - * - Not init h when implicit constructor is launched - */ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_lingucomponent.hxx" - -#include <string.h> -#include <sstream> -#include <iostream> - -#include <libtextcat/textcat.h> -#include <libtextcat/common.h> -#include <libtextcat/constants.h> -#include <libtextcat/fingerprint.h> -#include <libtextcat/utf8misc.h> - -#include <sal/types.h> - -#include "altstrfunc.hxx" -#include "simpleguesser.hxx" - -#ifndef _UTF8_ -#define _UTF8_ -#endif - - -using namespace std; - - -/** - * This 3 following structures are from fingerprint.c and textcat.c - */ - -typedef struct ngram_t { - - sint2 rank; - char str[MAXNGRAMSIZE+1]; - -} ngram_t; - -typedef struct fp_t { - - const char *name; - ngram_t *fprint; - uint4 size; - -} fp_t; - -typedef struct textcat_t{ - - void **fprint; - char *fprint_disable; - uint4 size; - uint4 maxsize; - - char output[MAXOUTPUTSIZE]; - -} textcat_t; -/** end of the 3 structs */ - -SimpleGuesser::SimpleGuesser() -{ - h = NULL; -} - -void SimpleGuesser::operator=(SimpleGuesser& sg){ - if(h){textcat_Done(h);} - h = sg.h; -} - -SimpleGuesser::~SimpleGuesser() -{ - if(h){textcat_Done(h);} -} - - -/*! - \fn SimpleGuesser::GuessLanguage(char* text) - */ -vector<Guess> SimpleGuesser::GuessLanguage(char* text) -{ - vector<Guess> guesses; - - if(!h){return guesses;} - - //calculate le number of unicode charcters (symbols) - int len = utfstrlen(text); - - if( len > MAX_STRING_LENGTH_TO_ANALYSE ){len = MAX_STRING_LENGTH_TO_ANALYSE ;} - - char *guess_list = textcat_Classify(h, text, len); - - if(strcmp(guess_list, _TEXTCAT_RESULT_SHORT) == 0){ - return guesses; - } - - int current_pointer = 0; - - for(int i = 0; guess_list[current_pointer] != '\0'; i++) - { - while(guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0'){ - current_pointer++; - } - if(guess_list[current_pointer] != '\0') - { - Guess g((char*)(guess_list + current_pointer)); - - guesses.push_back(g); - - current_pointer++; - } - } - - return guesses; -} - -/*! - \fn SimpleGuesser::GuessPrimaryLanguage(char* text) - */ -Guess SimpleGuesser::GuessPrimaryLanguage(char* text) -{ - vector<Guess> ret = GuessLanguage(text); - if(!ret.empty()){ - return GuessLanguage(text)[0]; - } - else{ - return Guess(); - } -} -/** - * Is used to know wich language is available, unavailable or both - * when mask = 0xF0, return only Available - * when mask = 0x0F, return only Unavailable - * when mask = 0xFF, return both Available and Unavailable - */ -vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask) -{ - size_t i; - textcat_t *tables = (textcat_t*)h; - - vector<Guess> lang; - if(!h){return lang;} - - for (i=0; i<tables->size; i++) { - if(tables->fprint_disable[i] & mask){ - string langStr = "["; - langStr += (char*)fp_Name(tables->fprint[i]); - Guess g( (char *)langStr.c_str()); - lang.push_back(g); - } - } - - return lang; -} - -vector<Guess> SimpleGuesser::GetAvailableLanguages(){ - return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) ); -} - -vector<Guess> SimpleGuesser::GetUnavailableLanguages(){ - return GetManagedLanguages( sal::static_int_cast< char >( 0x0F )); -} - -vector<Guess> SimpleGuesser::GetAllManagedLanguages(){ - return GetManagedLanguages( sal::static_int_cast< char >( 0xFF )); -} - -void SimpleGuesser::XableLanguage(string lang, char mask){ - size_t i; - textcat_t *tables = (textcat_t*)h; - - if(!h){return;} - - for (i=0; i<tables->size; i++) { - string language(fp_Name(tables->fprint[i])); - if(start(language,lang) == 0){ - //cout << language << endl; - tables->fprint_disable[i] = mask; - //continue; - } - } -} - -void SimpleGuesser::EnableLanguage(string lang){ - XableLanguage(lang, sal::static_int_cast< char >( 0xF0 )); -} - -void SimpleGuesser::DisableLanguage(string lang){ - XableLanguage(lang, sal::static_int_cast< char >( 0x0F )); -} - -/** -* -*/ -void SimpleGuesser::SetDBPath(const char* path, const char* prefix){ - if(h){ - textcat_Done(h); - } - h = special_textcat_Init(path, prefix); -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lingucomponent/source/languageguessing/simpleguesser.hxx b/lingucomponent/source/languageguessing/simpleguesser.hxx deleted file mode 100644 index ee76b0781..000000000 --- a/lingucomponent/source/languageguessing/simpleguesser.hxx +++ /dev/null @@ -1,124 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/*************************************************************************** - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ -#ifndef SIMPLEGUESSER_H -#define SIMPLEGUESSER_H - -#include <string.h> -#include <string> -#include <cstdlib> -#include <vector> -#include <guess.hxx> - -#define MAX_STRING_LENGTH_TO_ANALYSE 200 - -using namespace std; - -/** -@author Jocelyn Merand -*/ -class SimpleGuesser{ -public: - /**inits the object with conf file "./conf.txt"*/ - SimpleGuesser(); - - /** Compares the current Simpleguesser with an other - * @param SimpleGuesser& sg the other guesser to compare - */ - void operator=(SimpleGuesser& sg); - - /** - * destroy the object - */ - ~SimpleGuesser(); - - /** - * Analyze a text and return the most probable languages of the text - * @param char* text is the text to analyze - * @return the list of guess - */ - vector<Guess> GuessLanguage(char* text); - - /** - * Analyze a text and return the most probable language of the text - * @param char* text is the text to analyze - * @return the guess (containing language) - */ - Guess GuessPrimaryLanguage(char* text); - - /** - * List all available languages (possibly to be in guesses) - * @return the list of languages - */ - vector<Guess> GetAvailableLanguages(); - - /** - * List all languages (possibly in guesses or not) - * @return the list of languages - */ - vector<Guess> GetAllManagedLanguages(); - - /** - * List all Unavailable languages (disable for any reason) - * @return the list of languages - */ - vector<Guess> GetUnavailableLanguages(); - - /** - * Mark a language enabled - * @param string lang the language to enable (build like language-COUNTRY-encoding) - */ - void EnableLanguage(string lang); - - /** - * Mark a language disabled - * @param string lang the language to disable (build like language-COUNTRY-encoding) - */ - void DisableLanguage(string lang); - - /** - * Load a new DB of fingerprints - * @param const char* thePathOfConfFile self explaining - * @param const char* prefix is the path where the directory witch contains fingerprint files is stored - */ - void SetDBPath(const char* thePathOfConfFile, const char* prefix); - -protected: - - //Where typical fingerprints (n-gram tables) are stored - void* h; - - //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both - vector<Guess> GetManagedLanguages(const char mask); - - //Like getManagedLanguages, this function enable or disable a language and it depends of the mask - void XableLanguage(string lang, char mask); -}; - -#endif - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |