summaryrefslogtreecommitdiff
path: root/lingucomponent/source/languageguessing
diff options
context:
space:
mode:
Diffstat (limited to 'lingucomponent/source/languageguessing')
-rw-r--r--lingucomponent/source/languageguessing/altstrfunc.cxx50
-rw-r--r--lingucomponent/source/languageguessing/altstrfunc.hxx43
-rw-r--r--lingucomponent/source/languageguessing/guess.cxx139
-rw-r--r--lingucomponent/source/languageguessing/guess.hxx74
-rw-r--r--lingucomponent/source/languageguessing/guesslang.component34
-rw-r--r--lingucomponent/source/languageguessing/guesslang.cxx435
-rw-r--r--lingucomponent/source/languageguessing/makefile.mk88
-rw-r--r--lingucomponent/source/languageguessing/simpleguesser.cxx237
-rw-r--r--lingucomponent/source/languageguessing/simpleguesser.hxx124
9 files changed, 0 insertions, 1224 deletions
diff --git a/lingucomponent/source/languageguessing/altstrfunc.cxx b/lingucomponent/source/languageguessing/altstrfunc.cxx
deleted file mode 100644
index 5461a2267..000000000
--- a/lingucomponent/source/languageguessing/altstrfunc.cxx
+++ /dev/null
@@ -1,50 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/***************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_lingucomponent.hxx"
-#include "altstrfunc.hxx"
-
-#include <sal/types.h>
-
-/**
- * Compares two strings case-insensitively over the length of the shorter one.
- *
- * A '.' in either string is a neutral character that matches anything at
- * that position. Comparison also stops at an embedded NUL character.
- *
- * @param s1 first string
- * @param s2 second string
- * @return 0 when the compared prefix matches, otherwise the difference of
- *         the upper-cased characters at the first mismatch
- */
-int start(const std::string &s1, const std::string &s2){
-    const size_t min = s1.length() < s2.length() ? s1.length() : s2.length();
-
-    for(size_t i = 0; i < min && s2[i] && s1[i]; i++){
-        if(s1[i] == '.' || s2[i] == '.')
-            continue; //. is a neutral character
-
-        // toupper() is undefined for negative values other than EOF, which
-        // is what a plain char holds for bytes >= 0x80 (e.g. UTF-8 data).
-        const int ret = toupper(static_cast<unsigned char>(s1[i]))
-                      - toupper(static_cast<unsigned char>(s2[i]));
-        if(ret != 0)
-            return ret;
-    }
-    return 0;
-}
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/altstrfunc.hxx b/lingucomponent/source/languageguessing/altstrfunc.hxx
deleted file mode 100644
index 5ddeda4ee..000000000
--- a/lingucomponent/source/languageguessing/altstrfunc.hxx
+++ /dev/null
@@ -1,43 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/***************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-#ifndef _ALT_STRFUNC_HXX_
-#define _ALT_STRFUNC_HXX_
-
-#include <string>
-#include <guess.hxx>
-
-/**
- * Tells whether @p c delimits a field of a guess string: one of the three
- * GUESS_SEPARATOR_* characters or the terminating NUL.
- */
-inline bool isSeparator(const char c){
-    switch(c){
-        case GUESS_SEPARATOR_OPEN:
-        case GUESS_SEPARATOR_SEP:
-        case GUESS_SEPARATOR_CLOSE:
-        case '\0':
-            return true;
-        default:
-            return false;
-    }
-}
-
-int start(const std::string &s1, const std::string &s2);
-
-#endif
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/guess.cxx b/lingucomponent/source/languageguessing/guess.cxx
deleted file mode 100644
index 2a377dc0f..000000000
--- a/lingucomponent/source/languageguessing/guess.cxx
+++ /dev/null
@@ -1,139 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/***************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_lingucomponent.hxx"
-
-#include <iostream>
-#include <string.h>
-
-#include <libtextcat/textcat.h>
-#include <altstrfunc.hxx>
-#include <guess.hxx>
-
-using namespace std;
-
-/**
- * Default constructor: every field starts out with its DEFAULT_* value.
- */
-Guess::Guess()
-    : language_str(DEFAULT_LANGUAGE)
-    , country_str(DEFAULT_COUNTRY)
-    , encoding_str(DEFAULT_ENCODING)
-{
-}
-
-/*
-* This uses a char * string to build the guess object.
-* Such a string looks like: [language-country-encoding]...
-* Only the first bracketed entry is parsed.
-*/
-
-/**
- * Reads characters of @p str from index @p pos up to (not including) the next
- * separator and returns them. The separator itself is then stepped over,
- * unless it is the terminating NUL, so @p pos never moves past the end of
- * the string.
- */
-static string lcl_readField(const char * str, int & pos)
-{
-    string field;
-    while(!isSeparator(str[pos])){
-        field += str[pos];
-        pos++;
-    }
-    if(str[pos] != '\0')
-        pos++;
-    return field;
-}
-
-/**
- * @param guess_str textcat result such as "[en-US-utf8]"; a null pointer, an
- *                  empty string or a truncated entry leaves the missing
- *                  fields at their default values
- */
-Guess::Guess(char * guess_str)
-{
-    // A bare "Guess();" statement here would only create and destroy an
-    // unnamed temporary; it does not initialise *this. Set the defaults
-    // explicitly instead.
-    language_str = DEFAULT_LANGUAGE;
-    country_str = DEFAULT_COUNTRY;
-    encoding_str = DEFAULT_ENCODING;
-
-    // Nothing to parse. This also keeps "guess_str + 1" below inside the buffer.
-    if(guess_str == NULL || guess_str[0] == '\0')
-        return;
-
-    //if the guess is not like "UNKNOWN" or "SHORT", go into the brackets
-    if(strcmp(guess_str + 1, _TEXTCAT_RESULT_UNKOWN) == 0
-        ||
-        strcmp(guess_str + 1, _TEXTCAT_RESULT_SHORT) == 0)
-        return;
-
-    int current_pointer = 0;
-
-    //this is to go to the first char of the guess string ( the '[' of "[en-US-utf8]" )
-    lcl_readField(guess_str, current_pointer);
-
-    //this is to pick up the language ( the "en" from "[en-US-utf8]" )
-    const string lang = lcl_readField(guess_str, current_pointer);
-
-    //this is to pick up the country ( the "US" from "[en-US-utf8]" )
-    const string country = lcl_readField(guess_str, current_pointer);
-
-    //this is to pick up the encoding ( the "utf8" from "[en-US-utf8]" )
-    const string enc = lcl_readField(guess_str, current_pointer);
-
-    if(lang!=""){//if not we use the default value
-        language_str=lang;
-    }
-    country_str=country;
-
-    if(enc!=""){//if not we use the default value
-        encoding_str=enc;
-    }
-}
-
-/** Nothing to release: all members are std::string values. */
-Guess::~Guess()
-{
-}
-
-/** @return a copy of the language field. */
-string Guess::GetLanguage()
-{
-    return this->language_str;
-}
-
-/** @return a copy of the country field. */
-string Guess::GetCountry()
-{
-    return this->country_str;
-}
-
-/** @return a copy of the encoding field. */
-string Guess::GetEncoding()
-{
-    return this->encoding_str;
-}
-
-/**
- * Compares this guess, rendered as "language-country-encoding", with
- * @p lang using start(): case-insensitive, over the length of the shorter
- * string, with '.' acting as a wildcard.
- *
- * @param lang string to compare against, e.g. "en-US"
- * @return true when the two match
- */
-bool Guess::operator==(string lang)
-{
-    const string toString = GetLanguage() + "-" + GetCountry() + "-" + GetEncoding();
-
-    // start() returns 0 on a match and non-zero on a mismatch. Returning it
-    // directly would make this operator yield true for *different* values.
-    return start(toString, lang) == 0;
-}
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/guess.hxx b/lingucomponent/source/languageguessing/guess.hxx
deleted file mode 100644
index b05913bb2..000000000
--- a/lingucomponent/source/languageguessing/guess.hxx
+++ /dev/null
@@ -1,74 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/***************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-#ifndef GUESS_H
-#define GUESS_H
-
-#define GUESS_SEPARATOR_OPEN '['
-#define GUESS_SEPARATOR_CLOSE ']'
-#define GUESS_SEPARATOR_SEP '-'
-#define DEFAULT_LANGUAGE ""
-#define DEFAULT_COUNTRY ""
-#define DEFAULT_ENCODING ""
-
-#include <string>
-
-using namespace std;
-
-/**
- * One language guess, split into language, country and encoding.
- *
- * @author Jocelyn Merand
- */
-class Guess
-{
-public:
-    /** Default init: all three fields take their DEFAULT_* value. */
-    Guess();
-
-    /** Init from a string like [en-UK-utf8]. */
-    Guess(char * guess_str);
-
-    ~Guess();
-
-    std::string GetLanguage();
-    std::string GetCountry();
-    std::string GetEncoding();
-
-    bool operator==(std::string lang);
-
-protected:
-    std::string language_str;
-    std::string country_str;
-    std::string encoding_str;
-};
-
-#endif
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/guesslang.component b/lingucomponent/source/languageguessing/guesslang.component
deleted file mode 100644
index 633a489c3..000000000
--- a/lingucomponent/source/languageguessing/guesslang.component
+++ /dev/null
@@ -1,34 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--**********************************************************************
-*
-* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-*
-* Copyright 2000, 2010 Oracle and/or its affiliates.
-*
-* OpenOffice.org - a multi-platform office productivity suite
-*
-* This file is part of OpenOffice.org.
-*
-* OpenOffice.org is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public License version 3
-* only, as published by the Free Software Foundation.
-*
-* OpenOffice.org is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Lesser General Public License version 3 for more details
-* (a copy is included in the LICENSE file that accompanied this code).
-*
-* You should have received a copy of the GNU Lesser General Public License
-* version 3 along with OpenOffice.org. If not, see
-* <http://www.openoffice.org/license.html>
-* for a copy of the LGPLv3 License.
-*
-**********************************************************************-->
-
-<!-- Declares implementation com.sun.star.lingu2.LanguageGuessing, which
-     provides the service com.sun.star.linguistic2.LanguageGuessing and is
-     loaded through the com.sun.star.loader.SharedLibrary loader. -->
-<component loader="com.sun.star.loader.SharedLibrary"
- xmlns="http://openoffice.org/2010/uno-components">
- <implementation name="com.sun.star.lingu2.LanguageGuessing">
- <service name="com.sun.star.linguistic2.LanguageGuessing"/>
- </implementation>
-</component>
diff --git a/lingucomponent/source/languageguessing/guesslang.cxx b/lingucomponent/source/languageguessing/guesslang.cxx
deleted file mode 100644
index 49e033040..000000000
--- a/lingucomponent/source/languageguessing/guesslang.cxx
+++ /dev/null
@@ -1,435 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_lingucomponent.hxx"
-
-#include <iostream>
-
-#include <tools/debug.hxx>
-
-#include <sal/config.h>
-#include <cppuhelper/factory.hxx>
-#include <cppuhelper/implementationentry.hxx>
-#include <cppuhelper/implbase2.hxx>
-#include <tools/string.hxx>
-
-#include <simpleguesser.hxx>
-#include <guess.hxx>
-
-#include <com/sun/star/registry/XRegistryKey.hpp>
-#include <com/sun/star/lang/XServiceInfo.hpp>
-#include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
-#include <unotools/pathoptions.hxx>
-#include <unotools/localfilehelper.hxx>
-#include <osl/thread.h>
-
-#include <sal/macros.h>
-
-using namespace ::rtl;
-using namespace ::osl;
-using namespace ::cppu;
-using namespace ::com::sun::star;
-using namespace ::com::sun::star::uno;
-using namespace ::com::sun::star::lang;
-using namespace ::com::sun::star::linguistic2;
-
-namespace css = ::com::sun::star;
-
-//==================================================================================================
-
-#define A2OU(x) ::rtl::OUString::createFromAscii( x )
-
-#define SERVICENAME "com.sun.star.linguistic2.LanguageGuessing"
-#define IMPLNAME "com.sun.star.lingu2.LanguageGuessing"
-
-/** @return a one-element sequence holding SERVICENAME. */
-static Sequence< OUString > getSupportedServiceNames_LangGuess_Impl()
-{
-    const OUString aServiceName( A2OU( SERVICENAME ) );
-    return Sequence< OUString >( &aServiceName, 1 );
-}
-
-/** @return IMPLNAME as an OUString. */
-static OUString getImplementationName_LangGuess_Impl()
-{
-    const OUString aImplName( ::rtl::OUString::createFromAscii( IMPLNAME ) );
-    return aImplName;
-}
-
-/** @return the single mutex shared by all LangGuess_Impl methods in this file. */
-static osl::Mutex & GetLangGuessMutex()
-{
-    static osl::Mutex s_aLangGuessMutex;
-    return s_aLangGuessMutex;
-}
-
-
-/**
- * UNO object implementing XLanguageGuessing and XServiceInfo on top of a
- * SimpleGuesser.
- */
-class LangGuess_Impl :
-    public ::cppu::WeakImplHelper2<
-        XLanguageGuessing,
-        XServiceInfo >
-{
-    SimpleGuesser m_aGuesser;
-    bool m_bInitialized; // set by EnsureInitialized()
-    css::uno::Reference< css::uno::XComponentContext > m_xContext;
-
-    // copying is disabled: declared but not defined
-    LangGuess_Impl( const LangGuess_Impl & );
-    LangGuess_Impl & operator =( const LangGuess_Impl & );
-
-    virtual ~LangGuess_Impl() {}
-    void EnsureInitialized();
-
-public:
-    explicit LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext);
-
-    // XServiceInfo implementation
-    virtual OUString SAL_CALL getImplementationName( )
-        throw(RuntimeException);
-    virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName )
-        throw(RuntimeException);
-    virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( )
-        throw(RuntimeException);
-    static Sequence< OUString > SAL_CALL getSupportedServiceNames_Static( );
-
-    // XLanguageGuessing implementation
-    virtual css::lang::Locale SAL_CALL guessPrimaryLanguage(
-            const ::rtl::OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen )
-        throw (css::lang::IllegalArgumentException, css::uno::RuntimeException);
-    virtual void SAL_CALL disableLanguages(
-            const css::uno::Sequence< css::lang::Locale >& aLanguages )
-        throw (css::lang::IllegalArgumentException, css::uno::RuntimeException);
-    virtual void SAL_CALL enableLanguages(
-            const css::uno::Sequence< css::lang::Locale >& aLanguages )
-        throw (css::lang::IllegalArgumentException, css::uno::RuntimeException);
-    virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages( )
-        throw (css::uno::RuntimeException);
-    virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages( )
-        throw (css::uno::RuntimeException);
-    virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages( )
-        throw (css::uno::RuntimeException);
-
-    // implementation specific
-    void SetFingerPrintsDB( const rtl::OUString &fileName ) throw (RuntimeException);
-
-    static const OUString & SAL_CALL getImplementationName_Static() throw();
-
-};
-
-//*************************************************************************
-
-/**
- * Stores the component context and marks the object as not yet initialised.
- * No other work is done in the constructor.
- */
-LangGuess_Impl::LangGuess_Impl(
-        css::uno::Reference< css::uno::XComponentContext > const & rxContext)
-    : m_bInitialized( false )
-    , m_xContext( rxContext )
-{
-}
-
-//*************************************************************************
-
-/**
- * One-time setup: points the guesser at the installed fingerprint directory
- * and disables a fixed list of languages. Later calls return immediately.
- */
-void LangGuess_Impl::EnsureInitialized()
-{
-    if (m_bInitialized)
-        return;
-
-    // Flag first: disableLanguages() and getDisabledLanguages() below call
-    // EnsureInitialized() themselves, which must not loop.
-    m_bInitialized = true;
-
-    // set default fingerprint path to where those get installed
-    String aFingerprintURL( SvtPathOptions().GetFingerprintPath() );
-    String aFingerprintDir;
-    utl::LocalFileHelper::ConvertURLToPhysicalName( aFingerprintURL, aFingerprintDir );
-#ifdef WNT
-    aFingerprintDir += '\\';
-#else
-    aFingerprintDir += '/';
-#endif
-
-    SetFingerPrintsDB( aFingerprintDir );
-
-    //
-    // disable languages that are currently not functional...
-    //
-    struct LangCountry
-    {
-        const char *pLang;
-        const char *pCountry;
-    };
-    LangCountry aDisable[] =
-    {
-        {"gv", ""}, {"sco", ""},                            // no lang-id available yet...
-//      {"hy", ""}, {"drt", ""},                            // 0 bytes fingerprints...
-        {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""}, // not yet functioning correctly...
-        {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""},
-        {"sa", ""}, {"ta", ""}, {"th", ""},
-        {"qu", ""}, {"yi", ""}
-    };
-    const sal_Int32 nDisabled = SAL_N_ELEMENTS(aDisable);
-
-    Sequence< Locale > aDisableSeq( nDisabled );
-    Locale *pLocale = aDisableSeq.getArray();
-    for (const LangCountry *pEntry = aDisable; pEntry != aDisable + nDisabled; ++pEntry, ++pLocale)
-    {
-        pLocale->Language = OUString::createFromAscii( pEntry->pLang );
-        pLocale->Country = OUString::createFromAscii( pEntry->pCountry );
-    }
-
-    disableLanguages( aDisableSeq );
-    DBG_ASSERT( nDisabled == getDisabledLanguages().getLength(), "size mismatch" );
-}
-
-//*************************************************************************
-
-/**
- * Guesses the language of the sub-string [nStartPos, nStartPos + nLen) of
- * @p rText.
- *
- * @param rText     text to analyse
- * @param nStartPos index of the first character to look at
- * @param nLen      number of characters to look at
- * @return a Locale whose Language and Country come from the guesser's result
- * @throws lang::IllegalArgumentException if the range is negative or does not
- *         lie inside @p rText
- */
-Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
-        const ::rtl::OUString& rText,
-        ::sal_Int32 nStartPos,
-        ::sal_Int32 nLen )
-    throw (lang::IllegalArgumentException, uno::RuntimeException)
-{
-    osl::MutexGuard aGuard( GetLangGuessMutex() );
-
-    EnsureInitialized();
-
-    // The upper bound is checked by subtraction: "nStartPos + nLen" can
-    // overflow sal_Int32 for large arguments and then pass a "<= length" test.
-    const sal_Int32 nTextLen = rText.getLength();
-    if (nStartPos < 0 || nLen < 0 || nStartPos > nTextLen || nLen > nTextLen - nStartPos)
-        throw lang::IllegalArgumentException();
-
-    // the guesser works on UTF-8 encoded bytes
-    OString o( OUStringToOString( rText.copy(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
-    Guess g = m_aGuesser.GuessPrimaryLanguage(const_cast< char * >(o.getStr()));
-
-    lang::Locale aRes;
-    aRes.Language = OUString::createFromAscii(g.GetLanguage().c_str());
-    aRes.Country = OUString::createFromAscii(g.GetCountry().c_str());
-    return aRes;
-}
-
-//*************************************************************************
-#define DEFAULT_CONF_FILE_NAME "fpdb.conf"
-
-/**
- * Tells the guesser where the fingerprint database lives.
- *
- * @param filePath directory of the fingerprint files; DEFAULT_CONF_FILE_NAME
- *                 is appended to it as-is to form the configuration file path
- */
-void LangGuess_Impl::SetFingerPrintsDB(
-        const rtl::OUString &filePath )
-    throw (RuntimeException)
-{
-    //! text encoding for file name / path needs to be in the same encoding the OS uses
-    const OString aDir( OUStringToOString( filePath, osl_getThreadTextEncoding() ) );
-    const OString aConfFile( aDir + OString( DEFAULT_CONF_FILE_NAME ) );
-
-    m_aGuesser.SetDBPath( aConfFile.getStr(), aDir.getStr() );
-}
-
-//*************************************************************************
-/**
- * Converts a list of guesses into a sequence of locales, copying the language
- * and country of each guess. Shared by the three language-list getters below.
- * Takes a non-const reference only because the Guess getters are non-const.
- */
-static uno::Sequence< Locale > lcl_guessesToLocales( vector< Guess > &rGuesses )
-{
-    uno::Sequence< Locale > aRes( static_cast< sal_Int32 >( rGuesses.size() ) );
-    Locale *pRes = aRes.getArray();
-
-    for (size_t i = 0; i < rGuesses.size(); ++i)
-    {
-        pRes[i].Language = A2OU( rGuesses[i].GetLanguage().c_str() );
-        pRes[i].Country = A2OU( rGuesses[i].GetCountry().c_str() );
-    }
-
-    return aRes;
-}
-
-/** @return the locales of everything SimpleGuesser::GetAllManagedLanguages() reports. */
-uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( )
-    throw (uno::RuntimeException)
-{
-    osl::MutexGuard aGuard( GetLangGuessMutex() );
-
-    EnsureInitialized();
-
-    vector<Guess> gs = m_aGuesser.GetAllManagedLanguages();
-    return lcl_guessesToLocales( gs );
-}
-
-//*************************************************************************
-/** @return the locales of everything SimpleGuesser::GetAvailableLanguages() reports. */
-uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( )
-    throw (uno::RuntimeException)
-{
-    osl::MutexGuard aGuard( GetLangGuessMutex() );
-
-    EnsureInitialized();
-
-    vector<Guess> gs = m_aGuesser.GetAvailableLanguages();
-    return lcl_guessesToLocales( gs );
-}
-
-//*************************************************************************
-/** @return the locales of everything SimpleGuesser::GetUnavailableLanguages() reports. */
-uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( )
-    throw (uno::RuntimeException)
-{
-    osl::MutexGuard aGuard( GetLangGuessMutex() );
-
-    EnsureInitialized();
-
-    vector<Guess> gs = m_aGuesser.GetUnavailableLanguages();
-    return lcl_guessesToLocales( gs );
-}
-
-//*************************************************************************
-/**
- * Builds the "language-country" key that SimpleGuesser::EnableLanguage() and
- * SimpleGuesser::DisableLanguage() are called with. Both parts are converted
- * with RTL_TEXTENCODING_ASCII_US.
- */
-static string lcl_localeToGuessKey( const Locale &rLocale )
-{
-    const OString aLang( OUStringToOString( rLocale.Language, RTL_TEXTENCODING_ASCII_US ) );
-    const OString aCountry( OUStringToOString( rLocale.Country, RTL_TEXTENCODING_ASCII_US ) );
-
-    string aKey( aLang.getStr() );
-    aKey += "-";
-    aKey += aCountry.getStr();
-    return aKey;
-}
-
-/**
- * Disables every locale of @p rLanguages in the guesser.
- */
-void SAL_CALL LangGuess_Impl::disableLanguages(
-        const uno::Sequence< Locale >& rLanguages )
-    throw (lang::IllegalArgumentException, uno::RuntimeException)
-{
-    osl::MutexGuard aGuard( GetLangGuessMutex() );
-
-    EnsureInitialized();
-
-    const sal_Int32 nLanguages = rLanguages.getLength();
-    const Locale *pLanguages = rLanguages.getConstArray();
-
-    for (sal_Int32 i = 0; i < nLanguages; ++i)
-        m_aGuesser.DisableLanguage( lcl_localeToGuessKey( pLanguages[i] ) );
-}
-
-//*************************************************************************
-/**
- * Enables every locale of @p rLanguages in the guesser.
- */
-void SAL_CALL LangGuess_Impl::enableLanguages(
-        const uno::Sequence< Locale >& rLanguages )
-    throw (lang::IllegalArgumentException, uno::RuntimeException)
-{
-    osl::MutexGuard aGuard( GetLangGuessMutex() );
-
-    EnsureInitialized();
-
-    const sal_Int32 nLanguages = rLanguages.getLength();
-    const Locale *pLanguages = rLanguages.getConstArray();
-
-    for (sal_Int32 i = 0; i < nLanguages; ++i)
-        m_aGuesser.EnableLanguage( lcl_localeToGuessKey( pLanguages[i] ) );
-}
-
-//*************************************************************************
-/** XServiceInfo: @return IMPLNAME. */
-OUString SAL_CALL LangGuess_Impl::getImplementationName( )
-    throw(RuntimeException)
-{
-    osl::MutexGuard aGuard( GetLangGuessMutex() );
-    const OUString aImplName( ::rtl::OUString::createFromAscii( IMPLNAME ) );
-    return aImplName;
-}
-
-//*************************************************************************
-/**
- * XServiceInfo: @return sal_True when @p ServiceName equals one of the names
- * returned by getSupportedServiceNames(), sal_False otherwise.
- */
-sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
-    throw(RuntimeException)
-{
-    osl::MutexGuard aGuard( GetLangGuessMutex() );
-
-    Sequence< OUString > aNames( getSupportedServiceNames() );
-    const OUString *pName = aNames.getArray();
-    const OUString * const pEnd = pName + aNames.getLength();
-    for( ; pName != pEnd; ++pName )
-    {
-        if( *pName == ServiceName )
-            return sal_True;
-    }
-    return sal_False;
-}
-
-//*************************************************************************
-/** XServiceInfo: forwards to getSupportedServiceNames_Static() under the mutex. */
-Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( )
-    throw(RuntimeException)
-{
-    osl::MutexGuard aGuard( GetLangGuessMutex() );
-    const Sequence< OUString > aNames( getSupportedServiceNames_Static() );
-    return aNames;
-}
-
-//*************************************************************************
-/** @return a one-element sequence holding SERVICENAME. */
-Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames_Static( )
-{
-    const OUString aServiceName( ::rtl::OUString::createFromAscii( SERVICENAME ) );
-    Sequence< OUString > aNames( &aServiceName, 1 );
-    return aNames;
-}
-
-//*************************************************************************
-
-
-/**
- * Function to create a new component instance; it is referenced from
- * s_component_entries.
- * @param xContext component context handed to the new LangGuess_Impl
- * @return the new instance as an XInterface reference
- */
-Reference< XInterface > SAL_CALL LangGuess_Impl_create(
-        Reference< XComponentContext > const & xContext )
-    SAL_THROW( () )
-{
-    // go through OWeakObject to pick one unambiguous path to XInterface
-    ::cppu::OWeakObject * const pInstance =
-        static_cast< ::cppu::OWeakObject * >( new LangGuess_Impl(xContext) );
-    return pInstance;
-}
-
-//##################################################################################################
-//#### EXPORTED ### functions to allow for registration and creation of the UNO component
-//##################################################################################################
-
-// Table passed to ::cppu::component_getFactoryHelper() by
-// component_getFactory(). The single real entry names the create function,
-// the implementation-name function, the supported-service-names function
-// and the factory creator of this component.
-static struct ::cppu::ImplementationEntry s_component_entries [] =
-{
- {
- LangGuess_Impl_create, getImplementationName_LangGuess_Impl,
- getSupportedServiceNames_LangGuess_Impl,
- ::cppu::createSingleComponentFactory,
- 0, 0
- },
- // all-zero entry; presumably the end-of-table marker - TODO confirm against cppuhelper
- { 0, 0, 0, 0, 0, 0 }
-};
-
-extern "C"
-{
-
-/**
- * Exported entry point: hands the request and s_component_entries on to
- * ::cppu::component_getFactoryHelper() and returns its result.
- */
-SAL_DLLPUBLIC_EXPORT void * SAL_CALL component_getFactory(
-    sal_Char const * implName, lang::XMultiServiceFactory * xMgr,
-    registry::XRegistryKey * xRegistry )
-{
-    void * const pFactory = ::cppu::component_getFactoryHelper(
-        implName, xMgr, xRegistry, s_component_entries );
-    return pFactory;
-}
-
-}
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/makefile.mk b/lingucomponent/source/languageguessing/makefile.mk
deleted file mode 100644
index 624ccba3c..000000000
--- a/lingucomponent/source/languageguessing/makefile.mk
+++ /dev/null
@@ -1,88 +0,0 @@
-#*************************************************************************
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# Copyright 2000, 2010 Oracle and/or its affiliates.
-#
-# OpenOffice.org - a multi-platform office productivity suite
-#
-# This file is part of OpenOffice.org.
-#
-# OpenOffice.org is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License version 3
-# only, as published by the Free Software Foundation.
-#
-# OpenOffice.org is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License version 3 for more details
-# (a copy is included in the LICENSE file that accompanied this code).
-#
-# You should have received a copy of the GNU Lesser General Public License
-# version 3 along with OpenOffice.org. If not, see
-# <http://www.openoffice.org/license.html>
-# for a copy of the LGPLv3 License.
-#
-#*************************************************************************
-
-PRJ=..$/..
-PRJNAME=lingucomponent
-
-TARGET=guesslang
-
-ENABLE_EXCEPTIONS=TRUE
-
-#----- Settings ---------------------------------------------------------
-
-.INCLUDE : settings.mk
-
-# --- Files --------------------------------------------------------
-
-# Name of the textcat library to link: -ltextcat for UNX, MAC and WNT with
-# GCC, the import library ilibtextcat.lib otherwise.
-.IF "$(GUI)"=="UNX" || "$(GUI)"=="MAC" || "$(GUI)$(COM)"=="WNTGCC"
-LIBTEXTCATLIB=-ltextcat
-.ELSE # "$(GUI)"=="UNX" || "$(GUI)"=="MAC" || "$(GUI)$(COM)"=="WNTGCC"
-LIBTEXTCATLIB=ilibtextcat.lib
-.ENDIF # "$(GUI)"=="UNX" || "$(GUI)"=="MAC" || "$(GUI)$(COM)"=="WNTGCC"
-
-SLOFILES = \
- $(SLO)$/altstrfunc.obj \
- $(SLO)$/guess.obj \
- $(SLO)$/guesslang.obj \
- $(SLO)$/simpleguesser.obj
-
-
-SHL1TARGET= $(TARGET)$(DLLPOSTFIX)
-
-SHL1STDLIBS= \
- $(CPPUHELPERLIB) \
- $(CPPULIB) \
- $(LIBTEXTCATLIB) \
- $(SALLIB) \
- $(SVLLIB) \
- $(TOOLSLIB) \
- $(UNOTOOLSLIB)
-
-# build DLL
-SHL1LIBS= $(SLB)$/$(TARGET).lib
-SHL1IMPLIB= i$(TARGET)
-SHL1DEPN= $(SHL1LIBS)
-SHL1DEF= $(MISC)$/$(SHL1TARGET).def
-.IF "$(OS)"!="MACOSX"
-SHL1VERSIONMAP=$(SOLARENV)/src/component.map
-.ENDIF
-
-# build DEF file
-DEF1NAME =$(SHL1TARGET)
-
-# --- Targets ------------------------------------------------------
-
-.INCLUDE : target.mk
-
-
-# Generate $(MISC)/guesslang.component as part of ALLTAR by running
-# createcomponent.xslt over guesslang.component; the "uri" string parameter
-# is built from COMPONENTPREFIX_BASIS_NATIVE and the file name of SHL1TARGETN.
-ALLTAR : $(MISC)/guesslang.component
-
-$(MISC)/guesslang.component .ERRREMOVE : $(SOLARENV)/bin/createcomponent.xslt \
- guesslang.component
- $(XSLTPROC) --nonet --stringparam uri \
- '$(COMPONENTPREFIX_BASIS_NATIVE)$(SHL1TARGETN:f)' -o $@ \
- $(SOLARENV)/bin/createcomponent.xslt guesslang.component
diff --git a/lingucomponent/source/languageguessing/simpleguesser.cxx b/lingucomponent/source/languageguessing/simpleguesser.cxx
deleted file mode 100644
index 68c53318f..000000000
--- a/lingucomponent/source/languageguessing/simpleguesser.cxx
+++ /dev/null
@@ -1,237 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/***************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
- /**
- *
- *
- *
- *
- * TODO
- * - Add exception throwing when h == NULL
- * - Not init h when implicit constructor is launched
- */
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_lingucomponent.hxx"
-
-#include <string.h>
-#include <sstream>
-#include <iostream>
-
-#include <libtextcat/textcat.h>
-#include <libtextcat/common.h>
-#include <libtextcat/constants.h>
-#include <libtextcat/fingerprint.h>
-#include <libtextcat/utf8misc.h>
-
-#include <sal/types.h>
-
-#include "altstrfunc.hxx"
-#include "simpleguesser.hxx"
-
-#ifndef _UTF8_
-#define _UTF8_
-#endif
-
-
-using namespace std;
-
-
-/**
- * The following 3 structures are taken from fingerprint.c and textcat.c
- */
-
-typedef struct ngram_t {
- // One n-gram entry of a fingerprint. NOTE(review): declared here to mirror fingerprint.c, so the layout presumably has to match the library's - confirm.
- sint2 rank; // NOTE(review): presumably the rank of this n-gram within its fingerprint - confirm
- char str[MAXNGRAMSIZE+1]; // n-gram text; the +1 presumably leaves room for the terminating NUL
-
-} ngram_t;
-
-typedef struct fp_t {
- // One language fingerprint. Not referenced by the functions of this file, which only handle fingerprints as void* passed to fp_Name().
- const char *name; // NOTE(review): presumably the name fp_Name() reports - confirm against fingerprint.c
- ngram_t *fprint; // array of n-gram entries
- uint4 size; // NOTE(review): presumably the number of entries in fprint - confirm
-
-} fp_t;
-
-typedef struct textcat_t{
- // Layout the opaque handle h is cast to in GetManagedLanguages() and XableLanguage().
- void **fprint; // per-language fingerprint handles; entry i is passed to fp_Name()
- char *fprint_disable; // per-language flag byte; XableLanguage() stores 0xF0 (enabled) or 0x0F (disabled) here
- uint4 size; // number of entries iterated over in fprint / fprint_disable
- uint4 maxsize; // NOTE(review): presumably the allocated capacity - not used in this file, confirm against textcat.c
-
- char output[MAXOUTPUTSIZE]; // NOTE(review): presumably the buffer textcat_Classify() writes its result to - confirm
-
-} textcat_t;
-/** end of the 3 structs */
-
-// Default constructor: the guesser starts without a fingerprint handle
-// (h is NULL) until SetDBPath() provides one.
-SimpleGuesser::SimpleGuesser()
-    : h(NULL)
-{
-}
-
-/**
- * Take over the fingerprint handle of another guesser.
- *
- * The handle held so far (if any) is released with textcat_Done() and
- * replaced by sg.h. Nothing happens when sg is this very object or already
- * holds the same handle: releasing the handle first would leave h pointing
- * to freed data.
- *
- * NOTE(review): the handle is copied, not duplicated, so afterwards both
- * objects hold the same handle and each destructor calls textcat_Done() on
- * it. That shared ownership is not addressed here - confirm how callers
- * use this operator.
- *
- * @param sg the guesser whose handle is taken over
- */
-void SimpleGuesser::operator=(SimpleGuesser& sg){
-    if(this == &sg || h == sg.h){return;}
-    if(h){textcat_Done(h);}
-    h = sg.h;
-}
-
-// Destructor: hands the fingerprint handle back to libtextcat, if there is one.
-SimpleGuesser::~SimpleGuesser()
-{
-    if (h != NULL)
-    {
-        textcat_Done(h);
-    }
-}
-
-
-/*!
-    \fn SimpleGuesser::GuessLanguage(char* text)
-
-    Classify a text with the loaded fingerprint DB and return one Guess per
-    candidate reported by textcat_Classify(), in the order reported.
-
-    The returned list is empty when no DB is loaded, when text is NULL, when
-    the classifier returns nothing or _TEXTCAT_RESULT_SHORT, or when the
-    result contains no GUESS_SEPARATOR_OPEN character.
-
-    \param text the NUL-terminated text to analyse
-    \return the list of guesses, possibly empty
- */
-vector<Guess> SimpleGuesser::GuessLanguage(char* text)
-{
-    vector<Guess> guesses;
-
-    // Nothing to classify without a fingerprint DB or without a text.
-    if(!h || !text){return guesses;}
-
-    // Number of unicode characters (symbols), capped so that only the
-    // beginning of a long text is analysed.
-    // NOTE(review): this is a character count; if textcat_Classify() expects
-    // a byte count, multi-byte text is analysed over fewer characters than
-    // the cap suggests - confirm against textcat.h.
-    int len = utfstrlen(text);
-    if( len > MAX_STRING_LENGTH_TO_ANALYSE ){len = MAX_STRING_LENGTH_TO_ANALYSE;}
-
-    char *guess_list = textcat_Classify(h, text, len);
-
-    if(!guess_list || strcmp(guess_list, _TEXTCAT_RESULT_SHORT) == 0){
-        return guesses;
-    }
-
-    // Every guess starts at a GUESS_SEPARATOR_OPEN character; the Guess
-    // constructor is handed the list from that character onwards.
-    for(char *entry = strchr(guess_list, GUESS_SEPARATOR_OPEN);
-        entry != NULL;
-        entry = strchr(entry + 1, GUESS_SEPARATOR_OPEN))
-    {
-        guesses.push_back(Guess(entry));
-    }
-
-    return guesses;
-}
-
-/*!
-    \fn SimpleGuesser::GuessPrimaryLanguage(char* text)
-
-    Return the first guess GuessLanguage() produces for the text, or a
-    default-constructed Guess when it produces none.
-
-    \param text the NUL-terminated text to analyse
-    \return the first guess, or Guess() when there is no guess
- */
-Guess SimpleGuesser::GuessPrimaryLanguage(char* text)
-{
-    // Classify once and reuse the result: a second GuessLanguage() call
-    // would run the whole classification again for the same answer.
-    const vector<Guess> ret = GuessLanguage(text);
-    if(ret.empty()){
-        return Guess();
-    }
-    return ret.front();
-}
-/**
- * Lists the languages of the loaded fingerprint DB whose flag byte has at
- * least one bit in common with the mask:
- *   mask = 0xF0 returns only the available languages,
- *   mask = 0x0F returns only the unavailable languages,
- *   mask = 0xFF returns both.
- * The list is empty when no DB is loaded.
- */
-vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
-{
-    vector<Guess> selected;
-    if (h == NULL)
-    {
-        return selected;
-    }
-
-    textcat_t* const db = static_cast<textcat_t*>(h);
-    for (size_t idx = 0; idx < db->size; ++idx)
-    {
-        if ((db->fprint_disable[idx] & mask) == 0)
-        {
-            continue;
-        }
-
-        // The fingerprint name is prefixed with "[" before it is handed to
-        // the Guess constructor.
-        string bracketed("[");
-        bracketed += (char*)fp_Name(db->fprint[idx]);
-        selected.push_back(Guess((char *)bracketed.c_str()));
-    }
-
-    return selected;
-}
-
-// Languages selected by mask 0xF0, the flag value EnableLanguage() stores.
-vector<Guess> SimpleGuesser::GetAvailableLanguages()
-{
-    const char enabledMask = sal::static_int_cast< char >( 0xF0 );
-    return GetManagedLanguages( enabledMask );
-}
-
-// Languages selected by mask 0x0F, the flag value DisableLanguage() stores.
-vector<Guess> SimpleGuesser::GetUnavailableLanguages()
-{
-    const char disabledMask = sal::static_int_cast< char >( 0x0F );
-    return GetManagedLanguages( disabledMask );
-}
-
-// Languages selected by mask 0xFF, i.e. enabled and disabled ones alike.
-vector<Guess> SimpleGuesser::GetAllManagedLanguages()
-{
-    const char anyMask = sal::static_int_cast< char >( 0xFF );
-    return GetManagedLanguages( anyMask );
-}
-
-// Stores mask as the flag byte of every fingerprint for which
-// start(<fingerprint name>, lang) returns 0. Does nothing when no
-// fingerprint DB is loaded.
-void SimpleGuesser::XableLanguage(string lang, char mask)
-{
-    if (h == NULL)
-    {
-        return;
-    }
-
-    textcat_t* const db = static_cast<textcat_t*>(h);
-    for (size_t idx = 0; idx < db->size; ++idx)
-    {
-        string fingerprintName(fp_Name(db->fprint[idx]));
-        if (start(fingerprintName, lang) != 0)
-        {
-            continue;
-        }
-        db->fprint_disable[idx] = mask;
-    }
-}
-
-// Marks the matching fingerprints as enabled by storing 0xF0 as their flag.
-void SimpleGuesser::EnableLanguage(string lang)
-{
-    const char enabledFlag = sal::static_int_cast< char >( 0xF0 );
-    XableLanguage(lang, enabledFlag);
-}
-
-// Marks the matching fingerprints as disabled by storing 0x0F as their flag.
-void SimpleGuesser::DisableLanguage(string lang)
-{
-    const char disabledFlag = sal::static_int_cast< char >( 0x0F );
-    XableLanguage(lang, disabledFlag);
-}
-
-/**
- * Replaces the fingerprint DB: the handle held so far (if any) is released
- * with textcat_Done(), then a new one is obtained from
- * special_textcat_Init(path, prefix) and stored in h.
- */
-void SimpleGuesser::SetDBPath(const char* path, const char* prefix)
-{
-    if (h != NULL)
-    {
-        textcat_Done(h);
-    }
-
-    h = special_textcat_Init(path, prefix);
-}
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lingucomponent/source/languageguessing/simpleguesser.hxx b/lingucomponent/source/languageguessing/simpleguesser.hxx
deleted file mode 100644
index ee76b0781..000000000
--- a/lingucomponent/source/languageguessing/simpleguesser.hxx
+++ /dev/null
@@ -1,124 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/***************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-#ifndef SIMPLEGUESSER_H
-#define SIMPLEGUESSER_H
-
-#include <string.h>
-#include <string>
-#include <cstdlib>
-#include <vector>
-#include <guess.hxx>
-
-#define MAX_STRING_LENGTH_TO_ANALYSE 200
-
-using namespace std;
-
-/**
-Language guesser built on a libtextcat handle: loads a fingerprint DB and
-classifies texts against it.
-@author Jocelyn Merand
-*/
-class SimpleGuesser{
-public:
- /** Creates a guesser with no fingerprint DB loaded (h is NULL); use SetDBPath() to load one */
- SimpleGuesser();
-
- /** Assigns an other SimpleGuesser to the current one: the handle held so far is released and the handle of sg is taken over
- * @param SimpleGuesser& sg the other guesser to assign from
- */
- void operator=(SimpleGuesser& sg);
-
- /**
- * Destroys the object and releases the fingerprint handle, if any
- */
- ~SimpleGuesser();
-
- /**
- * Analyze a text and return the most probable languages of the text
- * @param char* text is the text to analyze
- * @return the list of guesses; empty when no DB is loaded or when libtextcat reports _TEXTCAT_RESULT_SHORT
- */
- vector<Guess> GuessLanguage(char* text);
-
- /**
- * Analyze a text and return the most probable language of the text
- * @param char* text is the text to analyze
- * @return the first guess (containing language), or a default-constructed Guess when there is no guess
- */
- Guess GuessPrimaryLanguage(char* text);
-
- /**
- * List all available languages (possibly to be in guesses)
- * @return the list of languages
- */
- vector<Guess> GetAvailableLanguages();
-
- /**
- * List all languages (possibly in guesses or not)
- * @return the list of languages
- */
- vector<Guess> GetAllManagedLanguages();
-
- /**
- * List all unavailable languages (disabled for any reason)
- * @return the list of languages
- */
- vector<Guess> GetUnavailableLanguages();
-
- /**
- * Mark a language enabled
- * @param string lang the language to enable (built like language-COUNTRY-encoding)
- */
- void EnableLanguage(string lang);
-
- /**
- * Mark a language disabled
- * @param string lang the language to disable (built like language-COUNTRY-encoding)
- */
- void DisableLanguage(string lang);
-
- /**
- * Load a new DB of fingerprints; a previously loaded DB is released first
- * @param const char* thePathOfConfFile the path of the configuration file
- * @param const char* prefix is the path of the directory which contains the fingerprint files
- */
- void SetDBPath(const char* thePathOfConfFile, const char* prefix);
-
-protected:
-
- //Handle to where the typical fingerprints (n-gram tables) are stored; NULL when no DB is loaded
- void* h;
-
- //Selects languages from the fingerprints DB; the mask indicates whether enabled (0xF0), disabled (0x0F) or both (0xFF) are wanted
- vector<Guess> GetManagedLanguages(const char mask);
-
- //Counterpart of GetManagedLanguages: enables or disables a language, depending on the mask (0xF0 enables, 0x0F disables)
- void XableLanguage(string lang, char mask);
-};
-
-#endif
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */