summaryrefslogtreecommitdiff
path: root/lingucomponent/source/languageguessing/simpleguesser.cxx
diff options
context:
space:
mode:
authorMichael Stahl <mstahl@redhat.com>2012-01-28 20:52:45 +0100
committerMichael Stahl <mstahl@redhat.com>2012-01-28 20:52:45 +0100
commit2e626373db2412ac22e8c5c27a60d11cd29e875b (patch)
tree9e9f67205cd5b72f1031721273e1534a3a1e5b0f /lingucomponent/source/languageguessing/simpleguesser.cxx
parentf7ee7bbd5174b084f018c2ec94d8c70c98ee04da (diff)
replace obsolete "master" branch with README that points at new repoHEADmaster-deletedmaster
Diffstat (limited to 'lingucomponent/source/languageguessing/simpleguesser.cxx')
-rw-r--r--lingucomponent/source/languageguessing/simpleguesser.cxx237
1 files changed, 0 insertions, 237 deletions
diff --git a/lingucomponent/source/languageguessing/simpleguesser.cxx b/lingucomponent/source/languageguessing/simpleguesser.cxx
deleted file mode 100644
index 68c53318f..000000000
--- a/lingucomponent/source/languageguessing/simpleguesser.cxx
+++ /dev/null
@@ -1,237 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/***************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
- /**
- *
- *
- *
- *
- * TODO
- * - Add exception throwing when h == NULL
- * - Not init h when implicit constructor is launched
- */
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_lingucomponent.hxx"
-
-#include <string.h>
-#include <sstream>
-#include <iostream>
-
-#include <libtextcat/textcat.h>
-#include <libtextcat/common.h>
-#include <libtextcat/constants.h>
-#include <libtextcat/fingerprint.h>
-#include <libtextcat/utf8misc.h>
-
-#include <sal/types.h>
-
-#include "altstrfunc.hxx"
-#include "simpleguesser.hxx"
-
-#ifndef _UTF8_
-#define _UTF8_
-#endif
-
-
-using namespace std;
-
-
-/**
- * This 3 following structures are from fingerprint.c and textcat.c
- */
-
-typedef struct ngram_t {
-
- sint2 rank;
- char str[MAXNGRAMSIZE+1];
-
-} ngram_t;
-
-typedef struct fp_t {
-
- const char *name;
- ngram_t *fprint;
- uint4 size;
-
-} fp_t;
-
-typedef struct textcat_t{
-
- void **fprint;
- char *fprint_disable;
- uint4 size;
- uint4 maxsize;
-
- char output[MAXOUTPUTSIZE];
-
-} textcat_t;
-/** end of the 3 structs */
-
-SimpleGuesser::SimpleGuesser()
-{
- h = NULL;
-}
-
-void SimpleGuesser::operator=(SimpleGuesser& sg){
- if(h){textcat_Done(h);}
- h = sg.h;
-}
-
-SimpleGuesser::~SimpleGuesser()
-{
- if(h){textcat_Done(h);}
-}
-
-
-/*!
- \fn SimpleGuesser::GuessLanguage(char* text)
- */
-vector<Guess> SimpleGuesser::GuessLanguage(char* text)
-{
- vector<Guess> guesses;
-
- if(!h){return guesses;}
-
- //calculate le number of unicode charcters (symbols)
- int len = utfstrlen(text);
-
- if( len > MAX_STRING_LENGTH_TO_ANALYSE ){len = MAX_STRING_LENGTH_TO_ANALYSE ;}
-
- char *guess_list = textcat_Classify(h, text, len);
-
- if(strcmp(guess_list, _TEXTCAT_RESULT_SHORT) == 0){
- return guesses;
- }
-
- int current_pointer = 0;
-
- for(int i = 0; guess_list[current_pointer] != '\0'; i++)
- {
- while(guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0'){
- current_pointer++;
- }
- if(guess_list[current_pointer] != '\0')
- {
- Guess g((char*)(guess_list + current_pointer));
-
- guesses.push_back(g);
-
- current_pointer++;
- }
- }
-
- return guesses;
-}
-
-/*!
- \fn SimpleGuesser::GuessPrimaryLanguage(char* text)
- */
-Guess SimpleGuesser::GuessPrimaryLanguage(char* text)
-{
- vector<Guess> ret = GuessLanguage(text);
- if(!ret.empty()){
- return GuessLanguage(text)[0];
- }
- else{
- return Guess();
- }
-}
-/**
- * Is used to know wich language is available, unavailable or both
- * when mask = 0xF0, return only Available
- * when mask = 0x0F, return only Unavailable
- * when mask = 0xFF, return both Available and Unavailable
- */
-vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
-{
- size_t i;
- textcat_t *tables = (textcat_t*)h;
-
- vector<Guess> lang;
- if(!h){return lang;}
-
- for (i=0; i<tables->size; i++) {
- if(tables->fprint_disable[i] & mask){
- string langStr = "[";
- langStr += (char*)fp_Name(tables->fprint[i]);
- Guess g( (char *)langStr.c_str());
- lang.push_back(g);
- }
- }
-
- return lang;
-}
-
-vector<Guess> SimpleGuesser::GetAvailableLanguages(){
- return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
-}
-
-vector<Guess> SimpleGuesser::GetUnavailableLanguages(){
- return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
-}
-
-vector<Guess> SimpleGuesser::GetAllManagedLanguages(){
- return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
-}
-
-void SimpleGuesser::XableLanguage(string lang, char mask){
- size_t i;
- textcat_t *tables = (textcat_t*)h;
-
- if(!h){return;}
-
- for (i=0; i<tables->size; i++) {
- string language(fp_Name(tables->fprint[i]));
- if(start(language,lang) == 0){
- //cout << language << endl;
- tables->fprint_disable[i] = mask;
- //continue;
- }
- }
-}
-
-void SimpleGuesser::EnableLanguage(string lang){
- XableLanguage(lang, sal::static_int_cast< char >( 0xF0 ));
-}
-
-void SimpleGuesser::DisableLanguage(string lang){
- XableLanguage(lang, sal::static_int_cast< char >( 0x0F ));
-}
-
-/**
-*
-*/
-void SimpleGuesser::SetDBPath(const char* path, const char* prefix){
- if(h){
- textcat_Done(h);
- }
- h = special_textcat_Init(path, prefix);
-}
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */