summaryrefslogtreecommitdiff
path: root/lingucomponent/source/thesaurus/mythes/mythes.cxx
diff options
context:
space:
mode:
authorThomas Lange [tl] <tl@openoffice.org>2010-08-05 17:16:04 +0200
committerThomas Lange [tl] <tl@openoffice.org>2010-08-05 17:16:04 +0200
commitc1141aade116acee20e54859e75113775e011933 (patch)
tree928c9bc2ccb161bae1231a4da92a948fa26f5ea2 /lingucomponent/source/thesaurus/mythes/mythes.cxx
parentc1a68ed683718e351567eb0fdd1b29e83313a012 (diff)
parentf18da7dd3a28225ff2c34182a06f932addfcf022 (diff)
cws tl82: merge with DEV300_m86
Diffstat (limited to 'lingucomponent/source/thesaurus/mythes/mythes.cxx')
-rw-r--r--lingucomponent/source/thesaurus/mythes/mythes.cxx403
1 files changed, 0 insertions, 403 deletions
diff --git a/lingucomponent/source/thesaurus/mythes/mythes.cxx b/lingucomponent/source/thesaurus/mythes/mythes.cxx
deleted file mode 100644
index b8cd8e680..000000000
--- a/lingucomponent/source/thesaurus/mythes/mythes.cxx
+++ /dev/null
@@ -1,403 +0,0 @@
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_lingucomponent.hxx"
-#include "license.readme"
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <errno.h>
-
-#include "mythes.hxx"
-
-
-
-MyThes::MyThes(const char* idxpath, const char * datpath)
-{
- nw = 0;
- encoding = NULL;
- list = NULL;
- offst = NULL;
-
- if (thInitialize(idxpath, datpath) != 1) {
- fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
- fflush(stderr);
- thCleanup();
- // did not initialize properly - throw exception?
- }
-}
-
-
-MyThes::~MyThes()
-{
- thCleanup();
-}
-
-
-int MyThes::thInitialize(const char* idxpath, const char* datpath)
-{
-
- // open the index file
- FILE * pifile = fopen(idxpath,"r");
- if (!pifile) {
- return 0;
- }
-
- // parse in encoding and index size */
- char * wrd;
- wrd = (char *)calloc(1, MAX_WD_LEN);
- if (!wrd) {
- fprintf(stderr,"Error - bad memory allocation\n");
- fflush(stderr);
- fclose(pifile);
- return 0;
- }
- int len = readLine(pifile,wrd,MAX_WD_LEN);
- encoding = mystrdup(wrd);
- len = readLine(pifile,wrd,MAX_WD_LEN);
- int idxsz = atoi(wrd);
-
-
- // now allocate list, offst for the given size
- list = (char**) calloc(idxsz,sizeof(char*));
- offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
-
- if ( (!(list)) || (!(offst)) ) {
- fprintf(stderr,"Error - bad memory allocation\n");
- fflush(stderr);
- fclose(pifile);
- return 0;
- }
-
- // now parse the remaining lines of the index
- len = readLine(pifile,wrd,MAX_WD_LEN);
- while (len > 0)
- {
- int np = mystr_indexOfChar(wrd,'|');
- if (nw < idxsz) {
- if (np >= 0) {
- *(wrd+np) = '\0';
- list[nw] = (char *)calloc(1,(np+1));
- if (!list[nw]) {
- fprintf(stderr,"Error - bad memory allocation\n");
- fflush(stderr);
- fclose(pifile);
- return 0;
- }
- memcpy((list[nw]),wrd,np);
- offst[nw] = atoi(wrd+np+1);
- nw++;
- }
- }
- len = readLine(pifile,wrd,MAX_WD_LEN);
- }
-
- free((void *)wrd);
- fclose(pifile);
-
- /* next open the data file */
- pdfile = fopen(datpath,"r");
- if (!pdfile) {
- return 0;
- }
-
- return 1;
-}
-
-
-void MyThes::thCleanup()
-{
- /* first close the data file */
- if (pdfile) {
- fclose(pdfile);
- pdfile=NULL;
- }
-
- if (list)
- {
- /* now free up all the allocated strings on the list */
- for (int i=0; i < nw; i++)
- {
- if (list[i]) {
- free(list[i]);
- list[i] = 0;
- }
- }
- free((void*)list);
- }
-
- if (encoding) free((void*)encoding);
- if (offst) free((void*)offst);
-
- encoding = NULL;
- list = NULL;
- offst = NULL;
- nw = 0;
-}
-
-
-
-// lookup text in index and count of meanings and a list of meaning entries
-// with each entry having a synonym count and pointer to an
-// array of char * (i.e the synonyms)
-//
-// note: calling routine should call CleanUpAfterLookup with the original
-// meaning point and count to properly deallocate memory
-
-int MyThes::Lookup(const char * pText, int len, mentry** pme)
-{
-
- *pme = NULL;
-
- // handle the case of missing file or file related errors
- if (! pdfile) return 0;
-
- long offset = 0;
-
- /* copy search word and make sure null terminated */
- char * wrd = (char *) calloc(1,(len+1));
- memcpy(wrd,pText,len);
-
- /* find it in the list */
- int idx = nw > 0 ? binsearch(wrd,list,nw) : -1;
- free(wrd);
- if (idx < 0) return 0;
-
- // now seek to the offset
- offset = (long) offst[idx];
- int rc = fseek(pdfile,offset,SEEK_SET);
- if (rc) {
- return 0;
- }
-
- // grab the count of the number of meanings
- // and allocate a list of meaning entries
- char * buf = NULL;
- buf = (char *) malloc( MAX_LN_LEN );
- if (!buf) return 0;
- readLine(pdfile, buf, (MAX_LN_LEN-1));
- int np = mystr_indexOfChar(buf,'|');
- if (np < 0) {
- free(buf);
- return 0;
- }
- int nmeanings = atoi(buf+np+1);
- *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
- if (!(*pme)) {
- free(buf);
- return 0;
- }
-
- // now read in each meaning and parse it to get defn, count and synonym lists
- mentry* pm = *(pme);
- char dfn[MAX_WD_LEN];
-
- for (int j = 0; j < nmeanings; j++) {
- readLine(pdfile, buf, (MAX_LN_LEN-1));
-
- pm->count = 0;
- pm->psyns = NULL;
- pm->defn = NULL;
-
- // store away the part of speech for later use
- char * p = buf;
- char * pos = NULL;
- np = mystr_indexOfChar(p,'|');
- if (np >= 0) {
- *(buf+np) = '\0';
- pos = mystrdup(p);
- p = p + np + 1;
- } else {
- pos = mystrdup("");
- }
-
- // count the number of fields in the remaining line
- int nf = 1;
- char * d = p;
- np = mystr_indexOfChar(d,'|');
- while ( np >= 0 ) {
- nf++;
- d = d + np + 1;
- np = mystr_indexOfChar(d,'|');
- }
- pm->count = nf;
- pm->psyns = (char **) malloc(nf*sizeof(char*));
-
- // fill in the synonym list
- d = p;
- for (int jj = 0; jj < nf; jj++)
- {
- np = mystr_indexOfChar(d,'|');
- if (np > 0)
- {
- *(d+np) = '\0';
- pm->psyns[jj] = mystrdup(d);
- d = d + np + 1;
- }
- else
- {
- pm->psyns[jj] = mystrdup(d);
- }
- }
-
- // add pos to first synonym to create the definition
- int k = strlen(pos);
- int m = strlen(pm->psyns[0]);
- if ((k+m) < (MAX_WD_LEN - 1)) {
- strncpy(dfn,pos,k);
- *(dfn+k) = ' ';
- strncpy((dfn+k+1),(pm->psyns[0]),m+1);
- pm->defn = mystrdup(dfn);
- } else {
- pm->defn = mystrdup(pm->psyns[0]);
- }
- free(pos);
- pm++;
-
- }
- free(buf);
-
- return nmeanings;
-}
-
-
-
-void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
-{
-
- if (nmeanings == 0) return;
- if ((*pme) == NULL) return;
-
- mentry * pm = *pme;
-
- for (int i = 0; i < nmeanings; i++) {
- int count = pm->count;
- for (int j = 0; j < count; j++) {
- if (pm->psyns[j]) free(pm->psyns[j]);
- pm->psyns[j] = NULL;
- }
- if (pm->psyns) free(pm->psyns);
- pm->psyns = NULL;
- if (pm->defn) free(pm->defn);
- pm->defn = NULL;
- pm->count = 0;
- pm++;
- }
- pm = *pme;
- free(pm);
- *pme = NULL;
- return;
-}
-
-
-// read a line of text from a text file stripping
-// off the line terminator and replacing it with
-// a null string terminator.
-// returns: -1 on error or the number of characters in
-// in the returning string
-
-// A maximum of nc characters will be returned
-
-int MyThes::readLine(FILE * pf, char * buf, int nc)
-{
-
- if (fgets(buf,nc,pf)) {
- mychomp(buf);
- return strlen(buf);
- }
- return -1;
-}
-
-
-
-// performs a binary search on null terminated character
-// strings
-//
-// returns: -1 on not found
-// index of wrd in the list[]
-
-int MyThes::binsearch(char * sw, char* _list[], int nlst)
-{
- int lp, up, mp, j, indx;
- lp = 0;
- up = nlst-1;
- indx = -1;
- if (strcmp(sw,_list[lp]) < 0) return -1;
- if (strcmp(sw,_list[up]) > 0) return -1;
- while (indx < 0 ) {
- mp = (int)((lp+up) >> 1);
- j = strcmp(sw,_list[mp]);
- if ( j > 0) {
- lp = mp + 1;
- } else if (j < 0 ) {
- up = mp - 1;
- } else {
- indx = mp;
- }
- if (lp > up) return -1;
- }
- return indx;
-}
-
-char * MyThes::get_th_encoding()
-{
- if (encoding) return encoding;
- return NULL;
-}
-
-
-// string duplication routine
-char * MyThes::mystrdup(const char * p)
-{
- int sl = strlen(p) + 1;
- char * d = (char *)malloc(sl);
- if (d) {
- memcpy(d,p,sl);
- return d;
- }
- return NULL;
-}
-
-// remove cross-platform text line end characters
-void MyThes::mychomp(char * s)
-{
- int k = strlen(s);
- if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
- if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
-}
-
-
-// return index of char in string
-int MyThes::mystr_indexOfChar(const char * d, int c)
-{
- char * p = strchr((char *)d,c);
- if (p) return (int)(p-d);
- return -1;
-}
-