diff options
author | Vladimir Glazunov <vg@openoffice.org> | 2011-01-27 16:53:34 +0100 |
---|---|---|
committer | Vladimir Glazunov <vg@openoffice.org> | 2011-01-27 16:53:34 +0100 |
commit | 6998f16d9aa1b4d93648e0bbdee9050726b89ad8 (patch) | |
tree | 497e9cec62aa68f44c373648a2e91e1dc412047e | |
parent | 3397c96e25fe093535edf209fb57e91be6136dd3 (diff) | |
parent | bfd44bdd03589aa1a09f23568fc5203999b4d378 (diff) |
CWS-TOOLING: integrate CWS tl84ooo/DEV300_m99
-rw-r--r-- | hyphen/hyphen-2.4.patch | 169 | ||||
-rw-r--r-- | hyphen/hyphen-2.7.1-read-charset.patch | 20 | ||||
-rw-r--r-- | hyphen/hyphen-2.7.1.patch | 72 | ||||
-rw-r--r-- | hyphen/makefile.mk | 8 | ||||
-rw-r--r-- | hyphen/prj/d.lst | 4 | ||||
-rw-r--r-- | libtextcat/data/new_fingerprints/fpdb.conf | 3 | ||||
-rw-r--r-- | libtextcat/data/new_fingerprints/lm/serbian-latin.lm | 400 | ||||
-rw-r--r-- | libtextcat/data/new_fingerprints/lm/serbian.lm | 400 | ||||
-rw-r--r-- | libtextcat/data/new_fingerprints/lm/serbian_ascii.lm | 400 |
9 files changed, 901 insertions, 575 deletions
diff --git a/hyphen/hyphen-2.4.patch b/hyphen/hyphen-2.4.patch deleted file mode 100644 index 33947f5..0000000 --- a/hyphen/hyphen-2.4.patch +++ /dev/null @@ -1,169 +0,0 @@ -diff -u misc/hyphen-2.4/csutil.c misc/build/hyphen-2.4/csutil.c ---- misc/hyphen-2.4/csutil.c 2003-06-01 02:04:00.000000000 +0200 -+++ misc/build/hyphen-2.4/csutil.c 2008-06-04 10:03:40.000000000 +0200 -@@ -3493,7 +3493,7 @@ - }; - - --struct enc_entry encds[] = { -+static struct enc_entry encds[] = { - {"ISO8859-1",iso1_tbl}, - {"ISO8859-2",iso2_tbl}, - {"ISO8859-3",iso3_tbl}, -Common subdirectories: misc/hyphen-2.4/doc and misc/build/hyphen-2.4/doc -diff -u misc/hyphen-2.4/hyphen.c misc/build/hyphen-2.4/hyphen.c ---- misc/hyphen-2.4/hyphen.c 2008-05-01 02:18:15.000000000 +0200 -+++ misc/build/hyphen-2.4/hyphen.c 2008-06-04 10:06:57.000000000 +0200 -@@ -326,7 +326,7 @@ - } else { - hnj_strchomp(repl + 1); - replindex = 0; -- replcut = strlen(buf); -+ replcut = (signed char) strlen(buf); - } - repl = hnj_strdup(repl + 1); - } -@@ -359,10 +359,10 @@ - if ((((unsigned char) word[pc]) >> 6) != 2) pu++; - if ((ps < 0) && (replindex == pu)) { - ps = replindex; -- replindex = pc; -+ replindex = (signed char) pc; - } - if ((ps >= 0) && ((pu - ps) == replcut)) { -- replcut = (pc - replindex); -+ replcut = (signed char) (pc - replindex); - break; - } - } -@@ -379,7 +379,7 @@ - dict[k]->states[state_num].repl = repl; - dict[k]->states[state_num].replindex = replindex; - if (!replcut) { -- dict[k]->states[state_num].replcut = strlen(word); -+ dict[k]->states[state_num].replcut = (signed char) strlen(word); - } else { - dict[k]->states[state_num].replcut = replcut; - } -@@ -702,7 +702,7 @@ - prep_word[j++] = '.'; - prep_word[j] = '\0'; - -- for (i = 0; i < j; i++) -+ for (i = 0; i < word_size + 5; i++) - hyphens[i] = '0'; - - #ifdef VERBOSE -@@ -941,13 +941,13 @@ - int hnj_hyphen_norm(const char *word, int word_size, char * hyphens, - char *** rep, int ** pos, int ** cut) - { -+ int i, j, k; - if ((((unsigned char) word[0]) >> 6) == 2) { - fprintf(stderr, "error - bad, non UTF-8 input: %s\n", word); - return 1; - } - - /* calculate UTF-8 character positions */ -- int i, j, k; - for (i = 0, j = -1; i < word_size; i++) { - /* beginning of an UTF-8 character (not '10' start bits) */ - if ((((unsigned char) word[i]) >> 6) != 2) j++; -diff -u misc/hyphen-2.4/Makefile.am misc/build/hyphen-2.4/Makefile.am ---- misc/hyphen-2.4/Makefile.am 2008-04-30 12:33:44.000000000 +0200 -+++ misc/build/hyphen-2.4/Makefile.am 2008-06-04 11:33:23.000000000 +0200 -@@ -24,12 +24,12 @@ - - hyphen.us3: - cp -f $(srcdir)/hyphen.tex hyphen.us -- patch < $(srcdir)/hyphen.patch -+ $(GNUPATCH) < $(srcdir)/hyphen.patch - $(srcdir)/tbhyphext.sh <$(srcdir)/tbhyphext.tex >hyphen.us2 - cat hyphen.us hyphen.us2 >hyphen.us3 - --hyph_en_US.dic: hyphen.us3 -- perl $(srcdir)/substrings.pl hyphen.us3 hyph_en_US.dic ISO8859-1 2 3 >/dev/null -+hyph_en_US.dic: -+ @echo "hyph_en_US.txt distributed with Hyphen library" - - clean-local: - rm -rf hyphen.us* hyph_en_US.dic -diff -u misc/hyphen-2.4/Makefile.in misc/build/hyphen-2.4/Makefile.in ---- misc/hyphen-2.4/Makefile.in 2008-04-30 14:29:57.000000000 +0200 -+++ misc/build/hyphen-2.4/Makefile.in 2008-06-04 11:33:44.000000000 +0200 -@@ -795,12 +795,12 @@ - - hyphen.us3: - cp -f $(srcdir)/hyphen.tex hyphen.us -- patch < $(srcdir)/hyphen.patch -+ $(GNUPATCH) < $(srcdir)/hyphen.patch - $(srcdir)/tbhyphext.sh <$(srcdir)/tbhyphext.tex >hyphen.us2 - cat hyphen.us hyphen.us2 >hyphen.us3 - --hyph_en_US.dic: hyphen.us3 -- perl $(srcdir)/substrings.pl hyphen.us3 hyph_en_US.dic ISO8859-1 2 3 >/dev/null -+hyph_en_US.dic: -+ @echo "hyph_en_US.txt distributed with Hyphen library" - - clean-local: - rm -rf hyphen.us* hyph_en_US.dic -diff -u misc/hyphen-2.4/makefile.mk misc/build/hyphen-2.4/makefile.mk ---- misc/hyphen-2.4/makefile.mk 2008-06-04 10:43:21.000000000 +0200 -+++ misc/build/hyphen-2.4/makefile.mk 2008-06-04 12:40:46.000000000 +0200 -@@ -1 +1,54 @@ --dummy -+#************************************************************************* -+# -+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+# -+# Copyright 2000, 2010 Oracle and/or its affiliates. -+# -+# OpenOffice.org - a multi-platform office productivity suite -+# -+# This file is part of OpenOffice.org. -+# -+# OpenOffice.org is free software: you can redistribute it and/or modify -+# it under the terms of the GNU Lesser General Public License version 3 -+# only, as published by the Free Software Foundation. -+# -+# OpenOffice.org is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU Lesser General Public License version 3 for more details -+# (a copy is included in the LICENSE file that accompanied this code). -+# -+# You should have received a copy of the GNU Lesser General Public License -+# version 3 along with OpenOffice.org. If not, see -+# <http://www.openoffice.org/license.html> -+# for a copy of the LGPLv3 License. -+# -+#************************************************************************* -+ -+PRJ = ..$/..$/..$/.. -+ -+PRJNAME = hyphen -+TARGET = hyphen -+CFLAGSCALL=gsd -+ -+USE_DEFFILE=TRUE -+EXTERNAL_WARNINGS_NOT_ERRORS := TRUE -+UWINAPILIB=
-+ -+.INCLUDE : settings.mk -+ -+# --- Files -------------------------------------------------------- -+ -+# !! not to be compiled because those belong to a stand alone programs: !! -+# $(SLO)$/createfp.obj\ -+# $(SLO)$/testtextcat.obj -+ -+SLOFILES= \ -+ $(SLO)$/hyphen.obj\ -+ $(SLO)$/hnjalloc.obj -+ -+# --- Targets ------------------------------------------------------ -+ -+ALL: ALLTAR -+ -+.INCLUDE : target.mk -Common subdirectories: misc/hyphen-2.4/tests and misc/build/hyphen-2.4/tests diff --git a/hyphen/hyphen-2.7.1-read-charset.patch b/hyphen/hyphen-2.7.1-read-charset.patch new file mode 100644 index 0000000..e846955 --- /dev/null +++ b/hyphen/hyphen-2.7.1-read-charset.patch @@ -0,0 +1,20 @@ +--- misc/hyphen-2.7.1/hyphen.c 2010-12-01 01:47:22.000000000 +0100 ++++ misc/build/hyphen-2.7.1/hyphen.c 2011-01-18 16:26:50.953125000 +0100 +@@ -291,13 +291,10 @@ + /* read in character set info */ + if (k == 0) { + for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; +- if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { +- for (i=0;i<MAX_NAME;i++) +- if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) +- dict[k]->cset[i] = 0; +- } else { +- dict[k]->cset[0] = 0; +- } ++ fgets(dict[k]->cset, sizeof(dict[k]->cset),f); ++ for (i=0;i<MAX_NAME;i++) ++ if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) ++ dict[k]->cset[i] = 0; + dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); + } else { + strcpy(dict[k]->cset, dict[0]->cset); diff --git a/hyphen/hyphen-2.7.1.patch b/hyphen/hyphen-2.7.1.patch new file mode 100644 index 0000000..0e8eafa --- /dev/null +++ b/hyphen/hyphen-2.7.1.patch @@ -0,0 +1,72 @@ +--- misc/hyphen-2.7.1/Makefile.am 2010-07-19 11:23:17.000000000 +0200 ++++ misc/build/hyphen-2.7.1/Makefile.am 2010-12-02 10:15:44.390625000 +0100 +@@ -25,14 +25,13 @@ + + hyphen.us3: + cp -f $(srcdir)/hyphen.tex hyphen.us +- patch < $(srcdir)/hyphen.patch ++ $(GNUPATCH) < $(srcdir)/hyphen.patch + $(srcdir)/tbhyphext.sh <$(srcdir)/tbhyphext.tex >hyphen.us2 + cat hyphen.us hyphen.us2 | $(AWK) -f $(srcdir)/lig.awk >hyphen.us3 + cat $(srcdir)/ligpatch.txt >>hyphen.us3 + +-hyph_en_US.dic: hyphen.us3 +- perl $(srcdir)/substrings.pl hyphen.us3 hyphen.us4 UTF-8 2 3 >/dev/null +- cat hyphen.us4 | $(SED) -f $(srcdir)/ooopatch.sed >hyph_en_US.dic ++hyph_en_US.dic: ++ @echo "hyph_en_US.txt distributed with Hyphen library" + + clean-local: + rm -rf hyphen.us* hyph_en_US.dic +--- misc/hyphen-2.7.1/Makefile.in 2010-12-01 02:31:29.000000000 +0100 ++++ misc/build/hyphen-2.7.1/Makefile.in 2010-12-02 10:17:16.546875000 +0100 +@@ -940,14 +940,13 @@ + + hyphen.us3: + cp -f $(srcdir)/hyphen.tex hyphen.us +- patch < $(srcdir)/hyphen.patch ++ $(GNUPATCH) < $(srcdir)/hyphen.patch + $(srcdir)/tbhyphext.sh <$(srcdir)/tbhyphext.tex >hyphen.us2 + cat hyphen.us hyphen.us2 | $(AWK) -f $(srcdir)/lig.awk >hyphen.us3 + cat $(srcdir)/ligpatch.txt >>hyphen.us3 + +-hyph_en_US.dic: hyphen.us3 +- perl $(srcdir)/substrings.pl hyphen.us3 hyphen.us4 UTF-8 2 3 >/dev/null +- cat hyphen.us4 | $(SED) -f $(srcdir)/ooopatch.sed >hyph_en_US.dic ++hyph_en_US.dic: ++ @echo "hyph_en_US.txt distributed with Hyphen library" + + clean-local: + rm -rf hyphen.us* hyph_en_US.dic +--- misc/hyphen-2.7.1/makefile.mk 2010-12-02 10:35:40.265625000 +0100 ++++ misc/build/hyphen-2.7.1/makefile.mk 2010-12-02 10:25:45.750000000 +0100 +@@ -1 +1,28 @@ +-dummy ++PRJ = ..$/..$/..$/.. ++ ++PRJNAME = hyphen ++TARGET = hyphen ++CFLAGSCALL=gsd ++ ++USE_DEFFILE=TRUE ++EXTERNAL_WARNINGS_NOT_ERRORS := TRUE ++UWINAPILIB= ++ ++.INCLUDE : settings.mk ++ ++# --- Files -------------------------------------------------------- ++ ++# !! not to be compiled because those belong to a stand alone programs: !! ++# $(SLO)$/createfp.obj\ ++# $(SLO)$/testtextcat.obj ++ ++SLOFILES= \ ++ $(SLO)$/hyphen.obj\ ++ $(SLO)$/hnjalloc.obj ++ ++# --- Targets ------------------------------------------------------ ++ ++ALL: ALLTAR ++ ++.INCLUDE : target.mk ++ diff --git a/hyphen/makefile.mk b/hyphen/makefile.mk index b733398..94c1592 100644 --- a/hyphen/makefile.mk +++ b/hyphen/makefile.mk @@ -36,12 +36,14 @@ TARGET=hyphen # --- Files -------------------------------------------------------- -TARFILE_NAME=hyphen-2.4 -TARFILE_MD5=d0b5af6e408b8d2958f3d83b5244f5e8 +TARFILE_NAME=hyphen-2.7.1 +TARFILE_MD5=48a9f787f43a09c0a9b7b00cd1fddbbf ADDITIONAL_FILES += makefile.mk -PATCH_FILES=hyphen-2.4.patch +PATCH_FILES= \ + hyphen-2.7.1.patch \ + hyphen-2.7.1-read-charset.patch .IF "$(GUI)"=="UNX" CONFIGURE_DIR=$(BUILD_DIR) diff --git a/hyphen/prj/d.lst b/hyphen/prj/d.lst index 0223cc6..4b7ff0b 100644 --- a/hyphen/prj/d.lst +++ b/hyphen/prj/d.lst @@ -1,5 +1,5 @@ ..\%__SRC%\slb\hyphen.lib %_DEST%\lib%_EXT%\hyphen.lib ..\%__SRC%\inc\hyphen.h %_DEST%\inc%_EXT%\hyphen.h -..\%__SRC%\misc\build\hyphen-2.4\.libs\libhyphen.a %_DEST%\lib%_EXT%\libhyphen.a -..\%__SRC%\misc\build\hyphen-2.4\hyph_en_US.dic %_DEST%\bin%_EXT%\hyph_en_US.dic +..\%__SRC%\misc\build\hyphen-2.7.1\.libs\libhyphen.a %_DEST%\lib%_EXT%\libhyphen.a +..\%__SRC%\misc\build\hyphen-2.7.1\hyph_en_US.dic %_DEST%\bin%_EXT%\hyph_en_US.dic diff --git a/libtextcat/data/new_fingerprints/fpdb.conf b/libtextcat/data/new_fingerprints/fpdb.conf index df56f9e..329184d 100644 --- a/libtextcat/data/new_fingerprints/fpdb.conf +++ b/libtextcat/data/new_fingerprints/fpdb.conf @@ -68,7 +68,8 @@ russian.lm ru--utf8 sanskrit.lm sa--utf8 scots.lm sco--utf8 scots_gaelic.lm gd--utf8 -serbian_ascii.lm sh-YU-utf8 +serbian.lm sr--utf-8 +serbian-latin.lm sh--utf-8 slovak_ascii.lm sk-SK-utf8 slovenian.lm sl--utf8 spanish.lm es--utf8 diff --git a/libtextcat/data/new_fingerprints/lm/serbian-latin.lm b/libtextcat/data/new_fingerprints/lm/serbian-latin.lm new file mode 100644 index 0000000..0a02831 --- /dev/null +++ b/libtextcat/data/new_fingerprints/lm/serbian-latin.lm @@ -0,0 +1,400 @@ +_ 56298 +a 17374 +i 15500 +e 13745 +o 13087 +n 9704 +r 8535 +s 7383 +t 6692 +j 6664 +u 6390 +k 6060 +a_ 5221 +v 5177 +l 5082 +d 4923 +e_ 4729 +m 4663 +p 4121 +i_ 3992 +_s 2964 +je 2847 +g 2703 +z 2575 +u_ 2521 +_p 2491 +ra 2430 +_i 2355 +na 2337 +. 2169 +, 2142 +,_ 2134 +._ 2112 +st 2091 +o_ 2004 +ni 1971 +b 1933 +ko 1894 +je_ 1700 +an 1671 +ij 1628 +no 1559 +č 1550 +_n 1522 +_u 1516 +re 1465 +ti 1409 +_o 1368 +en 1365 +_k 1353 +_j 1346 +_d 1341 +ja 1325 +li 1314 +ta 1309 +pr 1303 +c 1287 +ka 1269 +_je 1233 +po 1224 +ne 1221 +_i_ 1220 +ri 1198 +va 1197 +ov 1151 +od 1146 +la 1139 +sk 1132 +m_ 1124 +_pr 1120 +os 1086 +in 1060 +š 1053 +ve 1048 +oj 1035 +ma 1029 +om 1027 +og 1024 +im 1006 +av 1002 +al 982 +me 976 +vi 971 +_po 966 +_na 963 +na_ 944 +da 928 +ro 905 +nj 900 +ik 891 +_je_ 889 +to 884 +ad 881 +ar 873 +h 862 +or 858 +se 840 +_m 836 +te 819 +is 816 +_u_ 810 +aj 809 +ed 800 +_t 791 +et 772 +at 740 +vo 735 +ju 731 +gr 723 +di 722 +lo 722 +za 709 +il 709 +ak 707 +_r 704 +ja_ 703 +ji 700 +ne_ 694 +_ko 691 +ki 689 +er 681 +ci 680 +ć 673 +_se 670 +_v 664 +ž 663 +el 662 +on 658 +_z 656 +S 638 +iz 635 +bi 622 +ek 616 +_S 612 +su 607 +gra 606 +sa 603 +a, 601 +a,_ 598 +_b 598 +ih 597 +om_ 591 +_g 588 +ost 587 +ije 586 +d_ 579 +tr 574 +se_ 567 +ija 561 +de 559 +em 552 +_se_ 552 +le 549 +a. 548 +lj 548 +a._ 542 +do 540 +_su 533 +zi 529 +ič 525 +sta 520 +h_ 520 +ke 517 +ih_ 511 +f 507 +go 505 +ol 504 +dn 498 +sti 496 +ka_ 493 +_. 489 +_._ 473 +rad 472 +_a 468 +g_ 463 +ic 461 +as 461 +_za 461 +it 456 +koj 454 +ob 448 +iv 442 +da_ 442 +az 441 +su_ 436 +ku 433 +ma_ 430 +mo 429 +ju_ 429 +_sa 427 +ke_ 421 +ni_ 421 +ist 421 +og_ 418 +_od 417 +am 416 +anj 412 +đ 407 +_su_ 407 +ru 400 +nje 398 +sl 397 +ok 392 +op 391 +_koj 391 +_na_ 389 +tn 388 +ji_ 384 +e, 383 +_do 381 +e,_ 380 +ima 379 +ač 378 +nos 378 +vn 377 +B 377 +_ka 373 +ti_ 372 +li_ 370 +eo 370 +pre 367 +_iz 364 +P 361 +sko 361 +io 360 +n_ 360 +" 356 +vr 354 +_st 354 +mi 352 +čk 351 +ao 350 +im_ 347 +es 346 +_B 346 +ev 344 +ski 343 +ez 343 +j_ 341 +ije_ 341 +ig 339 +_ra 338 +ko_ 336 +tv 336 +grad 335 +no_ 335 +la_ 334 +_P 333 +_da 333 +št 332 +od_ 330 +- 326 +dr 323 +va_ 322 +tu 320 +_l 316 +pro 315 +ori 315 +N 314 +ika 311 +ija_ 311 +sto 309 +e. 307 +ir 303 +e._ 302 +_pro 301 +_pre 300 +ki_ 300 +ot 299 +_N 297 +sv 294 +pe 291 +ns 291 +sn 291 +met 290 +t_ 289 +pri 289 +ba 288 +ili 288 +pa 288 +ut 287 +ao_ 286 +oji 285 +_ne 285 +če 284 +ova 283 +kom 282 +um 281 +ičk 279 +nost 279 +k_ 279 +si 279 +ada 278 +van 278 +cij 276 +Sr 276 +lik 275 +_Sr 275 +nt 275 +ogr 274 +ug 274 +_ve 274 +ran 273 +br 273 +ani 272 +ine 272 +ac 271 +edn 271 +red 268 +_bi 266 +_pri 266 +ud 266 +ogra 265 +nja 265 +odi 264 +_f 263 +_re 262 +ga 258 +ati 258 +zn 257 +ovi 255 +rij 254 +_sv 254 +ako 252 +nu 252 +nij 251 +ana 251 +ča 251 +rav 250 +din 248 +kr 247 +iš 247 +či 245 +up 245 +ce 245 +ta_ 244 +rv 244 +men 244 +un 243 +rb 243 +aju 241 +ava 241 +ra_ 241 +etn 239 +oj_ 239 +ln 238 +T 238 +pos 237 +eni 237 +M 234 +_go 233 +_od_ 232 +du 231 +ali 231 +ini 229 +ima_ 229 +_da_ 228 +nov 227 +_te 227 +ps 225 +_e 225 +Srb 223 +ca 223 +_M 223 +_Srb 222 +ara 222 +_mo 221 +Be 221 +_de 221 +i. 220 +bij 220 +K 220 +jed 219 +sa_ 219 +oji_ 218 +čn 218 +_ob 218 +pi 218 +ur 218 +eogr 217 +ove 217 +avi 217 +tno 217 +eog 217 +eogra 217 +stv 216 +zv 216 +_Be 216 +i._ 216 +nik 215 +bo 214 +_koji 214 +nic 214 +koji 214 +_pos 214 +_K 213 +ume 213 +za_ 211 +i, 211 +i,_ 211 diff --git a/libtextcat/data/new_fingerprints/lm/serbian.lm b/libtextcat/data/new_fingerprints/lm/serbian.lm new file mode 100644 index 0000000..55b5906 --- /dev/null +++ b/libtextcat/data/new_fingerprints/lm/serbian.lm @@ -0,0 +1,400 @@ +_ 56294 +а 17374 +и 15500 +е 13745 +о 13087 +н 8809 +р 8535 +с 7383 +т 6692 +у 6390 +к 6060 +а_ 5221 +ј 5197 +в 5177 +д 4882 +е_ 4729 +м 4663 +л 4534 +п 4121 +и_ 3992 +_с 2964 +г 2703 +з 2575 +у_ 2521 +_п 2491 +ра 2430 +_и 2355 +на 2337 +је 2281 +. 2169 +, 2142 +,_ 2134 +._ 2112 +ст 2091 +о_ 2004 +ни 1971 +б 1933 +ко 1894 +иј 1628 +но 1559 +ч 1550 +_у 1516 +је_ 1465 +ре 1465 +_н 1437 +ти 1409 +_о 1368 +_к 1353 +_ј 1346 +_д 1315 +ли 1314 +та 1309 +пр 1303 +ц 1287 +ка 1269 +ан 1260 +_је 1233 +по 1224 +не 1221 +_и_ 1220 +ен 1198 +ри 1198 +ва 1197 +ов 1151 +од 1146 +ла 1139 +ск 1132 +м_ 1124 +_пр 1120 +ос 1086 +ш 1053 +ве 1048 +ој 1035 +ма 1029 +ом 1027 +ин 1025 +ог 1024 +им 1006 +ав 1002 +ме 976 +ви 971 +_по 966 +_на 963 +на_ 944 +да 928 +ја 906 +ро 905 +њ 895 +ал 893 +ик 891 +_је_ 889 +то 884 +ад 876 +ар 873 +х 862 +ор 858 +се 840 +_м 836 +те 819 +ис 816 +_у_ 810 +ај 809 +ед 800 +_т 791 +ет 772 +ат 740 +во 735 +гр 723 +ди 722 +ло 722 +за 709 +ак 707 +_р 704 +не_ 694 +_ко 691 +ки 689 +ер 681 +ил 681 +ци 680 +ћ 673 +_се 670 +_в 664 +_з 656 +он 651 +С 638 +из 635 +би 622 +ж 617 +ек 616 +_С 612 +ел 611 +су 607 +гра 606 +са 603 +а, 601 +а,_ 598 +_б 598 +их 597 +ом_ 591 +_г 588 +ост 587 +ије 586 +д_ 579 +тр 574 +се_ 567 +ија 561 +де 559 +_се_ 552 +ем 552 +ле 549 +а. 548 +љ 548 +а._ 542 +до 540 +_су 533 +ју 529 +зи 529 +ји 525 +ич 525 +ста 520 +х_ 520 +ке 517 +их_ 511 +ф 507 +го 505 +сти 496 +ка_ 493 +_. 489 +_._ 473 +_а 468 +рад 467 +ја_ 467 +г_ 463 +иц 461 +ас 461 +_за 461 +ит 456 +кој 454 +об 448 +да_ 442 +ив 442 +аз 441 +су_ 436 +ку 433 +ма_ 430 +мо 429 +_са 427 +ке_ 421 +ол 421 +ни_ 421 +ист 421 +дн 421 +ог_ 418 +_од 417 +ам 416 +ањ 411 +ђ 407 +_су_ 407 +ру 400 +ње 398 +сл 397 +ок 392 +оп 391 +_кој 391 +_на_ 389 +е, 383 +тн 381 +_до 381 +е,_ 380 +има 379 +нос 378 +ач 378 +вн 377 +Б 377 +_ка 373 +ти_ 372 +ео 370 +ли_ 370 +пре 367 +_из 364 +ско 361 +ио 360 +н_ 360 +П 359 +ју_ 359 +" 356 +вр 354 +_ст 354 +ји_ 354 +ми 352 +чк 351 +ао 350 +им_ 347 +_Б 346 +ес 346 +ев 344 +ски 343 +ез 343 +ије_ 341 +иг 339 +_ра 338 +тв 336 +ко_ 336 +град 335 +но_ 335 +ла_ 334 +_да 333 +_П 333 +шт 332 +од_ 330 +- 326 +ј_ 326 +др 323 +ва_ 322 +ту 320 +ори 315 +про 315 +ија_ 311 +ика 311 +сто 309 +е. 307 +ир 303 +е._ 302 +_про 301 +_пре 300 +ки_ 300 +от 299 +Н 296 +св 294 +пе 291 +сн 291 +нс 291 +мет 290 +т_ 289 +при 289 +ба 288 +па 288 +или 288 +ут 287 +ао_ 286 +оји 285 +_не 285 +че 284 +ова 283 +ком 282 +ум 281 +_Н 280 +си 279 +ичк 279 +ност 279 +к_ 279 +ада 278 +Ср 276 +циј 276 +лик 275 +_Ср 275 +нт 275 +огр 274 +_ве 274 +уг 274 +бр 273 +ани 272 +ине 272 +ац 271 +ред 268 +_би 266 +_при 266 +уд 265 +ња 265 +огра 265 +оди 264 +_ф 263 +_ре 262 +_л 259 +ати 258 +га 258 +зн 257 +ови 255 +риј 254 +_св 254 +ну 252 +ако 252 +ча 251 +ниј 251 +ана 251 +рав 250 +иш 247 +кр 247 +чи 245 +уп 245 +це 245 +дин 244 +та_ 244 +рв 244 +рб 243 +ра_ 241 +ају 241 +ава 241 +ун 240 +ој_ 239 +Т 238 +лн 238 +пос 237 +ени 237 +етн 234 +М 234 +_го 233 +_од_ 232 +али 231 +ду 231 +ини 229 +има_ 229 +_да_ 228 +_те 227 +нов 227 +пс 225 +_е 225 +_М 223 +Срб 223 +ца 223 +_Срб 222 +ара 222 +ран 221 +_мо 221 +Бе 221 +_де 221 +биј 220 +и. 220 +јед 219 +К 219 +едн 219 +са_ 219 +пи 218 +оји_ 218 +ур 218 +_об 218 +ове 217 +чн 217 +тно 217 +ави 217 +еогр 217 +еогра 217 +еог 217 +и._ 216 +ств 216 +зв 216 +_Бе 216 +ник 215 +_који 214 +ниц 214 +бо 214 +који 214 +_пос 214 +уме 213 +_К 212 +за_ 211 +и,_ 211 +и, 211 +тра 209 +сп 209 diff --git a/libtextcat/data/new_fingerprints/lm/serbian_ascii.lm b/libtextcat/data/new_fingerprints/lm/serbian_ascii.lm deleted file mode 100644 index 9471be6..0000000 --- a/libtextcat/data/new_fingerprints/lm/serbian_ascii.lm +++ /dev/null @@ -1,400 +0,0 @@ -_ 34122 -a 9113 -o 8135 -i 7736 -e 7535 -n 5207 -s 4860 -j 3995 -t 3797 -r 3660 -u 3224 -l 3065 -d 3061 -e_ 2941 -v 2786 -a_ 2746 -k 2701 -m 2492 -o_ 2328 -y 2238 -p 2151 -_s 2148 -i_ 2094 -je 1887 -c 1854 -z 1731 -_n 1437 -_p 1432 -g 1418 -b 1368 -u_ 1333 -je_ 1237 -, 1226 -,_ 1214 -_i 1130 -st 1105 -na 1076 -. 1056 -_d 1033 -._ 1030 -_j 967 -ra 934 -ko 908 -ni 900 -cy 893 -sy 875 -_je 871 -_o 824 -ta 799 -no 780 -_u 777 -re 766 -_b 764 -_k 763 -da 760 -ne 754 -li 750 -ti 745 -se 722 -po 713 -to 713 -_je_ 696 -an 688 -ja 683 -pr 665 -va 651 -lo 634 -_z 626 -m_ 625 -is 625 -il 622 -ov 621 -la 621 -_m 615 -bi 604 -_t 603 -_po 594 -en 586 -_se 578 -os 578 -in 576 -od 576 -ka 552 -ve 548 -ij 538 -_pr 536 -al 536 -vo 535 -om 530 -_i_ 525 -nj 515 -ed 509 -_na 507 -na_ 503 -og 499 -oj 498 -ma 493 -_bi 492 -on 489 -ak 482 -im 481 -ye 481 -ro 480 -vi 473 -sa 469 -ri 464 -da_ 451 -av 450 -at 449 -se_ 447 -es 446 -h 443 -ao 441 -ji 437 -yi 436 -_da 433 -ad 432 -_se_ 430 -lj 428 -zy 426 -za 426 -_ne 425 -de 422 -tr 417 -cj 415 -_u_ 414 -_c 412 -le 402 -_v 397 -ar 390 -_g 390 -ic 384 -n_ 382 -ju 379 -lo_ 377 -aj 376 -_ko 369 -ao_ 366 -ek 361 -_da_ 359 -et 356 -go 354 -iz 346 -_za 345 -_r 344 -or 342 -mo 341 -el 340 -as 339 -ik 336 -te 332 -_sa 329 -d_ 323 -am 320 -me 318 -sto 317 -di 315 -ec 311 -ol 310 -a,_ 307 -a, 307 -_ni 302 -ya 296 -do 295 -yt 294 -su 292 -syt 289 -li_ 288 -sta 286 -ije 284 -ko_ 277 -ti_ 277 -la_ 277 -ga 276 -bil 275 -no_ 274 -a. 273 -nu 272 -a._ 271 -ne_ 271 -om_ 268 -_cy 266 -_na_ 263 -_bil 263 -sv 263 -ru 259 -to_ 256 -_od 253 -cyi 253 -nje 251 -it 251 -pa 250 -az 248 -e,_ 245 -e, 245 -ob 244 -dn 243 -ac 242 -ost 242 -k_ 240 -iv 239 -io 238 -_su 238 -_iz 237 -ilo 235 -_sv 234 -_ka 233 -koj 231 -mi 229 -im_ 229 -ije_ 227 -g_ 226 -em 223 -su_ 223 -ih 223 -ji_ 221 -kr 220 -ut 220 -_koj 220 -V 218 -_st 218 -ye_ 217 -_l 214 -_V 213 -ovo 211 -j_ 210 -uc 208 -ja_ 208 -h_ 207 -nij 206 -sk 206 -ot 203 -io_ 203 -gl 203 -_do 201 -ok 200 -ns 199 -ilo_ 199 -er 197 -ih_ 195 -pre 193 -ci 193 -og_ 193 -ki 192 -sl 191 -t_ 189 -ni_ 189 -_a 189 -vr 188 -ati 187 -_su_ 186 -nije 181 -pro 181 -be 180 -yn 179 -cye 178 -ju_ 178 -ku 177 -isy 177 -ta_ 174 -sye 172 -_tr 172 -O 172 -jen 172 -_to 171 -pi 168 -_pre 168 -S 168 -ima 167 -nije_ 167 -_mo 166 -eg 166 -e._ 164 -za_ 164 -e. 164 -_pro 164 -gov 163 -N 162 -dr 162 -ako 162 -tv 162 -_S 160 -P 159 -ma_ 159 -_on 159 -sp 158 -nst 158 -anj 158 -dj 157 -oc 157 -_sy 156 -ev 155 -ce 155 -lik 154 -_nij 153 -_N 152 -ist 151 -_P 151 -_nije 151 -- 151 -ba 150 -jed 150 -sti 150 -ova 149 -_is 148 -id 148 -ton 148 -ke 147 -pos 147 -od_ 147 -osy 146 -Vi 146 -ila 145 -ins 145 -bo 145 -_Vi 145 -ir 144 -_za_ 144 -oz 144 -ecj 144 -cje 143 -on_ 143 -zn 142 -_O 141 -us 141 -i, 141 -i,_ 141 -mu 140 -inst 140 -cya 140 -oji 139 -esy 139 -icy 139 -lja 138 -_go 138 -i. 138 -_re 137 -_bilo 137 -edn 137 -acy 137 -rat 137 -bilo 137 -ali 136 -ecy 136 -ija 135 -pri 135 -ad_ 135 -lic 135 -i._ 135 -Vins 134 -Vin 134 -ston 134 -Vinst 134 -ga_ 134 -nston 134 -insto 134 -nsto 134 -_Vins 133 -_Vin 133 -zi 132 -ran 131 -le_ 130 -ili 130 -bilo_ 130 -_pos 129 -ila_ 129 -est 128 -_ve 128 -tre 128 -zye 127 -_nj 127 -si 126 -f 126 -alo 125 -ako_ 125 -tra 125 -sa_ 125 -pu 124 -ud 124 -z_ 124 -_ra 124 -iti 124 -_de 124 -odi 123 -T 123 --_ 122 -o,_ 121 -o, 121 -du 121 -rs 121 -B 120 -ka_ 119 -red 119 -_od_ 118 -an_ 118 -nu_ 118 -iko 117 -dno 117 -_pa 117 -s_ 116 |