summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladimir Glazunov <vg@openoffice.org>2011-01-27 16:53:34 +0100
committerVladimir Glazunov <vg@openoffice.org>2011-01-27 16:53:34 +0100
commit6998f16d9aa1b4d93648e0bbdee9050726b89ad8 (patch)
tree497e9cec62aa68f44c373648a2e91e1dc412047e
parent3397c96e25fe093535edf209fb57e91be6136dd3 (diff)
parentbfd44bdd03589aa1a09f23568fc5203999b4d378 (diff)
CWS-TOOLING: integrate CWS tl84ooo/DEV300_m99
-rw-r--r--hyphen/hyphen-2.4.patch169
-rw-r--r--hyphen/hyphen-2.7.1-read-charset.patch20
-rw-r--r--hyphen/hyphen-2.7.1.patch72
-rw-r--r--hyphen/makefile.mk8
-rw-r--r--hyphen/prj/d.lst4
-rw-r--r--libtextcat/data/new_fingerprints/fpdb.conf3
-rw-r--r--libtextcat/data/new_fingerprints/lm/serbian-latin.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/serbian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/serbian_ascii.lm400
9 files changed, 901 insertions, 575 deletions
diff --git a/hyphen/hyphen-2.4.patch b/hyphen/hyphen-2.4.patch
deleted file mode 100644
index 33947f5..0000000
--- a/hyphen/hyphen-2.4.patch
+++ /dev/null
@@ -1,169 +0,0 @@
-diff -u misc/hyphen-2.4/csutil.c misc/build/hyphen-2.4/csutil.c
---- misc/hyphen-2.4/csutil.c 2003-06-01 02:04:00.000000000 +0200
-+++ misc/build/hyphen-2.4/csutil.c 2008-06-04 10:03:40.000000000 +0200
-@@ -3493,7 +3493,7 @@
- };
-
-
--struct enc_entry encds[] = {
-+static struct enc_entry encds[] = {
- {"ISO8859-1",iso1_tbl},
- {"ISO8859-2",iso2_tbl},
- {"ISO8859-3",iso3_tbl},
-Common subdirectories: misc/hyphen-2.4/doc and misc/build/hyphen-2.4/doc
-diff -u misc/hyphen-2.4/hyphen.c misc/build/hyphen-2.4/hyphen.c
---- misc/hyphen-2.4/hyphen.c 2008-05-01 02:18:15.000000000 +0200
-+++ misc/build/hyphen-2.4/hyphen.c 2008-06-04 10:06:57.000000000 +0200
-@@ -326,7 +326,7 @@
- } else {
- hnj_strchomp(repl + 1);
- replindex = 0;
-- replcut = strlen(buf);
-+ replcut = (signed char) strlen(buf);
- }
- repl = hnj_strdup(repl + 1);
- }
-@@ -359,10 +359,10 @@
- if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
- if ((ps < 0) && (replindex == pu)) {
- ps = replindex;
-- replindex = pc;
-+ replindex = (signed char) pc;
- }
- if ((ps >= 0) && ((pu - ps) == replcut)) {
-- replcut = (pc - replindex);
-+ replcut = (signed char) (pc - replindex);
- break;
- }
- }
-@@ -379,7 +379,7 @@
- dict[k]->states[state_num].repl = repl;
- dict[k]->states[state_num].replindex = replindex;
- if (!replcut) {
-- dict[k]->states[state_num].replcut = strlen(word);
-+ dict[k]->states[state_num].replcut = (signed char) strlen(word);
- } else {
- dict[k]->states[state_num].replcut = replcut;
- }
-@@ -702,7 +702,7 @@
- prep_word[j++] = '.';
- prep_word[j] = '\0';
-
-- for (i = 0; i < j; i++)
-+ for (i = 0; i < word_size + 5; i++)
- hyphens[i] = '0';
-
- #ifdef VERBOSE
-@@ -941,13 +941,13 @@
- int hnj_hyphen_norm(const char *word, int word_size, char * hyphens,
- char *** rep, int ** pos, int ** cut)
- {
-+ int i, j, k;
- if ((((unsigned char) word[0]) >> 6) == 2) {
- fprintf(stderr, "error - bad, non UTF-8 input: %s\n", word);
- return 1;
- }
-
- /* calculate UTF-8 character positions */
-- int i, j, k;
- for (i = 0, j = -1; i < word_size; i++) {
- /* beginning of an UTF-8 character (not '10' start bits) */
- if ((((unsigned char) word[i]) >> 6) != 2) j++;
-diff -u misc/hyphen-2.4/Makefile.am misc/build/hyphen-2.4/Makefile.am
---- misc/hyphen-2.4/Makefile.am 2008-04-30 12:33:44.000000000 +0200
-+++ misc/build/hyphen-2.4/Makefile.am 2008-06-04 11:33:23.000000000 +0200
-@@ -24,12 +24,12 @@
-
- hyphen.us3:
- cp -f $(srcdir)/hyphen.tex hyphen.us
-- patch < $(srcdir)/hyphen.patch
-+ $(GNUPATCH) < $(srcdir)/hyphen.patch
- $(srcdir)/tbhyphext.sh <$(srcdir)/tbhyphext.tex >hyphen.us2
- cat hyphen.us hyphen.us2 >hyphen.us3
-
--hyph_en_US.dic: hyphen.us3
-- perl $(srcdir)/substrings.pl hyphen.us3 hyph_en_US.dic ISO8859-1 2 3 >/dev/null
-+hyph_en_US.dic:
-+ @echo "hyph_en_US.txt distributed with Hyphen library"
-
- clean-local:
- rm -rf hyphen.us* hyph_en_US.dic
-diff -u misc/hyphen-2.4/Makefile.in misc/build/hyphen-2.4/Makefile.in
---- misc/hyphen-2.4/Makefile.in 2008-04-30 14:29:57.000000000 +0200
-+++ misc/build/hyphen-2.4/Makefile.in 2008-06-04 11:33:44.000000000 +0200
-@@ -795,12 +795,12 @@
-
- hyphen.us3:
- cp -f $(srcdir)/hyphen.tex hyphen.us
-- patch < $(srcdir)/hyphen.patch
-+ $(GNUPATCH) < $(srcdir)/hyphen.patch
- $(srcdir)/tbhyphext.sh <$(srcdir)/tbhyphext.tex >hyphen.us2
- cat hyphen.us hyphen.us2 >hyphen.us3
-
--hyph_en_US.dic: hyphen.us3
-- perl $(srcdir)/substrings.pl hyphen.us3 hyph_en_US.dic ISO8859-1 2 3 >/dev/null
-+hyph_en_US.dic:
-+ @echo "hyph_en_US.txt distributed with Hyphen library"
-
- clean-local:
- rm -rf hyphen.us* hyph_en_US.dic
-diff -u misc/hyphen-2.4/makefile.mk misc/build/hyphen-2.4/makefile.mk
---- misc/hyphen-2.4/makefile.mk 2008-06-04 10:43:21.000000000 +0200
-+++ misc/build/hyphen-2.4/makefile.mk 2008-06-04 12:40:46.000000000 +0200
-@@ -1 +1,54 @@
--dummy
-+#*************************************************************************
-+#
-+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+#
-+# Copyright 2000, 2010 Oracle and/or its affiliates.
-+#
-+# OpenOffice.org - a multi-platform office productivity suite
-+#
-+# This file is part of OpenOffice.org.
-+#
-+# OpenOffice.org is free software: you can redistribute it and/or modify
-+# it under the terms of the GNU Lesser General Public License version 3
-+# only, as published by the Free Software Foundation.
-+#
-+# OpenOffice.org is distributed in the hope that it will be useful,
-+# but WITHOUT ANY WARRANTY; without even the implied warranty of
-+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+# GNU Lesser General Public License version 3 for more details
-+# (a copy is included in the LICENSE file that accompanied this code).
-+#
-+# You should have received a copy of the GNU Lesser General Public License
-+# version 3 along with OpenOffice.org. If not, see
-+# <http://www.openoffice.org/license.html>
-+# for a copy of the LGPLv3 License.
-+#
-+#*************************************************************************
-+
-+PRJ = ..$/..$/..$/..
-+
-+PRJNAME = hyphen
-+TARGET = hyphen
-+CFLAGSCALL=gsd
-+
-+USE_DEFFILE=TRUE
-+EXTERNAL_WARNINGS_NOT_ERRORS := TRUE
-+UWINAPILIB=
-+
-+.INCLUDE : settings.mk
-+
-+# --- Files --------------------------------------------------------
-+
-+# !! not to be compiled because those belong to a stand alone programs: !!
-+# $(SLO)$/createfp.obj\
-+# $(SLO)$/testtextcat.obj
-+
-+SLOFILES= \
-+ $(SLO)$/hyphen.obj\
-+ $(SLO)$/hnjalloc.obj
-+
-+# --- Targets ------------------------------------------------------
-+
-+ALL: ALLTAR
-+
-+.INCLUDE : target.mk
-Common subdirectories: misc/hyphen-2.4/tests and misc/build/hyphen-2.4/tests
diff --git a/hyphen/hyphen-2.7.1-read-charset.patch b/hyphen/hyphen-2.7.1-read-charset.patch
new file mode 100644
index 0000000..e846955
--- /dev/null
+++ b/hyphen/hyphen-2.7.1-read-charset.patch
@@ -0,0 +1,20 @@
+--- misc/hyphen-2.7.1/hyphen.c 2010-12-01 01:47:22.000000000 +0100
++++ misc/build/hyphen-2.7.1/hyphen.c 2011-01-18 16:26:50.953125000 +0100
+@@ -291,13 +291,10 @@
+ /* read in character set info */
+ if (k == 0) {
+ for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
+- if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
+- for (i=0;i<MAX_NAME;i++)
+- if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
+- dict[k]->cset[i] = 0;
+- } else {
+- dict[k]->cset[0] = 0;
+- }
++ fgets(dict[k]->cset, sizeof(dict[k]->cset),f);
++ for (i=0;i<MAX_NAME;i++)
++ if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
++ dict[k]->cset[i] = 0;
+ dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
+ } else {
+ strcpy(dict[k]->cset, dict[0]->cset);
diff --git a/hyphen/hyphen-2.7.1.patch b/hyphen/hyphen-2.7.1.patch
new file mode 100644
index 0000000..0e8eafa
--- /dev/null
+++ b/hyphen/hyphen-2.7.1.patch
@@ -0,0 +1,72 @@
+--- misc/hyphen-2.7.1/Makefile.am 2010-07-19 11:23:17.000000000 +0200
++++ misc/build/hyphen-2.7.1/Makefile.am 2010-12-02 10:15:44.390625000 +0100
+@@ -25,14 +25,13 @@
+
+ hyphen.us3:
+ cp -f $(srcdir)/hyphen.tex hyphen.us
+- patch < $(srcdir)/hyphen.patch
++ $(GNUPATCH) < $(srcdir)/hyphen.patch
+ $(srcdir)/tbhyphext.sh <$(srcdir)/tbhyphext.tex >hyphen.us2
+ cat hyphen.us hyphen.us2 | $(AWK) -f $(srcdir)/lig.awk >hyphen.us3
+ cat $(srcdir)/ligpatch.txt >>hyphen.us3
+
+-hyph_en_US.dic: hyphen.us3
+- perl $(srcdir)/substrings.pl hyphen.us3 hyphen.us4 UTF-8 2 3 >/dev/null
+- cat hyphen.us4 | $(SED) -f $(srcdir)/ooopatch.sed >hyph_en_US.dic
++hyph_en_US.dic:
++ @echo "hyph_en_US.txt distributed with Hyphen library"
+
+ clean-local:
+ rm -rf hyphen.us* hyph_en_US.dic
+--- misc/hyphen-2.7.1/Makefile.in 2010-12-01 02:31:29.000000000 +0100
++++ misc/build/hyphen-2.7.1/Makefile.in 2010-12-02 10:17:16.546875000 +0100
+@@ -940,14 +940,13 @@
+
+ hyphen.us3:
+ cp -f $(srcdir)/hyphen.tex hyphen.us
+- patch < $(srcdir)/hyphen.patch
++ $(GNUPATCH) < $(srcdir)/hyphen.patch
+ $(srcdir)/tbhyphext.sh <$(srcdir)/tbhyphext.tex >hyphen.us2
+ cat hyphen.us hyphen.us2 | $(AWK) -f $(srcdir)/lig.awk >hyphen.us3
+ cat $(srcdir)/ligpatch.txt >>hyphen.us3
+
+-hyph_en_US.dic: hyphen.us3
+- perl $(srcdir)/substrings.pl hyphen.us3 hyphen.us4 UTF-8 2 3 >/dev/null
+- cat hyphen.us4 | $(SED) -f $(srcdir)/ooopatch.sed >hyph_en_US.dic
++hyph_en_US.dic:
++ @echo "hyph_en_US.txt distributed with Hyphen library"
+
+ clean-local:
+ rm -rf hyphen.us* hyph_en_US.dic
+--- misc/hyphen-2.7.1/makefile.mk 2010-12-02 10:35:40.265625000 +0100
++++ misc/build/hyphen-2.7.1/makefile.mk 2010-12-02 10:25:45.750000000 +0100
+@@ -1 +1,28 @@
+-dummy
++PRJ = ..$/..$/..$/..
++
++PRJNAME = hyphen
++TARGET = hyphen
++CFLAGSCALL=gsd
++
++USE_DEFFILE=TRUE
++EXTERNAL_WARNINGS_NOT_ERRORS := TRUE
++UWINAPILIB=
++
++.INCLUDE : settings.mk
++
++# --- Files --------------------------------------------------------
++
++# !! not to be compiled because those belong to a stand alone programs: !!
++# $(SLO)$/createfp.obj\
++# $(SLO)$/testtextcat.obj
++
++SLOFILES= \
++ $(SLO)$/hyphen.obj\
++ $(SLO)$/hnjalloc.obj
++
++# --- Targets ------------------------------------------------------
++
++ALL: ALLTAR
++
++.INCLUDE : target.mk
++
diff --git a/hyphen/makefile.mk b/hyphen/makefile.mk
index b733398..94c1592 100644
--- a/hyphen/makefile.mk
+++ b/hyphen/makefile.mk
@@ -36,12 +36,14 @@ TARGET=hyphen
# --- Files --------------------------------------------------------
-TARFILE_NAME=hyphen-2.4
-TARFILE_MD5=d0b5af6e408b8d2958f3d83b5244f5e8
+TARFILE_NAME=hyphen-2.7.1
+TARFILE_MD5=48a9f787f43a09c0a9b7b00cd1fddbbf
ADDITIONAL_FILES += makefile.mk
-PATCH_FILES=hyphen-2.4.patch
+PATCH_FILES= \
+ hyphen-2.7.1.patch \
+ hyphen-2.7.1-read-charset.patch
.IF "$(GUI)"=="UNX"
CONFIGURE_DIR=$(BUILD_DIR)
diff --git a/hyphen/prj/d.lst b/hyphen/prj/d.lst
index 0223cc6..4b7ff0b 100644
--- a/hyphen/prj/d.lst
+++ b/hyphen/prj/d.lst
@@ -1,5 +1,5 @@
..\%__SRC%\slb\hyphen.lib %_DEST%\lib%_EXT%\hyphen.lib
..\%__SRC%\inc\hyphen.h %_DEST%\inc%_EXT%\hyphen.h
-..\%__SRC%\misc\build\hyphen-2.4\.libs\libhyphen.a %_DEST%\lib%_EXT%\libhyphen.a
-..\%__SRC%\misc\build\hyphen-2.4\hyph_en_US.dic %_DEST%\bin%_EXT%\hyph_en_US.dic
+..\%__SRC%\misc\build\hyphen-2.7.1\.libs\libhyphen.a %_DEST%\lib%_EXT%\libhyphen.a
+..\%__SRC%\misc\build\hyphen-2.7.1\hyph_en_US.dic %_DEST%\bin%_EXT%\hyph_en_US.dic
diff --git a/libtextcat/data/new_fingerprints/fpdb.conf b/libtextcat/data/new_fingerprints/fpdb.conf
index df56f9e..329184d 100644
--- a/libtextcat/data/new_fingerprints/fpdb.conf
+++ b/libtextcat/data/new_fingerprints/fpdb.conf
@@ -68,7 +68,8 @@ russian.lm ru--utf8
sanskrit.lm sa--utf8
scots.lm sco--utf8
scots_gaelic.lm gd--utf8
-serbian_ascii.lm sh-YU-utf8
+serbian.lm sr--utf-8
+serbian-latin.lm sh--utf-8
slovak_ascii.lm sk-SK-utf8
slovenian.lm sl--utf8
spanish.lm es--utf8
diff --git a/libtextcat/data/new_fingerprints/lm/serbian-latin.lm b/libtextcat/data/new_fingerprints/lm/serbian-latin.lm
new file mode 100644
index 0000000..0a02831
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/serbian-latin.lm
@@ -0,0 +1,400 @@
+_ 56298
+a 17374
+i 15500
+e 13745
+o 13087
+n 9704
+r 8535
+s 7383
+t 6692
+j 6664
+u 6390
+k 6060
+a_ 5221
+v 5177
+l 5082
+d 4923
+e_ 4729
+m 4663
+p 4121
+i_ 3992
+_s 2964
+je 2847
+g 2703
+z 2575
+u_ 2521
+_p 2491
+ra 2430
+_i 2355
+na 2337
+. 2169
+, 2142
+,_ 2134
+._ 2112
+st 2091
+o_ 2004
+ni 1971
+b 1933
+ko 1894
+je_ 1700
+an 1671
+ij 1628
+no 1559
+č 1550
+_n 1522
+_u 1516
+re 1465
+ti 1409
+_o 1368
+en 1365
+_k 1353
+_j 1346
+_d 1341
+ja 1325
+li 1314
+ta 1309
+pr 1303
+c 1287
+ka 1269
+_je 1233
+po 1224
+ne 1221
+_i_ 1220
+ri 1198
+va 1197
+ov 1151
+od 1146
+la 1139
+sk 1132
+m_ 1124
+_pr 1120
+os 1086
+in 1060
+š 1053
+ve 1048
+oj 1035
+ma 1029
+om 1027
+og 1024
+im 1006
+av 1002
+al 982
+me 976
+vi 971
+_po 966
+_na 963
+na_ 944
+da 928
+ro 905
+nj 900
+ik 891
+_je_ 889
+to 884
+ad 881
+ar 873
+h 862
+or 858
+se 840
+_m 836
+te 819
+is 816
+_u_ 810
+aj 809
+ed 800
+_t 791
+et 772
+at 740
+vo 735
+ju 731
+gr 723
+di 722
+lo 722
+za 709
+il 709
+ak 707
+_r 704
+ja_ 703
+ji 700
+ne_ 694
+_ko 691
+ki 689
+er 681
+ci 680
+ć 673
+_se 670
+_v 664
+ž 663
+el 662
+on 658
+_z 656
+S 638
+iz 635
+bi 622
+ek 616
+_S 612
+su 607
+gra 606
+sa 603
+a, 601
+a,_ 598
+_b 598
+ih 597
+om_ 591
+_g 588
+ost 587
+ije 586
+d_ 579
+tr 574
+se_ 567
+ija 561
+de 559
+em 552
+_se_ 552
+le 549
+a. 548
+lj 548
+a._ 542
+do 540
+_su 533
+zi 529
+ič 525
+sta 520
+h_ 520
+ke 517
+ih_ 511
+f 507
+go 505
+ol 504
+dn 498
+sti 496
+ka_ 493
+_. 489
+_._ 473
+rad 472
+_a 468
+g_ 463
+ic 461
+as 461
+_za 461
+it 456
+koj 454
+ob 448
+iv 442
+da_ 442
+az 441
+su_ 436
+ku 433
+ma_ 430
+mo 429
+ju_ 429
+_sa 427
+ke_ 421
+ni_ 421
+ist 421
+og_ 418
+_od 417
+am 416
+anj 412
+đ 407
+_su_ 407
+ru 400
+nje 398
+sl 397
+ok 392
+op 391
+_koj 391
+_na_ 389
+tn 388
+ji_ 384
+e, 383
+_do 381
+e,_ 380
+ima 379
+ač 378
+nos 378
+vn 377
+B 377
+_ka 373
+ti_ 372
+li_ 370
+eo 370
+pre 367
+_iz 364
+P 361
+sko 361
+io 360
+n_ 360
+" 356
+vr 354
+_st 354
+mi 352
+čk 351
+ao 350
+im_ 347
+es 346
+_B 346
+ev 344
+ski 343
+ez 343
+j_ 341
+ije_ 341
+ig 339
+_ra 338
+ko_ 336
+tv 336
+grad 335
+no_ 335
+la_ 334
+_P 333
+_da 333
+št 332
+od_ 330
+- 326
+dr 323
+va_ 322
+tu 320
+_l 316
+pro 315
+ori 315
+N 314
+ika 311
+ija_ 311
+sto 309
+e. 307
+ir 303
+e._ 302
+_pro 301
+_pre 300
+ki_ 300
+ot 299
+_N 297
+sv 294
+pe 291
+ns 291
+sn 291
+met 290
+t_ 289
+pri 289
+ba 288
+ili 288
+pa 288
+ut 287
+ao_ 286
+oji 285
+_ne 285
+če 284
+ova 283
+kom 282
+um 281
+ičk 279
+nost 279
+k_ 279
+si 279
+ada 278
+van 278
+cij 276
+Sr 276
+lik 275
+_Sr 275
+nt 275
+ogr 274
+ug 274
+_ve 274
+ran 273
+br 273
+ani 272
+ine 272
+ac 271
+edn 271
+red 268
+_bi 266
+_pri 266
+ud 266
+ogra 265
+nja 265
+odi 264
+_f 263
+_re 262
+ga 258
+ati 258
+zn 257
+ovi 255
+rij 254
+_sv 254
+ako 252
+nu 252
+nij 251
+ana 251
+ča 251
+rav 250
+din 248
+kr 247
+iš 247
+či 245
+up 245
+ce 245
+ta_ 244
+rv 244
+men 244
+un 243
+rb 243
+aju 241
+ava 241
+ra_ 241
+etn 239
+oj_ 239
+ln 238
+T 238
+pos 237
+eni 237
+M 234
+_go 233
+_od_ 232
+du 231
+ali 231
+ini 229
+ima_ 229
+_da_ 228
+nov 227
+_te 227
+ps 225
+_e 225
+Srb 223
+ca 223
+_M 223
+_Srb 222
+ara 222
+_mo 221
+Be 221
+_de 221
+i. 220
+bij 220
+K 220
+jed 219
+sa_ 219
+oji_ 218
+čn 218
+_ob 218
+pi 218
+ur 218
+eogr 217
+ove 217
+avi 217
+tno 217
+eog 217
+eogra 217
+stv 216
+zv 216
+_Be 216
+i._ 216
+nik 215
+bo 214
+_koji 214
+nic 214
+koji 214
+_pos 214
+_K 213
+ume 213
+za_ 211
+i, 211
+i,_ 211
diff --git a/libtextcat/data/new_fingerprints/lm/serbian.lm b/libtextcat/data/new_fingerprints/lm/serbian.lm
new file mode 100644
index 0000000..55b5906
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/serbian.lm
@@ -0,0 +1,400 @@
+_ 56294
+а 17374
+и 15500
+е 13745
+о 13087
+н 8809
+р 8535
+с 7383
+т 6692
+у 6390
+к 6060
+а_ 5221
+ј 5197
+в 5177
+д 4882
+е_ 4729
+м 4663
+л 4534
+п 4121
+и_ 3992
+_с 2964
+г 2703
+з 2575
+у_ 2521
+_п 2491
+ра 2430
+_и 2355
+на 2337
+је 2281
+. 2169
+, 2142
+,_ 2134
+._ 2112
+ст 2091
+о_ 2004
+ни 1971
+б 1933
+ко 1894
+иј 1628
+но 1559
+ч 1550
+_у 1516
+је_ 1465
+ре 1465
+_н 1437
+ти 1409
+_о 1368
+_к 1353
+_ј 1346
+_д 1315
+ли 1314
+та 1309
+пр 1303
+ц 1287
+ка 1269
+ан 1260
+_је 1233
+по 1224
+не 1221
+_и_ 1220
+ен 1198
+ри 1198
+ва 1197
+ов 1151
+од 1146
+ла 1139
+ск 1132
+м_ 1124
+_пр 1120
+ос 1086
+ш 1053
+ве 1048
+ој 1035
+ма 1029
+ом 1027
+ин 1025
+ог 1024
+им 1006
+ав 1002
+ме 976
+ви 971
+_по 966
+_на 963
+на_ 944
+да 928
+ја 906
+ро 905
+њ 895
+ал 893
+ик 891
+_је_ 889
+то 884
+ад 876
+ар 873
+х 862
+ор 858
+се 840
+_м 836
+те 819
+ис 816
+_у_ 810
+ај 809
+ед 800
+_т 791
+ет 772
+ат 740
+во 735
+гр 723
+ди 722
+ло 722
+за 709
+ак 707
+_р 704
+не_ 694
+_ко 691
+ки 689
+ер 681
+ил 681
+ци 680
+ћ 673
+_се 670
+_в 664
+_з 656
+он 651
+С 638
+из 635
+би 622
+ж 617
+ек 616
+_С 612
+ел 611
+су 607
+гра 606
+са 603
+а, 601
+а,_ 598
+_б 598
+их 597
+ом_ 591
+_г 588
+ост 587
+ије 586
+д_ 579
+тр 574
+се_ 567
+ија 561
+де 559
+_се_ 552
+ем 552
+ле 549
+а. 548
+љ 548
+а._ 542
+до 540
+_су 533
+ју 529
+зи 529
+ји 525
+ич 525
+ста 520
+х_ 520
+ке 517
+их_ 511
+ф 507
+го 505
+сти 496
+ка_ 493
+_. 489
+_._ 473
+_а 468
+рад 467
+ја_ 467
+г_ 463
+иц 461
+ас 461
+_за 461
+ит 456
+кој 454
+об 448
+да_ 442
+ив 442
+аз 441
+су_ 436
+ку 433
+ма_ 430
+мо 429
+_са 427
+ке_ 421
+ол 421
+ни_ 421
+ист 421
+дн 421
+ог_ 418
+_од 417
+ам 416
+ањ 411
+ђ 407
+_су_ 407
+ру 400
+ње 398
+сл 397
+ок 392
+оп 391
+_кој 391
+_на_ 389
+е, 383
+тн 381
+_до 381
+е,_ 380
+има 379
+нос 378
+ач 378
+вн 377
+Б 377
+_ка 373
+ти_ 372
+ео 370
+ли_ 370
+пре 367
+_из 364
+ско 361
+ио 360
+н_ 360
+П 359
+ју_ 359
+" 356
+вр 354
+_ст 354
+ји_ 354
+ми 352
+чк 351
+ао 350
+им_ 347
+_Б 346
+ес 346
+ев 344
+ски 343
+ез 343
+ије_ 341
+иг 339
+_ра 338
+тв 336
+ко_ 336
+град 335
+но_ 335
+ла_ 334
+_да 333
+_П 333
+шт 332
+од_ 330
+- 326
+ј_ 326
+др 323
+ва_ 322
+ту 320
+ори 315
+про 315
+ија_ 311
+ика 311
+сто 309
+е. 307
+ир 303
+е._ 302
+_про 301
+_пре 300
+ки_ 300
+от 299
+Н 296
+св 294
+пе 291
+сн 291
+нс 291
+мет 290
+т_ 289
+при 289
+ба 288
+па 288
+или 288
+ут 287
+ао_ 286
+оји 285
+_не 285
+че 284
+ова 283
+ком 282
+ум 281
+_Н 280
+си 279
+ичк 279
+ност 279
+к_ 279
+ада 278
+Ср 276
+циј 276
+лик 275
+_Ср 275
+нт 275
+огр 274
+_ве 274
+уг 274
+бр 273
+ани 272
+ине 272
+ац 271
+ред 268
+_би 266
+_при 266
+уд 265
+ња 265
+огра 265
+оди 264
+_ф 263
+_ре 262
+_л 259
+ати 258
+га 258
+зн 257
+ови 255
+риј 254
+_св 254
+ну 252
+ако 252
+ча 251
+ниј 251
+ана 251
+рав 250
+иш 247
+кр 247
+чи 245
+уп 245
+це 245
+дин 244
+та_ 244
+рв 244
+рб 243
+ра_ 241
+ају 241
+ава 241
+ун 240
+ој_ 239
+Т 238
+лн 238
+пос 237
+ени 237
+етн 234
+М 234
+_го 233
+_од_ 232
+али 231
+ду 231
+ини 229
+има_ 229
+_да_ 228
+_те 227
+нов 227
+пс 225
+_е 225
+_М 223
+Срб 223
+ца 223
+_Срб 222
+ара 222
+ран 221
+_мо 221
+Бе 221
+_де 221
+биј 220
+и. 220
+јед 219
+К 219
+едн 219
+са_ 219
+пи 218
+оји_ 218
+ур 218
+_об 218
+ове 217
+чн 217
+тно 217
+ави 217
+еогр 217
+еогра 217
+еог 217
+и._ 216
+ств 216
+зв 216
+_Бе 216
+ник 215
+_који 214
+ниц 214
+бо 214
+који 214
+_пос 214
+уме 213
+_К 212
+за_ 211
+и,_ 211
+и, 211
+тра 209
+сп 209
diff --git a/libtextcat/data/new_fingerprints/lm/serbian_ascii.lm b/libtextcat/data/new_fingerprints/lm/serbian_ascii.lm
deleted file mode 100644
index 9471be6..0000000
--- a/libtextcat/data/new_fingerprints/lm/serbian_ascii.lm
+++ /dev/null
@@ -1,400 +0,0 @@
-_ 34122
-a 9113
-o 8135
-i 7736
-e 7535
-n 5207
-s 4860
-j 3995
-t 3797
-r 3660
-u 3224
-l 3065
-d 3061
-e_ 2941
-v 2786
-a_ 2746
-k 2701
-m 2492
-o_ 2328
-y 2238
-p 2151
-_s 2148
-i_ 2094
-je 1887
-c 1854
-z 1731
-_n 1437
-_p 1432
-g 1418
-b 1368
-u_ 1333
-je_ 1237
-, 1226
-,_ 1214
-_i 1130
-st 1105
-na 1076
-. 1056
-_d 1033
-._ 1030
-_j 967
-ra 934
-ko 908
-ni 900
-cy 893
-sy 875
-_je 871
-_o 824
-ta 799
-no 780
-_u 777
-re 766
-_b 764
-_k 763
-da 760
-ne 754
-li 750
-ti 745
-se 722
-po 713
-to 713
-_je_ 696
-an 688
-ja 683
-pr 665
-va 651
-lo 634
-_z 626
-m_ 625
-is 625
-il 622
-ov 621
-la 621
-_m 615
-bi 604
-_t 603
-_po 594
-en 586
-_se 578
-os 578
-in 576
-od 576
-ka 552
-ve 548
-ij 538
-_pr 536
-al 536
-vo 535
-om 530
-_i_ 525
-nj 515
-ed 509
-_na 507
-na_ 503
-og 499
-oj 498
-ma 493
-_bi 492
-on 489
-ak 482
-im 481
-ye 481
-ro 480
-vi 473
-sa 469
-ri 464
-da_ 451
-av 450
-at 449
-se_ 447
-es 446
-h 443
-ao 441
-ji 437
-yi 436
-_da 433
-ad 432
-_se_ 430
-lj 428
-zy 426
-za 426
-_ne 425
-de 422
-tr 417
-cj 415
-_u_ 414
-_c 412
-le 402
-_v 397
-ar 390
-_g 390
-ic 384
-n_ 382
-ju 379
-lo_ 377
-aj 376
-_ko 369
-ao_ 366
-ek 361
-_da_ 359
-et 356
-go 354
-iz 346
-_za 345
-_r 344
-or 342
-mo 341
-el 340
-as 339
-ik 336
-te 332
-_sa 329
-d_ 323
-am 320
-me 318
-sto 317
-di 315
-ec 311
-ol 310
-a,_ 307
-a, 307
-_ni 302
-ya 296
-do 295
-yt 294
-su 292
-syt 289
-li_ 288
-sta 286
-ije 284
-ko_ 277
-ti_ 277
-la_ 277
-ga 276
-bil 275
-no_ 274
-a. 273
-nu 272
-a._ 271
-ne_ 271
-om_ 268
-_cy 266
-_na_ 263
-_bil 263
-sv 263
-ru 259
-to_ 256
-_od 253
-cyi 253
-nje 251
-it 251
-pa 250
-az 248
-e,_ 245
-e, 245
-ob 244
-dn 243
-ac 242
-ost 242
-k_ 240
-iv 239
-io 238
-_su 238
-_iz 237
-ilo 235
-_sv 234
-_ka 233
-koj 231
-mi 229
-im_ 229
-ije_ 227
-g_ 226
-em 223
-su_ 223
-ih 223
-ji_ 221
-kr 220
-ut 220
-_koj 220
-V 218
-_st 218
-ye_ 217
-_l 214
-_V 213
-ovo 211
-j_ 210
-uc 208
-ja_ 208
-h_ 207
-nij 206
-sk 206
-ot 203
-io_ 203
-gl 203
-_do 201
-ok 200
-ns 199
-ilo_ 199
-er 197
-ih_ 195
-pre 193
-ci 193
-og_ 193
-ki 192
-sl 191
-t_ 189
-ni_ 189
-_a 189
-vr 188
-ati 187
-_su_ 186
-nije 181
-pro 181
-be 180
-yn 179
-cye 178
-ju_ 178
-ku 177
-isy 177
-ta_ 174
-sye 172
-_tr 172
-O 172
-jen 172
-_to 171
-pi 168
-_pre 168
-S 168
-ima 167
-nije_ 167
-_mo 166
-eg 166
-e._ 164
-za_ 164
-e. 164
-_pro 164
-gov 163
-N 162
-dr 162
-ako 162
-tv 162
-_S 160
-P 159
-ma_ 159
-_on 159
-sp 158
-nst 158
-anj 158
-dj 157
-oc 157
-_sy 156
-ev 155
-ce 155
-lik 154
-_nij 153
-_N 152
-ist 151
-_P 151
-_nije 151
-- 151
-ba 150
-jed 150
-sti 150
-ova 149
-_is 148
-id 148
-ton 148
-ke 147
-pos 147
-od_ 147
-osy 146
-Vi 146
-ila 145
-ins 145
-bo 145
-_Vi 145
-ir 144
-_za_ 144
-oz 144
-ecj 144
-cje 143
-on_ 143
-zn 142
-_O 141
-us 141
-i, 141
-i,_ 141
-mu 140
-inst 140
-cya 140
-oji 139
-esy 139
-icy 139
-lja 138
-_go 138
-i. 138
-_re 137
-_bilo 137
-edn 137
-acy 137
-rat 137
-bilo 137
-ali 136
-ecy 136
-ija 135
-pri 135
-ad_ 135
-lic 135
-i._ 135
-Vins 134
-Vin 134
-ston 134
-Vinst 134
-ga_ 134
-nston 134
-insto 134
-nsto 134
-_Vins 133
-_Vin 133
-zi 132
-ran 131
-le_ 130
-ili 130
-bilo_ 130
-_pos 129
-ila_ 129
-est 128
-_ve 128
-tre 128
-zye 127
-_nj 127
-si 126
-f 126
-alo 125
-ako_ 125
-tra 125
-sa_ 125
-pu 124
-ud 124
-z_ 124
-_ra 124
-iti 124
-_de 124
-odi 123
-T 123
--_ 122
-o,_ 121
-o, 121
-du 121
-rs 121
-B 120
-ka_ 119
-red 119
-_od_ 118
-an_ 118
-nu_ 118
-iko 117
-dno 117
-_pa 117
-s_ 116