summaryrefslogtreecommitdiff
path: root/libtextcat/data/new_fingerprints/fpdb.conf
blob: 329184d514d117c3a3baf90ba34557e30411eaf1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#
# A sample config file for the language models
# provided with Gertjan van Noord's language guesser
# (http://odur.let.rug.nl/~vannoord/TextCat/)
#
# Notes:
# - You may consider eliminating a couple of small languages from this
# list because they cause false positives with big languages and are
# bad for performance. (Do you really want to recognize Drents?)
# - Putting the most probable languages at the top of the list
# improves performance, because this will raise the threshold for
# likely candidates more quickly.
#

# This file has been modified (for OOo, by Jocelyn MERAND joc.mer@gmail.com) to include country and encoding.
# Guess strings are formed as follows: language-country-encoding (the country field may be empty, e.g. "en--utf8").

afrikaans.lm                         af--utf8
albanian.lm                          sq--utf8
amharic_utf.lm                       am--utf8
arabic.lm                            ar--utf8
basque.lm                            eu--utf8
belarus.lm                           be--utf8
bosnian.lm                           bs--utf8
breton.lm                            br--utf8
catalan.lm                           ca--utf8
chinese_simplified.lm                zh-CN-utf8
chinese_traditional.lm               zh-TW-utf8
croatian.lm                          hr--utf8
czech.lm                             cs--utf8
danish.lm                            da--utf8
dutch.lm                             nl--utf8
english.lm                           en--utf8
esperanto.lm                         eo--utf8
estonian.lm                          et--utf8
finnish.lm                           fi--utf8
french.lm                            fr--utf8
frisian.lm                           fy--utf8
georgian.lm                          ka--utf8
german.lm                            de--utf8
greek.lm                             el--utf8
hebrew.lm                            he--utf8
hindi.lm                             hi--utf8
hungarian.lm                         hu--utf8
icelandic.lm                         is--utf8
indonesian.lm                        id--utf8
irish_gaelic.lm                      ga--utf8
italian.lm                           it--utf8
japanese.lm                          ja--utf8
korean.lm                            ko--utf8
latin.lm                             la--utf8
latvian.lm                           lv--utf8
lithuanian.lm                        lt--utf8
luxembourgish.lm                     lb--utf8
malay.lm                             ms--utf8
manx_gaelic.lm                       gv--utf8
marathi.lm                           mr--utf8
mongolian_cyrillic.lm                mn--utf8
nepali.lm                            ne--utf8
norwegian.lm                         nb--utf8       # Norwegian (Bokmal)
persian.lm                           fa--utf8       # Farsi
polish.lm                            pl--utf8
portuguese.lm                        pt-PT-utf8
quechua.lm                           qu--utf8
romanian.lm                          ro--utf8
romansh.lm                           rm--utf8
russian.lm                           ru--utf8
sanskrit.lm                          sa--utf8
scots.lm                             sco--utf8
scots_gaelic.lm                      gd--utf8
serbian.lm                           sr--utf8
serbian-latin.lm                     sh--utf8
slovak_ascii.lm                      sk-SK-utf8
slovenian.lm                         sl--utf8
spanish.lm                           es--utf8
swahili.lm                           sw--utf8
swedish.lm                           sv--utf8
tagalog.lm                           tl--utf8
tamil.lm                             ta--utf8
thai.lm                              th--utf8
turkish.lm                           tr--utf8
ukrainian.lm                         uk--utf8
vietnamese.lm                        vi--utf8
welsh.lm                             cy--utf8
yiddish_utf.lm                       yi--utf8
zulu.lm                              zu--utf8